25 lines
789 B
Python
25 lines
789 B
Python
import csv
|
|
import bibtexparser
|
|
from tqdm import tqdm
|
|
import pandas as pd
|
|
|
|
# Load the two result tables produced by the deduplication run.
df1 = pd.read_csv('auto_dedup_results.csv')
df2 = pd.read_csv('auto_dedup_results_bad_abstracts.csv')

# The third column (position 2) of df2 supplies the values that df1's
# 'title' column is matched against.
# NOTE(review): presumably that column is also 'title' -- confirm against the CSV header.
bad_abstract_keys = df2.iloc[:, 2]
title_matches = df1['title'].isin(bad_abstract_keys)

# Keep only the df1 rows whose title occurs in df2, and write them out
# without the index column.
filtered_df1 = df1.loc[title_matches]
filtered_df1.to_csv('filtered_dedup_correct.csv', index=False)
|
|
|
|
|
|
# Disabled step, kept for reference: converts auto_dedup_results.bib into the
# auto_dedup_results.csv file that is read above.
'''
with open("auto_dedup_results.bib") as bibfile:
    bib_db = bibtexparser.load(bibfile)

fields = ['duplicate_id', 'bibtype', 'title', 'abstract', 'doi']

with open('auto_dedup_results.csv', 'w', newline="", encoding='utf-8') as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fields)
    writer.writeheader()
    for entry in tqdm(bib_db.entries, desc="Converting BibTeX to CSV"):
        row = {field: entry.get(field, '') for field in fields}
        writer.writerow(row)
'''
|