1
0
adaptation-slr/cites/bib_to_csv.py
2025-05-30 16:37:03 -05:00

25 lines
789 B
Python

import csv
import bibtexparser
from tqdm import tqdm
import pandas as pd
# Load the two CSV exports that are compared below.
df1 = pd.read_csv('auto_dedup_results.csv')
df2 = pd.read_csv('auto_dedup_results_bad_abstracts.csv')

# Keep the rows of df1 whose 'title' value occurs in the third column
# (positional index 2) of df2.
# NOTE(review): position 2 is presumably df2's title column -- confirm.
reference_titles = df2.iloc[:, 2]
title_matches = df1['title'].isin(reference_titles)
filtered_df1 = df1.loc[title_matches]

# Save the matching rows; index=False keeps the DataFrame index out of the file.
filtered_df1.to_csv('filtered_dedup_correct.csv', index=False)
# Columns copied from each BibTeX entry into the CSV, in output order.
BIB_CSV_FIELDS = ('duplicate_id', 'bibtype', 'title', 'abstract', 'doi')


def write_entries_csv(entries, csvfile, fields=BIB_CSV_FIELDS):
    """Write a header row plus one CSV row per entry to *csvfile*.

    Args:
        entries: Iterable of dict-like entries (anything offering ``.get``).
        csvfile: Writable text file object; open real files with
            ``newline=""`` so the csv module controls line endings.
        fields: Column names, written as the header. A field that an entry
            lacks is written as an empty string; keys of an entry that are
            not in *fields* are ignored.
    """
    writer = csv.DictWriter(csvfile, fieldnames=fields)
    writer.writeheader()
    for entry in entries:
        writer.writerow({field: entry.get(field, '') for field in fields})


def bib_to_csv(bib_path="auto_dedup_results.bib",
               csv_path='auto_dedup_results.csv'):
    """Convert the BibTeX file at *bib_path* into a CSV at *csv_path*.

    This step used to live in this file as code disabled inside a string
    literal. It is deliberately not called at import or script time, so
    running the file behaves as before; call it explicitly to rebuild the
    CSV.

    Args:
        bib_path: BibTeX file to parse with ``bibtexparser.load``.
        csv_path: Destination CSV, written as UTF-8 and overwritten if it
            already exists.
    """
    with open(bib_path) as bibfile:
        bib_db = bibtexparser.load(bibfile)
    with open(csv_path, 'w', newline="", encoding='utf-8') as csvfile:
        # tqdm only wraps the iteration to display a progress bar.
        write_entries_csv(
            tqdm(bib_db.entries, desc="Converting BibTeX to CSV"),
            csvfile,
        )