1
0

updating with manual dedup of citations

This commit is contained in:
mgaughan 2025-05-30 16:37:03 -05:00
parent 83e9f3b25c
commit 6499aa34cf
3 changed files with 2968 additions and 0 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,8 +1,15 @@
import csv
import bibtexparser
from tqdm import tqdm
import pandas as pd
# Load the two CSV inputs: the full auto-dedup results and a second file
# (presumably the subset with bad abstracts, judging by its name).
df1 = pd.read_csv('auto_dedup_results.csv')
df2 = pd.read_csv('auto_dedup_results_bad_abstracts.csv')

# The comparison values are taken by position: the third column of df2.
# NOTE(review): assumes that column holds titles -- confirm against the file.
flagged_titles = df2.iloc[:, 2]

# Keep only the rows of df1 whose 'title' value appears in that column.
title_is_flagged = df1['title'].isin(flagged_titles)
filtered_df1 = df1.loc[title_is_flagged]

# Write the retained rows out, omitting the DataFrame index column.
filtered_df1.to_csv('filtered_dedup_correct.csv', index=False)
'''
with open("auto_dedup_results.bib") as bibfile:
bib_db = bibtexparser.load(bibfile)
@@ -14,3 +21,4 @@ with open('auto_dedup_results.csv', 'w', newline="", encoding='utf-8') as csvfil
for entry in tqdm(bib_db.entries, desc="Converting BibTeX to CSV"):
row = {field: entry.get(field, '') for field in fields}
writer.writerow(row)
'''

View File

@@ -8,6 +8,7 @@ pkgs.mkShell {
pkgs.python312
pkgs.python312Packages.bibtexparser
pkgs.python312Packages.tqdm
pkgs.python312Packages.pandas
git
];
shellHook = ''