updating with manual dedup of citations
This commit is contained in:
parent
1d63537027
commit
86e2cd3ed8
2959
cites/053025_man_filtered_dedup.csv
Normal file
2959
cites/053025_man_filtered_dedup.csv
Normal file
File diff suppressed because one or more lines are too long
@ -1,8 +1,15 @@
|
||||
import csv
|
||||
import bibtexparser
|
||||
from tqdm import tqdm
|
||||
import pandas as pd
|
||||
|
||||
# Keep only the rows of the auto-dedup results whose title also appears in the
# "bad abstracts" CSV, and write that subset to a new CSV file.
# Load the full auto-dedup results table.
df1 = pd.read_csv('auto_dedup_results.csv')
|
||||
# Load the second table; only its third column is used below.
df2 = pd.read_csv('auto_dedup_results_bad_abstracts.csv')
|
||||
# Boolean-mask df1 by membership of its 'title' values in df2's third column
# (positional index 2). isin() is an exact-match test, so titles differing in
# case or whitespace will not match.
# NOTE(review): presumably column 2 of df2 holds titles — confirm against the
# CSV header; selecting it by name would be more robust if so.
filtered_df1 = df1[df1['title'].isin(df2.iloc[:, 2])]
|
||||
# Write the filtered rows without the DataFrame index column.
filtered_df1.to_csv('filtered_dedup_correct.csv', index=False)
|
||||
|
||||
|
||||
'''
|
||||
with open("auto_dedup_results.bib") as bibfile:
|
||||
bib_db = bibtexparser.load(bibfile)
|
||||
|
||||
@ -14,3 +21,4 @@ with open('auto_dedup_results.csv', 'w', newline="", encoding='utf-8') as csvfil
|
||||
for entry in tqdm(bib_db.entries, desc="Converting BibTeX to CSV"):
|
||||
row = {field: entry.get(field, '') for field in fields}
|
||||
writer.writerow(row)
|
||||
'''
|
||||
|
@ -8,6 +8,7 @@ pkgs.mkShell {
|
||||
pkgs.python312
|
||||
pkgs.python312Packages.bibtexparser
|
||||
pkgs.python312Packages.tqdm
|
||||
pkgs.python312Packages.pandas
|
||||
git
|
||||
];
|
||||
shellHook = ''
|
||||
|
Loading…
Reference in New Issue
Block a user