1
0

updating for classification tasks

This commit is contained in:
mjgaughan 2025-05-05 12:14:26 -05:00
parent 747ddf8ee9
commit 57151a3618
5 changed files with 305939 additions and 1 deletions

13362
auto-dedup-cites.csv Normal file

File diff suppressed because one or more lines are too long

140208
auto_dedup_results.bib Normal file

File diff suppressed because one or more lines are too long

View File

@ -1,2 +1,25 @@
# Merge two waves of bibliographic search results, deduplicate them with
# ASySD, and write the unique records to a BibTeX file.

# Setup ----
# Install dependencies only when they are missing, so re-running the script
# does not reinstall them every time.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}
if (!requireNamespace("ASySD", quietly = TRUE)) {
  devtools::install_github("camaradesuk/ASySD")
}
library(ASySD)
library(tidyverse)

# Inputs and outputs ----
# NOTE(review): absolute, machine-specific paths; adjust before running on
# another machine.
t1_path <- "/Users/mgone/Desktop/SLR_references_total/SLR_references_1/t1all.bib"
t2_path <- "/Users/mgone/Desktop/SLR_references_total/SLR_references_2/t2all.bib"
output_file <- "auto_dedup_results.bib"

# Earlier idea (unused): list every .bib file in a combined directory.
# sv_files <- list.files("/Users/mgone/Desktop/SLR_references_total/both_waves_references", pattern = "*.bib", full.names = TRUE)

# Load ----
t1all <- ASySD::load_search(t1_path, method = "bib")
t2all <- ASySD::load_search(t2_path, method = "bib")

# Combine ----
# rbind() needs matching columns, so keep only the columns present in both
# inputs before stacking them.
common_columns <- intersect(names(t1all), names(t2all))
if (length(common_columns) == 0) {
  stop("t1all and t2all share no columns; nothing to combine.", call. = FALSE)
}
t1_common <- t1all[, common_columns, drop = FALSE]
t2_common <- t2all[, common_columns, drop = FALSE]
all_citations <- rbind(t1_common, t2_common)

# Deduplicate ----
# NOTE(review): batch_n and sort_by are passed straight to ASySD; confirm
# their exact semantics against the ASySD documentation.
dedup_results <- batch_dedup(
  all_citations,
  batch_n = 1000,
  sort_by = c("year", "title", "abstract", "author")
)
unique_dedup_results <- dedup_results$unique

# Export ----
write_citations(unique_dedup_results, type = "bib", filename = output_file)

# Earlier draft, kept for reference only. It refers to objects (`citations`,
# `results_r2`, `filename`) that this script never defines, so it is not
# runnable as written.
# results <- batch_dedup(citations, batch_n=2000, sort_by = c("year", "title","author"))
# unique_r2 <- results_r2$unique
# write_citations(citations, type = c("ris", "txt", "csv", "bib"), filename)

104229
t1all.bib Normal file

File diff suppressed because it is too large Load Diff

48116
t2all.bib Normal file

File diff suppressed because it is too large Load Diff