Keep better track of time.

- Add timestamp to transliterations output file.
- Append wikidata search terms instead of overwriting.
This commit is contained in:
Nathan TeBlunthuis 2020-03-28 13:49:19 -07:00
parent e720653a23
commit 282208507a
2 changed files with 12 additions and 7 deletions

View File

@ -12,4 +12,5 @@ echo "Searching for Wikidata entities using Google trends"
python3 wikidata_search.py ../data/output/related_searches_rising.csv ../data/output/related_searches_top.csv --use-gtrends --output ../data/output/wikidata_search_results_from_gtrends.csv python3 wikidata_search.py ../data/output/related_searches_rising.csv ../data/output/related_searches_top.csv --use-gtrends --output ../data/output/wikidata_search_results_from_gtrends.csv
echo "Finding transliterations from Wikidata using sparql" echo "Finding transliterations from Wikidata using sparql"
python3 wikidata_transliterations.py ../data/output/wikidata_search_results_from_gtrends.csv ../data/output/wikidata_search_results.csv --topN 10 20 --output ../data/output/wikidata_entity_labels.csv python3 wikidata_transliterations.py ../data/output/wikidata_search_results_from_gtrends.csv ../data/output/wikidata_search_results.csv --topN 10 20 --output ../data/output/$(date '+%Y-%m-%d')_wikidata_entity_labels.csv

View File

@ -15,10 +15,13 @@ class Wikidata_ResultSet:
for i, result in enumerate(results)) for i, result in enumerate(results))
) )
def to_csv(self, outfile=None): def to_csv(self, outfile=None, mode='w'):
if outfile is None: if outfile is None:
of = stdout of = stdout
else:
if path.exists(outfile) and mode != 'w':
of = open(outfile,'a',newline='')
else: else:
of = open(outfile,'w',newline='') of = open(outfile,'w',newline='')
writer = csv.writer(of) writer = csv.writer(of)
@ -64,15 +67,15 @@ def read_google_trends_files(terms_files):
yield row['query'] yield row['query']
def trawl_google_trends(terms_files, outfile = None): def trawl_google_trends(terms_files, outfile = None, mode='w'):
terms = read_google_trends_files(terms_files) terms = read_google_trends_files(terms_files)
resultset = run_wikidata_searches(terms) resultset = run_wikidata_searches(terms)
resultset.to_csv(outfile) resultset.to_csv(outfile, mode)
def trawl_base_terms(infiles, outfile = None): def trawl_base_terms(infiles, outfile = None, mode='w'):
terms = chain(* (open(infile,'r') for infile in infiles)) terms = chain(* (open(infile,'r') for infile in infiles))
resultset = run_wikidata_searches(terms) resultset = run_wikidata_searches(terms)
resultset.to_csv(outfile) resultset.to_csv(outfile, mode)
## search each of the base terms in wikidata ## search each of the base terms in wikidata
@ -84,6 +87,7 @@ if __name__ == "__main__":
parser.add_argument('inputs', type=str, nargs='+', help='one or more files to read') parser.add_argument('inputs', type=str, nargs='+', help='one or more files to read')
parser.add_argument('--use-gtrends', action='store_true', help = 'toggle whether the input is the output from google trends') parser.add_argument('--use-gtrends', action='store_true', help = 'toggle whether the input is the output from google trends')
parser.add_argument('--output', type=str, help='an output file. defaults to stdout') parser.add_argument('--output', type=str, help='an output file. defaults to stdout')
parser.add_argument('--overwrite', action='store_true', help = 'overwrite existing output files instead of appending')
args = parser.parse_args() args = parser.parse_args()
if args.use_gtrends: if args.use_gtrends:
trawl_google_trends(args.inputs, args.output) trawl_google_trends(args.inputs, args.output)