Export every configured Taguette project's tags to its own TSV file.

README.md: document the new `exported_tags` directory and drop the shell
redirect from the export command.
taguette-export_tags_to_csv.py: loop over each `.taguette_gdocs*` config file
in the working directory, write `exported_tags/exported_tags_<project_id>.tsv`
through `csv.writer`, and add a `url` column pointing at the tag's highlights.

Review fixes folded into the added lines: `row is None` instead of `== None`,
a raw-string regex for the category pattern, and closing the SQLite
connection after each project.

NOTE(review): the README context still says a single `exported_tags.tsv` is
created; that sentence continues past the hunk, so it is left for a follow-up.
NOTE(review): the post-image hash on the script's `index` line predates the
review fixes above; whitespace-only lines are written without trailing spaces.

diff --git a/README.md b/README.md
index d3aa877..0627be0 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,11 @@ In order to not commit your changes into git, you can run this command:
 git update-index --assume-unchanged .taguette_gdocs
 ```
 
-I also create a directory called `taguette_backups` like:
+I also create a directory called `taguette_backups` and one called `exported_tags` like:
 
 ```
 mkdir taguette_backups
+mkdir exported_tags
 ```
 
 ## Step 1: Backing things up
@@ -52,7 +53,7 @@ sudo chown taguette:taguette /var/lib/taguette/taguette.sqlite3
 Exporting tags should be as easy as:
 
 ```
-python3 taguette-export_tags_to_csv.py > exported_tags.tsv
+python3 taguette-export_tags_to_csv.py
 ```
 
 This will create a new file called `exported_tags.tsv` which you can manually
diff --git a/taguette-export_tags_to_csv.py b/taguette-export_tags_to_csv.py
index 8c39554..d0fd28c 100755
--- a/taguette-export_tags_to_csv.py
+++ b/taguette-export_tags_to_csv.py
@@ -4,51 +4,63 @@ import re
 import json
 import sqlite3
 from configparser import ConfigParser
+import csv
+import os
 
 
-config = ConfigParser()
-config.read('.taguette_gdocs')
+config_files = [f for f in os.listdir() if f.startswith('.taguette_gdocs')]
 
-## this is project ID from the configuration
-project_id = int(config['General']['taguette_project_id'])
-taguette_database_file = config['General']['taguette_database_file']
+for file_path in config_files:
+    config = ConfigParser()
+    config.read(file_path)
 
-## connect to sqlite3
-con = sqlite3.connect(taguette_database_file)
-cur = con.cursor()
+    ## this is project ID from the configuration
+    project_id = int(config['General']['taguette_project_id'])
+    taguette_database_file = config['General']['taguette_database_file']
 
-# Run this if you just want tags and no highlights
-sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"
+    # set output file name
+    output_file_name = f'exported_tags/exported_tags_{project_id}.tsv'
 
-# Run this if you want tags AND highlights
-#sql_stmt_get = "SELECT tags.id, tags.path, tags.description, highlights.snippet FROM highlight_tags INNER JOIN tags ON highlight_tags.tag_id = tags.id INNER JOIN highlights ON highlight_tags.highlight_id = highlights.id WHERE project_id = ?"
-cur.execute(sql_stmt_get, (project_id,))
+    ## connect to sqlite3
+    con = sqlite3.connect(taguette_database_file)
+    cur = con.cursor()
 
-# print out a header
-print("\t".join(['id', 'axial codes', 'tags', 'category', 'description']))
+    # Run this if you just want tags and no highlights
+    sql_stmt_get = "SELECT id, path, description FROM tags WHERE project_id = ?"
 
-while True:
-    row = cur.fetchone()
-    if row == None:
-        break
-
-    tag_id, path, description = row
-
-    tag_match = re.match(r'^(.+)\_(.*)$', path)
-    if tag_match:
-        axial = tag_match.group(1)
-        tag = tag_match.group(2)
-    else:
-        axial = ""
-        tag = path
-
-    # look for extra category information stored in the description
-    cat_match = re.match('^(.*)\s*(\{(.*)\})$', description)
-    if cat_match:
-        description = cat_match.group(1)
-        category = json.loads(cat_match.group(2))["category"]
-    else:
-        category = ""
+    cur.execute(sql_stmt_get, (project_id,))
 
-    print("\t".join([str(tag_id), axial, tag, category, description]))
+    with open(output_file_name, 'w', newline='') as output_file:
+        writer = csv.writer(output_file, delimiter='\t')
+        writer.writerow(['id', 'axial codes', 'tag', 'category', 'description', 'url'])
+        while True:
+            row = cur.fetchone()
+            if row is None:
+                break
+
+            tag_id, path, description = row
+
+            tag_match = re.match(r'^(.+)\_(.*)$', path)
+            if tag_match:
+                axial = tag_match.group(1)
+                tag = tag_match.group(2)
+            else:
+                axial = ""
+                tag = path
+
+            # look for extra category information stored in the description
+            cat_match = re.match(r'^(.*)\s*(\{(.*)\})$', description)
+            if cat_match:
+                description = cat_match.group(1)
+                category = json.loads(cat_match.group(2))["category"]
+            else:
+                category = ""
+
+            # create a URL that will link to the list of highlights
+            url = f"https://taguette.communitydata.science/project/{project_id}/highlights/{tag}"
+
+            writer.writerow([str(tag_id), axial, tag, category, description, url])
+
+    # release the database handle before moving on to the next config file
+    con.close()
 