{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import csv\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def recursive_comment_parsing(dict_array, subcomment):\n",
    "    \"\"\"Flatten one comment and all of its nested replies into dict_array.\n",
    "\n",
    "    A record is appended for `subcomment` first, then for every reply found\n",
    "    under its 'comments' key, depth-first and in listed order.\n",
    "\n",
    "    Args:\n",
    "        dict_array: list that receives the records; it is mutated in place.\n",
    "        subcomment: dict describing one comment. 'text_blocks', 'time_stamp'\n",
    "            and 'author' fall back to '' when the key is absent.\n",
    "\n",
    "    Returns:\n",
    "        The same dict_array object that was passed in.\n",
    "    \"\"\"\n",
    "    dict_array.append({\n",
    "        'comment_text': subcomment.get('text_blocks', ''),\n",
    "        'date_created': subcomment.get('time_stamp', ''),\n",
    "        'Author': subcomment.get('author', ''),\n",
    "    })\n",
    "\n",
    "    # A missing, None or empty 'comments' entry all mean \"no replies\".\n",
    "    for nested_comment in subcomment.get('comments') or []:\n",
    "        recursive_comment_parsing(dict_array, nested_comment)\n",
    "\n",
    "    return dict_array"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_csv(data):\n",
    "    \"\"\"Collect every comment of every section into a single DataFrame.\n",
    "\n",
    "    Despite its name, this function writes no file; it returns a DataFrame.\n",
    "\n",
    "    Args:\n",
    "        data: iterable of section dicts. A section's 'comments' entry, when\n",
    "            present, is a list of comment dicts that may nest further\n",
    "            'comments' lists.\n",
    "\n",
    "    Returns:\n",
    "        pandas.DataFrame with the columns 'comment_text', 'date_created' and\n",
    "        'Author', one row per comment or reply in traversal order.\n",
    "    \"\"\"\n",
    "    comments = []\n",
    "    for subsection in data:\n",
    "        # .get() tolerates sections without a 'comments' key (previously a\n",
    "        # KeyError); `or []` also covers an explicit None.\n",
    "        for subcomment in subsection.get('comments') or []:\n",
    "            recursive_comment_parsing(comments, subcomment)\n",
    "\n",
    "    # Explicit columns keep the schema stable even when nothing was collected.\n",
    "    return pd.DataFrame(comments, columns=['comment_text', 'date_created', 'Author'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE: machine-specific absolute path; point `fp` at your local copy of the archive.\n",
    "fp = \"/data/users/mgaughan/mw-repo-lifecycles/discussion_data/visualeditor/ve-feedback-2011-12.json\"\n",
    "# Decode as UTF-8 explicitly so the result does not depend on the platform default.\n",
    "with open(fp, 'r', encoding='utf-8') as file:\n",
    "    data = json.load(file)\n",
    "# Only the list stored under 'sections' is passed on to generate_csv.\n",
    "data = data['sections']\n",
    "test_df = generate_csv(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [ "
\n", " | comment_text | \n", "date_created | \n", "Author | \n", "
---|---|---|---|
0 | \n", "[{{talkarchive}}\\n, \\n] | \n", "\n", " | \n", " |
1 | \n", "[== Link editing should work when cursor is on... | \n", "20:38, 13 December 2011 (UTC) | \n", "Dantman | \n", "
2 | \n", "[\\n, :Yes, the little down-arrow with B/F/Link... | \n", "22:16, 13 December 2011 (UTC) | \n", "JakobVoss | \n", "
3 | \n", "[\\n] | \n", "\n", " | \n", " |
4 | \n", "[== Collapse same text links ==\\n, \\n, <small>... | \n", "20:43, 13 December 2011 (UTC) | \n", "Dantman | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
429 | \n", "[\\n] | \n", "\n", " | \n", " |
430 | \n", "[== User Interface ==\\n, \\n, <small>User agent... | \n", "12:02, 30 December 2011 (UTC) | \n", "TJRana | \n", "
431 | \n", "[\\n] | \n", "\n", " | \n", " |
432 | \n", "[== Feedback by users that see no feedback lin... | \n", "\n", " | \n", " |
433 | \n", "[== Usability ==\\n, \\n, It's great that you're... | \n", "\n", " | \n", " |
434 rows × 3 columns
\n", "\n", " |
---|
\n", " | comment_text | \n", "date_created | \n", "Author | \n", "
---|---|---|---|
1 | \n", "[==Parsoid working but getting JSON error==\\n,... | \n", "03:07, 31 May 2015 (UTC) | \n", "Alex Mashin | \n", "
2 | \n", "[\\n, ----\\n, \\n, I find out that when you use ... | \n", "09:30, 20 Jun 2015 (UTC) | \n", "mnrahimi | \n", "
4 | \n", "[==Set up in web server?==\\n, My wiki is on th... | \n", "15:37, 24 June 2015 (UTC) | \n", "Arlolra | \n", "
6 | \n", "[== Multiple wikis ==\\n, Can several Mediawiki... | \n", "18:35, 13 October 2012 (UTC) | \n", "Amire80 | \n", "
7 | \n", "[\\n, : Yes. The Parsoid takes a prefix when th... | \n", "22:34, 15 October 2012 (UTC) | \n", "Jdforrester (WMF) | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
160 | \n", "[== Installation on OS X {{Resolved}} ==\\n, \\n... | \n", "13:53, 31 May 2015 (UTC) | \n", "Dieudo | \n", "
161 | \n", "[\\n, : Resolved by [https://www.mediawiki.org/... | \n", "15:09, 24 June 2015 (UTC) | \n", "Arlolra | \n", "
163 | \n", "[== Error: Cannot find module '/etc/mediawiki/... | \n", "16:35, 29 October 2015 (UTC) | \n", "Krauss | \n", "
164 | \n", "[\\n, : bin/server.js is in parsoid master and ... | \n", "16:37, 29 October 2015 (UTC) | \n", "SSastry (WMF) | \n", "
166 | \n", "[==Can see VisualEditor interface but can't us... | \n", "00:44, 19 March 2016 (UTC) | \n", "2A01:E35:2F5E:A0E0:948F:579B:2435:72DB | \n", "
106 rows × 3 columns
\n", "