From 21ce4f78547c251cb2d22cb1958794684c046a52 Mon Sep 17 00:00:00 2001 From: Matthew Gaughan Date: Tue, 3 Jun 2025 16:47:33 -0500 Subject: [PATCH] misc updates to phabricator cleaning script --- data_cleaning/cleaning_phabricator.ipynb | 26 ++++++++++++------------ 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/data_cleaning/cleaning_phabricator.ipynb b/data_cleaning/cleaning_phabricator.ipynb index c745228..d24f63d 100644 --- a/data_cleaning/cleaning_phabricator.ipynb +++ b/data_cleaning/cleaning_phabricator.ipynb @@ -12,22 +12,22 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "18152" + "4583" ] }, - "execution_count": 20, + "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "fp =\"/data/users/mgaughan/mw-repo-lifecycles/phab_data/http_phab_data/http_09-01-2013_12-01-2014_phab_data.json\"\n", + "fp =\"/data/users/mgaughan/mw-repo-lifecycles/phab_data/visualeditor/2012_1_1_to_2014_12_31.json\"\n", "with open(fp, 'r') as file:\n", " data1 = json.load(file)\n", "len(data1)" @@ -104,11 +104,11 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "data_list = [data1, data3, data4]" + "#data_list = [data1, data3, data4]" ] }, { @@ -21943,7 +21943,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -22077,29 +22077,29 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ - "generate_csv(data, \"0402_https2_phab_comments.csv\")" + "generate_csv(data1, \"060325_c1_ve_phab_comments.csv\")" ] }, { "cell_type": "code", - "execution_count": 25, + "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Minimum date_created: 1378012620\n", - "Maximum date_created: 1745047564\n" + "Minimum date_created: 1325581020\n", + "Maximum date_created: 1735855955\n" ] } ], "source": [ - "df = pd.read_csv('060325_c3_http_phab_comments.csv')\n", + "df = pd.read_csv('060325_c1_ve_phab_comments.csv')\n", "# Convert the 'date_created' column to datetime format\n", "\n", "# Get the minimum and maximum date_created values\n",