updating collection scripts/data
This commit is contained in:
parent
9c7ab02e3d
commit
993bbe658b
917917
src/helper_scripts/cleaning_scripts/0514_https_phab_comments.csv
Normal file
917917
src/helper_scripts/cleaning_scripts/0514_https_phab_comments.csv
Normal file
File diff suppressed because one or more lines are too long
@ -81,16 +81,39 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/plain": [
|
||||||
|
"1760"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 6,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"data_list = [data1, data2, data3]"
|
"fp3 =\"/data/users/mgaughan/mw-repo-lifecycles/phab_data/https2013/http_10-21-2013_12-5-2013_phab_data.json\"\n",
|
||||||
|
"with open(fp3, 'r') as file:\n",
|
||||||
|
" data4 = json.load(file)\n",
|
||||||
|
"len(data4)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 6,
|
"execution_count": 7,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"data_list = [data1, data2, data3, data4]"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 8,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -105,7 +128,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 7,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -154,7 +177,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -208,11 +231,11 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"concat_and_save(data_list, '0512_https_phab_comments.csv')"
|
"concat_and_save(data_list, '0514_https_phab_comments.csv')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -226,7 +249,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -234,12 +257,12 @@
|
|||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"Minimum date_created: 1314866460\n",
|
"Minimum date_created: 1314866460\n",
|
||||||
"Maximum date_created: 1746664402\n"
|
"Maximum date_created: 1746822176\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"df = pd.read_csv(\"0512_https_phab_comments.csv\")\n",
|
"df = pd.read_csv(\"0514_https_phab_comments.csv\")\n",
|
||||||
"# Convert the 'date_created' column to datetime format\n",
|
"# Convert the 'date_created' column to datetime format\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Get the minimum and maximum date_created values\n",
|
"# Get the minimum and maximum date_created values\n",
|
||||||
|
@ -106,7 +106,7 @@ def query_transactions_phid_task(
|
|||||||
'limit':limit,
|
'limit':limit,
|
||||||
'after':after,
|
'after':after,
|
||||||
}
|
}
|
||||||
response = requests.get( api_url_base, params=params)
|
response = requests.get(api_url_base, params=params)
|
||||||
try:
|
try:
|
||||||
result = json.loads(response.text)['result']
|
result = json.loads(response.text)['result']
|
||||||
data_tmp = result['data']
|
data_tmp = result['data']
|
||||||
@ -179,8 +179,8 @@ if __name__ == "__main__":
|
|||||||
api_base = 'https://phabricator.wikimedia.org/api/'
|
api_base = 'https://phabricator.wikimedia.org/api/'
|
||||||
|
|
||||||
#p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
|
#p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2011, 9, 1, 0, 0, 0)))
|
||||||
p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2012, 6, 14, 0, 0, 0)))
|
p_ts1 = int(datetime.datetime.timestamp(datetime.datetime(2013, 10, 21, 0, 0, 0)))
|
||||||
p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 6, 16, 0, 0, 0)))
|
p_ts2 = int(datetime.datetime.timestamp(datetime.datetime(2013, 12, 5, 0, 0, 0)))
|
||||||
|
|
||||||
p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
|
p_data = query_task_tag(tag, ts1=p_ts1, ts2=p_ts2)
|
||||||
for entry in p_data:
|
for entry in p_data:
|
||||||
@ -192,7 +192,7 @@ if __name__ == "__main__":
|
|||||||
comments[item['id']] = item['comments']
|
comments[item['id']] = item['comments']
|
||||||
entry['task_comments'] = comments
|
entry['task_comments'] = comments
|
||||||
DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
|
DATA_PREFIX = "/data/users/mgaughan/mw-repo-lifecycles/phab_data/"
|
||||||
with open(f"{DATA_PREFIX}{tag}_06-14-2012_06-16-2013_phab_data.json", "w") as outfile1:
|
with open(f"{DATA_PREFIX}{tag}_10-21-2013_12-5-2013_phab_data.json", "w") as outfile1:
|
||||||
json.dump(p_data, outfile1)
|
json.dump(p_data, outfile1)
|
||||||
'''
|
'''
|
||||||
user = query_users()
|
user = query_users()
|
||||||
|
Loading…
Reference in New Issue
Block a user