Fix one issue with the CSV data and update the threshold code
This commit is contained in:
parent
456b6e85cf
commit
f9210a6271
@ -525,7 +525,7 @@ https://github.com/spanezz/django-housekeeping.git,2014-05-14 9:53:17,fe18a0159c
|
||||
https://github.com/spaam/svtplay-dl,2011-10-11 18:30:39,4ae1c73079d97b07201d38ea78012e1d877c2eac,"[0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 6, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1, 10, 3, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 10, 0, 0, 3, 0, 0, 0, 5, 0, 0, 5, 0]","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,1,0,0,Tue Mar 1 17:42:12 2011 +0100,2011-03-01 11:42:12,4863.470694,224.2836458
|
||||
https://github.com/SoundScapeRenderer/ssr,2013-11-27 16:08:24,282398501961d280e7845ca1c2f4841b62d40b65,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[9, 3, 8, 2, 0, 2, 24, 11, 2, 3, 0, 3, 7, 8, 4, 6, 1, 0, 0, 3, 0, 0, 1, 0, 2, 0, 0]","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1,1,2,0,Wed Nov 27 17:08:24 2013 +0100,2013-11-27 11:08:24,3861.494167,0.2083333333
|
||||
https://github.com/sorich87/bootstrap-tour,2012-07-03 14:03:22,f8cf8c62981ef84ddc2561f7b2939d1f2a92832e,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[10, 22, 4, 1, 0, 11, 1, 1, 14, 0, 0, 0, 0, 2, 4, 1, 1, 9, 2, 0, 0, 0, 0, 0, 0, 1, 0]","[0, 5, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 1, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1,2,12,0,Tue Jul 3 15:02:56 2012 +0100,2012-07-03 10:02:56,4373.581296,0.1669675926
|
||||
https://github.com/agateau/yokadi/issues/new,2009-02-03 9:41:14,b68abca7bf25451a3ee24d62b3d8b4f7d6e9b81b,"[1, 0, 25, 21, 10, 4, 7, 16, 1, 0, 0, 1, 1, 7, 22, 6, 8, 35, 0, 20, 0, 1, 10, 11, 7, 15, 2]","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0]","[28, 7, 15, 0, 0, 5, 14, 4, 8, 2, 0, 1, 0, 1, 13, 8, 2, 0, 7, 0, 1, 16, 31, 2, 6, 9, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",4,0,6,2,Sun Feb 1 15:37:33 2009 -0700,2009-02-01 17:37:33,5621.223924,1.669224537
|
||||
https://github.com/agateau/yokadi,2009-02-03 9:41:14,b68abca7bf25451a3ee24d62b3d8b4f7d6e9b81b,"[1, 0, 25, 21, 10, 4, 7, 16, 1, 0, 0, 1, 1, 7, 22, 6, 8, 35, 0, 20, 0, 1, 10, 11, 7, 15, 2]","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0]","[28, 7, 15, 0, 0, 5, 14, 4, 8, 2, 0, 1, 0, 1, 13, 8, 2, 0, 7, 0, 1, 16, 31, 2, 6, 9, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",4,0,6,2,Sun Feb 1 15:37:33 2009 -0700,2009-02-01 17:37:33,5621.223924,1.669224537
|
||||
https://github.com/somiaj/fvwm2-debian,2021-06-02 0:27:05,629cf2e5fbdc1d3ff8b9c28855ae2656a07be11f,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2,0,0,0,Tue Jun 1 18:27:05 2021 -0600,2021-06-01 20:27:05,1118.147859,0.1666666667
|
||||
https://github.com/solvespace/solvespace/issues/new,2015-07-10 12:59:12,636b20bfa9fae2a40dba6b1b8c1adecfbfc5a9b9,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,Tue Mar 25 02:02:13 2008 -0800,2008-03-25 6:02:13,5934.748461,2663.289572
|
||||
https://github.com/aircrack-ng/mdk4,2018-02-06 8:23:27,218029afb9c0e9ef3a9ddfcf4a91a4c727d32ce1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[7, 0, 1, 7, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2,1,3,0,Mon Feb 5 01:54:04 2018 +0000,2018-02-04 20:54:04,2331.087454,1.478738426
|
||||
|
Can't render this file because it is too large.
|
@ -14,10 +14,29 @@ def for_readme_files():
|
||||
# https://github.com/opengovernment/opengovernment/blob/master/README.md (14s)
|
||||
# Median 48s
|
||||
readme_ta_df = pd.read_csv(ta_csv_path)
|
||||
threshold_count = readme_ta_df[readme_ta_df['reading_time'] >= 48].shape[0]
|
||||
print(threshold_count)
|
||||
# R8 > 0.125
|
||||
time_threshold_df = readme_ta_df[readme_ta_df['reading_time'] >= 48]
|
||||
# R8 > 0.10 (0.8 * 0.125)
|
||||
readme_topic_df = pd.read_csv(topic_csv_path)
|
||||
topic_threshold_df = readme_topic_df[readme_topic_df['t7'] >= 0.1]
|
||||
readme_exemplar_df = pd.merge(time_threshold_df, topic_threshold_df, on="filename", how="inner")
|
||||
exemplary_files = readme_exemplar_df['filename'].str.split('_').str[:-1].str.join("_")
|
||||
# One manual cleanup: drop the entry "CheMPS2_README_8md" and add "CheMPS2" in its place.
|
||||
exemplary_files = exemplary_files[exemplary_files!= "CheMPS2_README_8md"]
|
||||
exemplary_files = pd.concat([exemplary_files, pd.Series("CheMPS2")])
|
||||
print(len(exemplary_files))
|
||||
# Connect the exemplar list with the time-series data.
|
||||
ts_data_df = pd.read_csv(ld_csv_path)
|
||||
#print(ts_data_df['upstream_vcs_link'].str.split("/").str[-1])
|
||||
test_vec = ts_data_df['upstream_vcs_link'].str.split("/").str[-1]
|
||||
diff_vec = exemplary_files[~exemplary_files.isin(test_vec)]
|
||||
subset_ts_data = ts_data_df[ts_data_df['upstream_vcs_link'].str.split("/").str[-1].isin(exemplary_files)]
|
||||
#if "CheMPS2" in exemplary_files:
|
||||
# subset_ts_data = pd.concat([subset_ts_data, ts_data_df[ts_data_df['upstream_vcs_link'] == "https://github.com/SebWouters/CheMPS2"]])
|
||||
print(subset_ts_data.shape[0])
|
||||
#print("https://github.com/SebWouters/CheMPS2" in subset_ts_data["upstream_vcs_link"])
|
||||
|
||||
#subset_ts_data.to_csv('102724_readme_exemplar_subset.csv', index=False)
|
||||
|
||||
|
||||
def for_contributing_files():
|
||||
ld_csv_path = "final_data/deb_contrib_did.csv"
|
||||
@ -30,10 +49,19 @@ def for_contributing_files():
|
||||
# https://github.com/opengovernment/opengovernment/blob/master/CONTRIBUTING.md (45s)
|
||||
# Median 59s
|
||||
contributing_ta_df = pd.read_csv(ta_csv_path)
|
||||
threshold_count = contributing_ta_df[contributing_ta_df['reading_time'] >= 59].shape[0]
|
||||
print(threshold_count)
|
||||
# and then making sure they're on topic, C4 > 0.25
|
||||
contributing_ta_df = pd.read_csv(topic_csv_path)
|
||||
time_threshold_df = contributing_ta_df[contributing_ta_df['reading_time'] >= 59]
|
||||
# and then making sure they're on topic, C4 > 0.2 (0.8 * 0.25)
|
||||
contributing_topic_df = pd.read_csv(topic_csv_path)
|
||||
topic_threshold_df = contributing_topic_df[contributing_topic_df['t3'] >= 0.2]
|
||||
contributing_exemplar_df = pd.merge(topic_threshold_df, time_threshold_df, on='filename', how='inner')
|
||||
print(contributing_exemplar_df.shape[0])
|
||||
exemplary_files = contributing_exemplar_df['filename'].str.replace("_CONTRIBUTING.md", "")
|
||||
# reading in the data
|
||||
ts_data_df = pd.read_csv(ld_csv_path)
|
||||
subset_ts_data = ts_data_df[ts_data_df['upstream_vcs_link'].str.split("/").str[-1].isin(exemplary_files)]
|
||||
print(subset_ts_data.shape[0])
|
||||
|
||||
#subset_ts_data.to_csv('102724_contrib_exemplar_subset.csv', index=False)
|
||||
|
||||
if __name__ == "__main__":
|
||||
for_contributing_files()
|
||||
|
Loading…
Reference in New Issue
Block a user