diff --git a/final_data/deb_readme_did.csv b/final_data/deb_readme_did.csv
index 4604fea..ab3a065 100644
--- a/final_data/deb_readme_did.csv
+++ b/final_data/deb_readme_did.csv
@@ -525,7 +525,7 @@ https://github.com/spanezz/django-housekeeping.git,2014-05-14 9:53:17,fe18a0159c
 https://github.com/spaam/svtplay-dl,2011-10-11 18:30:39,4ae1c73079d97b07201d38ea78012e1d877c2eac,"[0, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 11, 6, 0, 0, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 1, 10, 3, 2, 2, 0, 0, 2, 0, 0, 0, 0, 0, 10, 0, 0, 3, 0, 0, 0, 5, 0, 0, 5, 0]","[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,1,0,0,Tue Mar 1 17:42:12 2011 +0100,2011-03-01 11:42:12,4863.470694,224.2836458
 https://github.com/SoundScapeRenderer/ssr,2013-11-27 16:08:24,282398501961d280e7845ca1c2f4841b62d40b65,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[9, 3, 8, 2, 0, 2, 24, 11, 2, 3, 0, 3, 7, 8, 4, 6, 1, 0, 0, 3, 0, 0, 1, 0, 2, 0, 0]","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1,1,2,0,Wed Nov 27 17:08:24 2013 +0100,2013-11-27 11:08:24,3861.494167,0.2083333333
 https://github.com/sorich87/bootstrap-tour,2012-07-03 14:03:22,f8cf8c62981ef84ddc2561f7b2939d1f2a92832e,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[10, 22, 4, 1, 0, 11, 1, 1, 14, 0, 0, 0, 0, 2, 4, 1, 1, 9, 2, 0, 0, 0, 0, 0, 0, 1, 0]","[0, 5, 0, 1, 0, 0, 0, 0, 2, 0, 0, 0, 0, 1, 1, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0]",1,2,12,0,Tue Jul 3 15:02:56 2012 +0100,2012-07-03 10:02:56,4373.581296,0.1669675926
-https://github.com/agateau/yokadi/issues/new,2009-02-03 9:41:14,b68abca7bf25451a3ee24d62b3d8b4f7d6e9b81b,"[1, 0, 25, 21, 10, 4, 7, 16, 1, 0, 0, 1, 1, 7, 22, 6, 8, 35, 0, 20, 0, 1, 10, 11, 7, 15, 2]","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0]","[28, 7, 15, 0, 0, 5, 14, 4, 8, 2, 0, 1, 0, 1, 13, 8, 2, 0, 7, 0, 1, 16, 31, 2, 6, 9, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",4,0,6,2,Sun Feb 1 15:37:33 2009 -0700,2009-02-01 17:37:33,5621.223924,1.669224537
+https://github.com/agateau/yokadi,2009-02-03 9:41:14,b68abca7bf25451a3ee24d62b3d8b4f7d6e9b81b,"[1, 0, 25, 21, 10, 4, 7, 16, 1, 0, 0, 1, 1, 7, 22, 6, 8, 35, 0, 20, 0, 1, 10, 11, 7, 15, 2]","[0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 4, 0, 0, 0, 0, 1, 0, 0, 0, 0]","[28, 7, 15, 0, 0, 5, 14, 4, 8, 2, 0, 1, 0, 1, 13, 8, 2, 0, 7, 0, 1, 16, 31, 2, 6, 9, 1]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",4,0,6,2,Sun Feb 1 15:37:33 2009 -0700,2009-02-01 17:37:33,5621.223924,1.669224537
 https://github.com/somiaj/fvwm2-debian,2021-06-02 0:27:05,629cf2e5fbdc1d3ff8b9c28855ae2656a07be11f,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2,0,0,0,Tue Jun 1 18:27:05 2021 -0600,2021-06-01 20:27:05,1118.147859,0.1666666667
 https://github.com/solvespace/solvespace/issues/new,2015-07-10 12:59:12,636b20bfa9fae2a40dba6b1b8c1adecfbfc5a9b9,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",0,0,0,0,Tue Mar 25 02:02:13 2008 -0800,2008-03-25 6:02:13,5934.748461,2663.289572
 https://github.com/aircrack-ng/mdk4,2018-02-06 8:23:27,218029afb9c0e9ef3a9ddfcf4a91a4c727d32ce1,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2]","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]","[7, 0, 1, 7, 1, 0, 0, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1]","[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]",2,1,3,0,Mon Feb 5 01:54:04 2018 +0000,2018-02-04 20:54:04,2331.087454,1.478738426
diff --git a/sample_good_subset.py b/sample_good_subset.py
index 27752e2..2858ba2 100644
--- a/sample_good_subset.py
+++ b/sample_good_subset.py
@@ -14,10 +14,29 @@ def for_readme_files():
     # https://github.com/opengovernment/opengovernment/blob/master/README.md (14s)
     # Median 48s
     readme_ta_df = pd.read_csv(ta_csv_path)
-    threshold_count = readme_ta_df[readme_ta_df['reading_time'] >= 48].shape[0]
-    print(threshold_count)
-    # R8 > 0.125
+    time_threshold_df = readme_ta_df[readme_ta_df['reading_time'] >= 48]
+    # R8 > 0.10 (0.8 * 0.125)
     readme_topic_df = pd.read_csv(topic_csv_path)
+    topic_threshold_df = readme_topic_df[readme_topic_df['t7'] >= 0.1]
+    readme_exemplar_df = pd.merge(time_threshold_df, topic_threshold_df, on="filename", how="inner")
+    exemplary_files = readme_exemplar_df['filename'].str.split('_').str[:-1].str.join("_")
+    #one manual cleaning
+    exemplary_files = exemplary_files[exemplary_files!= "CheMPS2_README_8md"]
+    exemplary_files = pd.concat([exemplary_files, pd.Series("CheMPS2")])
+    print(len(exemplary_files))
+    #connecting with the timeseries data
+    ts_data_df = pd.read_csv(ld_csv_path)
+    #print(ts_data_df['upstream_vcs_link'].str.split("/").str[-1])
+    test_vec = ts_data_df['upstream_vcs_link'].str.split("/").str[-1]
+    diff_vec = exemplary_files[~exemplary_files.isin(test_vec)]
+    subset_ts_data = ts_data_df[ts_data_df['upstream_vcs_link'].str.split("/").str[-1].isin(exemplary_files)]
+    #if "CheMPS2" in exemplary_files:
+    #    subset_ts_data = pd.concat([subset_ts_data, ts_data_df[ts_data_df['upstream_vcs_link'] == "https://github.com/SebWouters/CheMPS2"]])
+    print(subset_ts_data.shape[0])
+    #print("https://github.com/SebWouters/CheMPS2" in subset_ts_data["upstream_vcs_link"])
+    
+    #subset_ts_data.to_csv('102724_readme_exemplar_subset.csv', index=False) 
+
 
 def for_contributing_files():
     ld_csv_path = "final_data/deb_contrib_did.csv"
@@ -30,10 +49,19 @@ def for_contributing_files():
     # https://github.com/opengovernment/opengovernment/blob/master/CONTRIBUTING.md (45s)
     # Median 59s
     contributing_ta_df = pd.read_csv(ta_csv_path)
-    threshold_count = contributing_ta_df[contributing_ta_df['reading_time'] >= 59].shape[0]
-    print(threshold_count)
-    # and then making sure they're on topic, C4 > 0.25
-    contributing_ta_df = pd.read_csv(topic_csv_path)
+    time_threshold_df = contributing_ta_df[contributing_ta_df['reading_time'] >= 59]
+    # and then making sure they're on topic, C4 > 0.2 (0.8 * 0.25)
+    contributing_topic_df = pd.read_csv(topic_csv_path)
+    topic_threshold_df = contributing_topic_df[contributing_topic_df['t3'] >= 0.2]
+    contributing_exemplar_df = pd.merge(topic_threshold_df, time_threshold_df, on='filename', how='inner')
+    print(contributing_exemplar_df.shape[0])
+    exemplary_files = contributing_exemplar_df['filename'].str.replace("_CONTRIBUTING.md", "")
+    # reading in the data
+    ts_data_df = pd.read_csv(ld_csv_path)
+    subset_ts_data = ts_data_df[ts_data_df['upstream_vcs_link'].str.split("/").str[-1].isin(exemplary_files)]
+    print(subset_ts_data.shape[0])
+
+    #subset_ts_data.to_csv('102724_contrib_exemplar_subset.csv', index=False) 
 
 if __name__ == "__main__":
     for_contributing_files()