subsetting data set for stuff
This commit is contained in:
parent
5bc4003a99
commit
456b6e85cf
@ -5,17 +5,36 @@ import pandas as pd
|
|||||||
|
|
||||||
def for_readme_files():
|
def for_readme_files():
|
||||||
ld_csv_path = "final_data/deb_readme_did.csv"
|
ld_csv_path = "final_data/deb_readme_did.csv"
|
||||||
ta_csv_path = "d_readability_readme.csv"
|
ta_csv_path = "text_analysis/d_readability_readme.csv"
|
||||||
topic_csv_path = "text_analysis/readme_file_topic_distributions.csv"
|
topic_csv_path = "text_analysis/readme_file_topic_distributions.csv"
|
||||||
# criteria for good readme
|
# criteria for good readme
|
||||||
# longer than half of a pageview
|
# median of three examples listed on the contributing page
|
||||||
|
# https://github.com/rails/rails/blob/main/README.md (71s)
|
||||||
|
# https://github.com/github/docs/blob/main/README.md (48s)
|
||||||
|
# https://github.com/opengovernment/opengovernment/blob/master/README.md (14s)
|
||||||
|
# Median 48s
|
||||||
|
readme_ta_df = pd.read_csv(ta_csv_path)
|
||||||
|
threshold_count = readme_ta_df[readme_ta_df['reading_time'] >= 48].shape[0]
|
||||||
|
print(threshold_count)
|
||||||
|
# R8 > 0.125
|
||||||
|
readme_topic_df = pd.read_csv(topic_csv_path)
|
||||||
|
|
||||||
def for_contributing_files():
|
def for_contributing_files():
|
||||||
ld_csv_path = "final_data/deb_contrib_did.csv"
|
ld_csv_path = "final_data/deb_contrib_did.csv"
|
||||||
ta_csv_path = "d_readability_contrib.csv"
|
ta_csv_path = "text_analysis/d_readability_contributing.csv"
|
||||||
topic_csv_path = "text_analysis/contrib_file_topic_distributions.csv"
|
topic_csv_path = "text_analysis/contrib_file_topic_distributions.csv"
|
||||||
# criteria for good contributing
|
# criteria for good contributing
|
||||||
# longer than half of a pageview
|
# median of three examples listed on https://docs.github.com/en/communities/setting-up-your-project-for-healthy-contributions/setting-guidelines-for-repository-contributors
|
||||||
|
# https://github.com/rails/rails/blob/main/CONTRIBUTING.md (116s)
|
||||||
|
# https://github.com/github/docs/blob/main/.github/CONTRIBUTING.md (59s)
|
||||||
|
# https://github.com/opengovernment/opengovernment/blob/master/CONTRIBUTING.md (45s)
|
||||||
|
# Median 59s
|
||||||
|
contributing_ta_df = pd.read_csv(ta_csv_path)
|
||||||
|
threshold_count = contributing_ta_df[contributing_ta_df['reading_time'] >= 59].shape[0]
|
||||||
|
print(threshold_count)
|
||||||
|
# and then making sure they're on topic, C4 > 0.25
|
||||||
|
contributing_ta_df = pd.read_csv(topic_csv_path)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
for_contributing_files()
|
||||||
|
for_readme_files()
|
Loading…
Reference in New Issue
Block a user