1
0

updated topic model and combo analysis

This commit is contained in:
Matthew Gaughan 2025-02-03 19:06:13 -06:00
parent 5ab4b58542
commit d931bec7d8
15 changed files with 9338 additions and 9495 deletions

View File

@ -1514,7 +1514,7 @@ bbonev_dhcpdump,bbonev_dhcpdump_commits.csv,bbonev_dhcpdump_hullabaloo_README.md
sphinx-contrib_autoprogram.git,_sphinx-contrib_autoprogram.git_commits.csv,sphinx-contrib_autoprogram.git_hullabaloo_README.rst
elmar_ldap-git-backup,elmar_ldap-git-backup_commits.csv,elmar_ldap-git-backup_hullabaloo_README.md
GNOME_gnome-notes,GNOME_gnome-notes_commits.csv,GNOME_gnome-notes_hullabaloo_README
quodlibet_mutagen.git,quodlibet_mutagen.git_commits.csv,quodlibet_mutagen.git_hullabaloo_README.rst
quodlibet_mutagen.git,quodlibet_mutagen.git_commits.csv,quodlibet_mutagen.git_hullabaloo_README
GNOME_gnome-calculator.git,GNOME_gnome-calculator.git_commits.csv,GNOME_gnome-calculator.git_hullabaloo_README
pulseaudio_pulseaudio.git,pulseaudio_pulseaudio.git_commits.csv,pulseaudio_pulseaudio.git_hullabaloo_README.html.in
rear_rear.git,_rear_rear.git_commits.csv,rear_rear.git_hullabaloo_README
@ -2743,7 +2743,7 @@ jbazik_Archive-Ar.git,_jbazik_Archive-Ar.git_commits.csv,jbazik_Archive-Ar.git_h
davical-project_awl,davical-project_awl_commits.csv,davical-project_awl_hullabaloo_README
phillord_assess.git,_phillord_assess.git_commits.csv,phillord_assess.git_hullabaloo_README.md
public_git_gst123.git,public_git_gst123.git_commits.csv,public_git_gst123.git_hullabaloo_README
pure-data_deken,_pure-data_deken_commits.csv,pure-data_deken_hullabaloo_README.plugin.txt
pure-data_deken,_pure-data_deken_commits.csv,pure-data_deken_hullabaloo_README.md
p_posixovl_posixovl,p_posixovl_posixovl_commits.csv,p_posixovl_posixovl_hullabaloo_README.rst
czerwonk_bird_exporter,_czerwonk_bird_exporter_commits.csv,czerwonk_bird_exporter_hullabaloo_README.md
es128_glob-parent,_es128_glob-parent_commits.csv,es128_glob-parent_hullabaloo_README.md
@ -3341,7 +3341,7 @@ spacetelescope_imexam,_spacetelescope_imexam_commits.csv,spacetelescope_imexam_h
knavalbattle.git,knavalbattle.git_commits.csv,knavalbattle.git_hullabaloo_README
wagtail_django-modelcluster.git,_wagtail_django-modelcluster.git_commits.csv,wagtail_django-modelcluster.git_hullabaloo_README.md
dgedit.git,dgedit.git_commits.csv,dgedit.git_hullabaloo_README
python-babel_flask-babel,_python-babel_flask-babel_commits.csv,python-babel_flask-babel_hullabaloo_README
python-babel_flask-babel,_python-babel_flask-babel_commits.csv,python-babel_flask-babel_hullabaloo_README.md
OpenShot_libopenshot-audio.git,_OpenShot_libopenshot-audio.git_commits.csv,OpenShot_libopenshot-audio.git_hullabaloo_README
sfcgal_SFCGAL,sfcgal_SFCGAL_commits.csv,sfcgal_SFCGAL_hullabaloo_README.md
ekenberg_quotatool,_ekenberg_quotatool_commits.csv,ekenberg_quotatool_hullabaloo_README

1 repo_id commits_filepath fvf_filepath
1514 sphinx-contrib_autoprogram.git _sphinx-contrib_autoprogram.git_commits.csv sphinx-contrib_autoprogram.git_hullabaloo_README.rst
1515 elmar_ldap-git-backup elmar_ldap-git-backup_commits.csv elmar_ldap-git-backup_hullabaloo_README.md
1516 GNOME_gnome-notes GNOME_gnome-notes_commits.csv GNOME_gnome-notes_hullabaloo_README
1517 quodlibet_mutagen.git quodlibet_mutagen.git_commits.csv quodlibet_mutagen.git_hullabaloo_README.rst quodlibet_mutagen.git_hullabaloo_README
1518 GNOME_gnome-calculator.git GNOME_gnome-calculator.git_commits.csv GNOME_gnome-calculator.git_hullabaloo_README
1519 pulseaudio_pulseaudio.git pulseaudio_pulseaudio.git_commits.csv pulseaudio_pulseaudio.git_hullabaloo_README.html.in
1520 rear_rear.git _rear_rear.git_commits.csv rear_rear.git_hullabaloo_README
2743 davical-project_awl davical-project_awl_commits.csv davical-project_awl_hullabaloo_README
2744 phillord_assess.git _phillord_assess.git_commits.csv phillord_assess.git_hullabaloo_README.md
2745 public_git_gst123.git public_git_gst123.git_commits.csv public_git_gst123.git_hullabaloo_README
2746 pure-data_deken _pure-data_deken_commits.csv pure-data_deken_hullabaloo_README.plugin.txt pure-data_deken_hullabaloo_README.md
2747 p_posixovl_posixovl p_posixovl_posixovl_commits.csv p_posixovl_posixovl_hullabaloo_README.rst
2748 czerwonk_bird_exporter _czerwonk_bird_exporter_commits.csv czerwonk_bird_exporter_hullabaloo_README.md
2749 es128_glob-parent _es128_glob-parent_commits.csv es128_glob-parent_hullabaloo_README.md
3341 knavalbattle.git knavalbattle.git_commits.csv knavalbattle.git_hullabaloo_README
3342 wagtail_django-modelcluster.git _wagtail_django-modelcluster.git_commits.csv wagtail_django-modelcluster.git_hullabaloo_README.md
3343 dgedit.git dgedit.git_commits.csv dgedit.git_hullabaloo_README
3344 python-babel_flask-babel _python-babel_flask-babel_commits.csv python-babel_flask-babel_hullabaloo_README python-babel_flask-babel_hullabaloo_README.md
3345 OpenShot_libopenshot-audio.git _OpenShot_libopenshot-audio.git_commits.csv OpenShot_libopenshot-audio.git_hullabaloo_README
3346 sfcgal_SFCGAL sfcgal_SFCGAL_commits.csv sfcgal_SFCGAL_hullabaloo_README.md
3347 ekenberg_quotatool _ekenberg_quotatool_commits.csv ekenberg_quotatool_hullabaloo_README

View File

Can't render this file because it is too large.

File diff suppressed because it is too large Load Diff

Binary file not shown.

File diff suppressed because it is too large Load Diff

Binary file not shown.

View File

@ -314,7 +314,6 @@ rsyslog_libestr.git_hullabaloo_README,77.91,2.9,0.0,0.1,3.0,0.15,10,2
Yubico_yubikey-manager.git_hullabaloo_README,29.75,13.1,14.8,11.0,20.4,10.78,734,113
afewmail_afew_hullabaloo_README,47.42,12.5,28.5,9.78,32.1,47.71,3248,521
astro_node-expat_hullabaloo_README.markdown,62.75,6.6,4.888888888888889,12.27,11.9,8.26,562,77
python-babel_flask-babel_hullabaloo_README,66.4,5.2,2.5,13.36,5.0,0.43,29,5
c-cube_gen.git_hullabaloo_README.md,69.28,6.2,5.5,9.65,14.4,9.86,671,115
douardda_pyramid_multiauth_hullabaloo_README.txt,40.69,13.1,14.5,11.19,32.0,1.91,130,22
craneworks_python-ipcalc_hullabaloo_README.source,37.16,10.3,4.625,17.16,12.3,4.11,280,35
@ -1241,7 +1240,6 @@ git_nano.git_hullabaloo_README,63.8,8.3,13.4,8.6,22.5,38.49,2620,526
PyCQA_prospector_hullabaloo_README.md,55.24,9.5,10.5,17.26,20.0,1.57,107,16
NigelCunningham_pam-MySQL.git_hullabaloo_README,59.8,7.8,7.0,8.02,16.1,139.64,9506,1622
biojava_biojava.git_hullabaloo_README.txt,35.64,10.8,7.375,12.62,12.8,3.92,267,41
pure-data_deken_hullabaloo_README.plugin.txt,53.1,10.3,8.0,8.03,25.5,29.51,2009,343
jfhbrook_pyee_hullabaloo_README.rst,61.93,7.0,5.5,10.06,12.8,24.12,1642,236
GNOME_perl-pango_hullabaloo_README,64.51,8.0,6.75,10.18,20.8,30.72,2091,349
ceres-solver_ceres-solver.git_hullabaloo_README,62.85,6.6,5.75,14.28,11.5,2.12,144,17
@ -1897,7 +1895,6 @@ umanwizard_libeot.git_hullabaloo_README.md,74.86,4.1,2.75,13.36,7.5,0.84,57,10
xhtml2pdf_xhtml2pdf_hullabaloo_README.txt,45.83,9.0,3.7142857142857144,11.77,11.7,13.94,949,129
traviscross_mtr_hullabaloo_README,68.47,6.5,6.75,9.29,16.2,16.61,1131,214
GNOME_libwnck.git_hullabaloo_README,72.53,5.0,3.333333333333333,9.02,11.0,1.65,112,22
quodlibet_mutagen.git_hullabaloo_README.rst,34.83,11.2,6.111111111111111,14.16,15.2,13.72,934,133
rvaser_bioparser_hullabaloo_README.md,36.62,8.4,0.0,19.48,1.0,0.15,10,1
gweis_isodate_hullabaloo_README.txt,64.61,8.0,7.857142857142858,8.78,20.6,32.26,2196,392
webpy_webpy.git_hullabaloo_README.tests,76.82,5.4,6.1,8.63,15.4,4.76,324,57
@ -2461,7 +2458,6 @@ panel-plugins_xfce4-timer-plugin.git_hullabaloo_README,81.7,3.5,2.7,9.71,9.6,4.6
routeKIT_jbzip2.git_hullabaloo_README.md,39.23,11.5,5.6,11.66,20.3,27.01,1839,241
sebastianbergmann_php-token-stream_hullabaloo_README.markdown,35.34,11.0,7.375,10.25,13.7,13.78,938,127
eugmes_fntsample.git_hullabaloo_README.rst,38.38,9.8,4.583333333333333,11.55,9.9,32.39,2205,266
scrapy_cssselect_hullabaloo_README,31.07,12.6,11.142857142857142,10.71,20.1,40.15,2733,356
include-what-you-use_include-what-you-use_hullabaloo_README.txt,65.62,7.6,3.071428571428571,7.1,20.0,214.69,14615,2658
google_brotli_hullabaloo_README,59.8,7.8,6.25,11.76,16.0,1.78,121,23
svgpp_svgpp.git_hullabaloo_README.md,83.32,2.9,2.0,16.52,8.0,0.4,27,5
@ -2997,7 +2993,6 @@ eerimoq_bitstruct_hullabaloo_README.rst,32.6,12.0,16.25,12.71,17.9,28.41,1934,21
ronf_asyncssh_hullabaloo_README,32.6,12.0,7.5,12.19,18.1,10.19,694,106
cryptsetup_cryptsetup_hullabaloo_README,206.84,-15.7,-1.0,0.0,0.0,0.0,0,0
ralovich_antpm_hullabaloo_README,83.46,4.9,8.166666666666666,8.21,18.7,9.28,632,145
scop_bash-completion_hullabaloo_README.md,48.7,10.0,9.0,7.41,20.0,188.65,12842,2114
residuum_PuRestJson.git_hullabaloo_README,46.98,10.6,7.5,10.69,20.6,16.42,1118,189
joaotavora_yasnippet_hullabaloo_README,51.85,8.8,6.375,12.08,15.5,3.86,263,44
astropy_astroquery.git_hullabaloo_README.rst,55.44,9.5,10.333333333333334,7.17,22.1,108.94,7416,1314
@ -4243,7 +4238,6 @@ jpadilla_pyjwt.git_hullabaloo_README.md,41.56,10.6,7.5,11.18,16.6,12.78,870,128
osslugaru_lugaru_hullabaloo_README,61.33,7.2,3.916666666666667,8.43,13.6,152.19,10360,1768
neurodebian_Psychtoolbox-3_hullabaloo_README.rtf,46.98,10.6,11.333333333333334,8.87,22.1,58.52,3984,600
Bioconductor_DelayedArray.git_hullabaloo_README.md,-8.56,19.5,14.75,16.98,26.5,5.6,381,41
mongoengine_flask-mongoengine_hullabaloo_README.md,-7.38,17.0,13.333333333333334,10.18,13.9,54.88,3736,363
howardabrams_node-mocks-http_hullabaloo_README.md,40.14,11.2,10.166666666666666,11.85,18.2,21.01,1430,185
karenetheridge_Module-Manifest_hullabaloo_README,46.67,10.7,11.666666666666666,9.13,22.3,39.82,2711,498
plotly_plotly.R.git_hullabaloo_README.md,27.08,12.1,10.666666666666666,12.19,13.4,46.99,3199,385
1 filename flesch_reading_ease flesch_kincaid_grade linsear_write_formula dale_chall_readability_score mcalpine_eflaw reading_time char_count word_count
314 Yubico_yubikey-manager.git_hullabaloo_README 29.75 13.1 14.8 11.0 20.4 10.78 734 113
315 afewmail_afew_hullabaloo_README 47.42 12.5 28.5 9.78 32.1 47.71 3248 521
316 astro_node-expat_hullabaloo_README.markdown 62.75 6.6 4.888888888888889 12.27 11.9 8.26 562 77
python-babel_flask-babel_hullabaloo_README 66.4 5.2 2.5 13.36 5.0 0.43 29 5
317 c-cube_gen.git_hullabaloo_README.md 69.28 6.2 5.5 9.65 14.4 9.86 671 115
318 douardda_pyramid_multiauth_hullabaloo_README.txt 40.69 13.1 14.5 11.19 32.0 1.91 130 22
319 craneworks_python-ipcalc_hullabaloo_README.source 37.16 10.3 4.625 17.16 12.3 4.11 280 35
1240 PyCQA_prospector_hullabaloo_README.md 55.24 9.5 10.5 17.26 20.0 1.57 107 16
1241 NigelCunningham_pam-MySQL.git_hullabaloo_README 59.8 7.8 7.0 8.02 16.1 139.64 9506 1622
1242 biojava_biojava.git_hullabaloo_README.txt 35.64 10.8 7.375 12.62 12.8 3.92 267 41
pure-data_deken_hullabaloo_README.plugin.txt 53.1 10.3 8.0 8.03 25.5 29.51 2009 343
1243 jfhbrook_pyee_hullabaloo_README.rst 61.93 7.0 5.5 10.06 12.8 24.12 1642 236
1244 GNOME_perl-pango_hullabaloo_README 64.51 8.0 6.75 10.18 20.8 30.72 2091 349
1245 ceres-solver_ceres-solver.git_hullabaloo_README 62.85 6.6 5.75 14.28 11.5 2.12 144 17
1895 xhtml2pdf_xhtml2pdf_hullabaloo_README.txt 45.83 9.0 3.7142857142857144 11.77 11.7 13.94 949 129
1896 traviscross_mtr_hullabaloo_README 68.47 6.5 6.75 9.29 16.2 16.61 1131 214
1897 GNOME_libwnck.git_hullabaloo_README 72.53 5.0 3.333333333333333 9.02 11.0 1.65 112 22
quodlibet_mutagen.git_hullabaloo_README.rst 34.83 11.2 6.111111111111111 14.16 15.2 13.72 934 133
1898 rvaser_bioparser_hullabaloo_README.md 36.62 8.4 0.0 19.48 1.0 0.15 10 1
1899 gweis_isodate_hullabaloo_README.txt 64.61 8.0 7.857142857142858 8.78 20.6 32.26 2196 392
1900 webpy_webpy.git_hullabaloo_README.tests 76.82 5.4 6.1 8.63 15.4 4.76 324 57
2458 routeKIT_jbzip2.git_hullabaloo_README.md 39.23 11.5 5.6 11.66 20.3 27.01 1839 241
2459 sebastianbergmann_php-token-stream_hullabaloo_README.markdown 35.34 11.0 7.375 10.25 13.7 13.78 938 127
2460 eugmes_fntsample.git_hullabaloo_README.rst 38.38 9.8 4.583333333333333 11.55 9.9 32.39 2205 266
scrapy_cssselect_hullabaloo_README 31.07 12.6 11.142857142857142 10.71 20.1 40.15 2733 356
2461 include-what-you-use_include-what-you-use_hullabaloo_README.txt 65.62 7.6 3.071428571428571 7.1 20.0 214.69 14615 2658
2462 google_brotli_hullabaloo_README 59.8 7.8 6.25 11.76 16.0 1.78 121 23
2463 svgpp_svgpp.git_hullabaloo_README.md 83.32 2.9 2.0 16.52 8.0 0.4 27 5
2993 ronf_asyncssh_hullabaloo_README 32.6 12.0 7.5 12.19 18.1 10.19 694 106
2994 cryptsetup_cryptsetup_hullabaloo_README 206.84 -15.7 -1.0 0.0 0.0 0.0 0 0
2995 ralovich_antpm_hullabaloo_README 83.46 4.9 8.166666666666666 8.21 18.7 9.28 632 145
scop_bash-completion_hullabaloo_README.md 48.7 10.0 9.0 7.41 20.0 188.65 12842 2114
2996 residuum_PuRestJson.git_hullabaloo_README 46.98 10.6 7.5 10.69 20.6 16.42 1118 189
2997 joaotavora_yasnippet_hullabaloo_README 51.85 8.8 6.375 12.08 15.5 3.86 263 44
2998 astropy_astroquery.git_hullabaloo_README.rst 55.44 9.5 10.333333333333334 7.17 22.1 108.94 7416 1314
4238 osslugaru_lugaru_hullabaloo_README 61.33 7.2 3.916666666666667 8.43 13.6 152.19 10360 1768
4239 neurodebian_Psychtoolbox-3_hullabaloo_README.rtf 46.98 10.6 11.333333333333334 8.87 22.1 58.52 3984 600
4240 Bioconductor_DelayedArray.git_hullabaloo_README.md -8.56 19.5 14.75 16.98 26.5 5.6 381 41
mongoengine_flask-mongoengine_hullabaloo_README.md -7.38 17.0 13.333333333333334 10.18 13.9 54.88 3736 363
4241 howardabrams_node-mocks-http_hullabaloo_README.md 40.14 11.2 10.166666666666666 11.85 18.2 21.01 1430 185
4242 karenetheridge_Module-Manifest_hullabaloo_README 46.67 10.7 11.666666666666666 9.13 22.3 39.82 2711 498
4243 plotly_plotly.R.git_hullabaloo_README.md 27.08 12.1 10.666666666666666 12.19 13.4 46.99 3199 385

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -17,7 +17,7 @@
"outputs": [],
"source": [
"contributing_readability = pd.read_csv(\"020125_CONTRIBUTING_readability.csv\")\n",
"readme_readability = pd.read_csv(\"020125_README_readability.csv\")"
"readme_readability = pd.read_csv(\"020325_README_readability.csv\")"
]
},
{
@ -156,7 +156,7 @@
},
{
"cell_type": "code",
"execution_count": 20,
"execution_count": 3,
"metadata": {},
"outputs": [
{
@ -165,7 +165,7 @@
"214.0"
]
},
"execution_count": 20,
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
@ -176,7 +176,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 4,
"metadata": {},
"outputs": [
{
@ -185,7 +185,7 @@
"9.1"
]
},
"execution_count": 18,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@ -13,7 +13,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@ -25,7 +25,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
@ -72,11 +72,11 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"generate_file('020125_README_readability.csv', readme_directory)"
"generate_file('020325_README_readability.csv', readme_directory)"
]
}
],

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 22,
"metadata": {},
"outputs": [],
"source": [
@ -11,27 +11,27 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"merged_manifest = pd.read_csv('0203_readme_merged_manifest.csv')\n",
"topic_distributions = pd.read_csv('020125_README_file_topic_distributions.csv')\n",
"readability_scores = pd.read_csv('020125_README_readability.csv')"
"topic_distributions = pd.read_csv('020325_README_file_topic_distributions.csv')\n",
"readability_scores = pd.read_csv('020325_README_readability.csv')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"4248"
"4247"
]
},
"execution_count": 3,
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
@ -40,8 +40,9 @@
"first_merge = readability_scores.merge(topic_distributions, on=['filename'],how=\"inner\")\n",
"#primary_merge = first_merge.merge(readability_scores, )\n",
"first_merge['fvf_filepath'] = first_merge['filename']\n",
"#len(first_merge)\n",
"second_merge = first_merge.merge(merged_manifest, on=['fvf_filepath'], how=\"inner\")\n",
"len(second_merge)"
"len(second_merge)\n"
]
},
{
@ -276,7 +277,7 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 27,
"metadata": {},
"outputs": [
{
@ -330,11 +331,11 @@
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>55.137558</td>\n",
" <td>52.935</td>\n",
" <td>23.486399</td>\n",
" <td>9.29</td>\n",
" <td>256.988900</td>\n",
" <td>55.089708</td>\n",
" <td>52.900</td>\n",
" <td>23.486886</td>\n",
" <td>9.28</td>\n",
" <td>256.973406</td>\n",
" <td>90.0</td>\n",
" </tr>\n",
" <tr>\n",
@ -354,7 +355,7 @@
" flesch_reading_ease reading_time word_count \\\n",
" mean median mean median mean \n",
"ranef_grouping \n",
"1 55.137558 52.935 23.486399 9.29 256.988900 \n",
"1 55.089708 52.900 23.486886 9.28 256.973406 \n",
"2 46.425909 51.365 22.760642 9.53 249.534759 \n",
"\n",
" \n",
@ -364,7 +365,7 @@
"2 100.0 "
]
},
"execution_count": 7,
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
@ -381,7 +382,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 29,
"metadata": {},
"outputs": [
{
@ -453,31 +454,31 @@
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.035938</td>\n",
" <td>0.082578</td>\n",
" <td>0.096817</td>\n",
" <td>0.156525</td>\n",
" <td>0.111305</td>\n",
" <td>0.094973</td>\n",
" <td>0.068736</td>\n",
" <td>0.11493</td>\n",
" <td>0.061243</td>\n",
" <td>0.097605</td>\n",
" <td>0.079349</td>\n",
" <td>0.095142</td>\n",
" <td>0.101366</td>\n",
" <td>0.084609</td>\n",
" <td>0.127050</td>\n",
" <td>0.024873</td>\n",
" <td>0.060155</td>\n",
" <td>0.088660</td>\n",
" <td>0.136191</td>\n",
" <td>0.060451</td>\n",
" <td>0.134408</td>\n",
" <td>0.087095</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.024329</td>\n",
" <td>0.053934</td>\n",
" <td>0.095806</td>\n",
" <td>0.209661</td>\n",
" <td>0.101464</td>\n",
" <td>0.076628</td>\n",
" <td>0.082048</td>\n",
" <td>0.15226</td>\n",
" <td>0.064241</td>\n",
" <td>0.088526</td>\n",
" <td>0.051103</td>\n",
" <td>0.122912</td>\n",
" <td>0.102389</td>\n",
" <td>0.050782</td>\n",
" <td>0.168774</td>\n",
" <td>0.015752</td>\n",
" <td>0.072154</td>\n",
" <td>0.102839</td>\n",
" <td>0.115994</td>\n",
" <td>0.053674</td>\n",
" <td>0.119278</td>\n",
" <td>0.075450</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
@ -487,17 +488,17 @@
" t0 t1 t2 t3 t4 t5 \\\n",
" mean mean mean mean mean mean \n",
"ranef_grouping \n",
"1 0.035938 0.082578 0.096817 0.156525 0.111305 0.094973 \n",
"2 0.024329 0.053934 0.095806 0.209661 0.101464 0.076628 \n",
"1 0.095142 0.101366 0.084609 0.127050 0.024873 0.060155 \n",
"2 0.122912 0.102389 0.050782 0.168774 0.015752 0.072154 \n",
"\n",
" t6 t7 t8 t9 t10 \n",
" mean mean mean mean mean \n",
"ranef_grouping \n",
"1 0.068736 0.11493 0.061243 0.097605 0.079349 \n",
"2 0.082048 0.15226 0.064241 0.088526 0.051103 "
" t6 t7 t8 t9 t10 \n",
" mean mean mean mean mean \n",
"ranef_grouping \n",
"1 0.088660 0.136191 0.060451 0.134408 0.087095 \n",
"2 0.102839 0.115994 0.053674 0.119278 0.075450 "
]
},
"execution_count": 10,
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}

File diff suppressed because it is too large Load Diff

View File

@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@ -13,17 +13,17 @@
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"contributing_ranef = pd.read_csv(\"../mlm/data/0201_contributing_dweek_ranefs.csv\")\n",
"readme_ranef = pd.read_csv(\"../mlm/data/0201_readme_dweek_ranefs.csv\")"
"readme_ranef = pd.read_csv(\"../mlm/data/0203_readme_dweek_ranefs.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@ -33,7 +33,7 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@ -43,7 +43,7 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 6,
"metadata": {},
"outputs": [
{
@ -197,7 +197,7 @@
"4 0.246845 0.449997 2 658.0 "
]
},
"execution_count": 35,
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
@ -240,161 +240,16 @@
},
{
"cell_type": "code",
"execution_count": 37,
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>repo_id</th>\n",
" <th>commits_filepath</th>\n",
" <th>fvf_filepath</th>\n",
" <th>effect</th>\n",
" <th>group</th>\n",
" <th>level</th>\n",
" <th>term</th>\n",
" <th>estimate</th>\n",
" <th>std.error</th>\n",
" <th>conf.low</th>\n",
" <th>conf.high</th>\n",
" <th>ranef_grouping</th>\n",
" <th>rank</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>italiangrid_voms</td>\n",
" <td>_italiangrid_voms_commits.csv</td>\n",
" <td>italiangrid_voms_hullabaloo_README.md</td>\n",
" <td>ran_vals</td>\n",
" <td>project_id</td>\n",
" <td>italiangrid_voms</td>\n",
" <td>before_after:week_index</td>\n",
" <td>-0.014093</td>\n",
" <td>0.638314</td>\n",
" <td>-1.265166</td>\n",
" <td>1.236980</td>\n",
" <td>1</td>\n",
" <td>2294.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>ultrajson_ultrajson</td>\n",
" <td>_ultrajson_ultrajson_commits.csv</td>\n",
" <td>ultrajson_ultrajson_hullabaloo_README</td>\n",
" <td>ran_vals</td>\n",
" <td>project_id</td>\n",
" <td>ultrajson_ultrajson</td>\n",
" <td>before_after:week_index</td>\n",
" <td>0.096429</td>\n",
" <td>0.616497</td>\n",
" <td>-1.111884</td>\n",
" <td>1.304742</td>\n",
" <td>1</td>\n",
" <td>2814.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>swipely_docker-api</td>\n",
" <td>_swipely_docker-api_commits.csv</td>\n",
" <td>swipely_docker-api_hullabaloo_README.md</td>\n",
" <td>ran_vals</td>\n",
" <td>project_id</td>\n",
" <td>swipely_docker-api</td>\n",
" <td>before_after:week_index</td>\n",
" <td>0.084690</td>\n",
" <td>0.612034</td>\n",
" <td>-1.114875</td>\n",
" <td>1.284254</td>\n",
" <td>1</td>\n",
" <td>2760.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>aio-libs_aiomysql.git</td>\n",
" <td>_aio-libs_aiomysql.git_commits.csv</td>\n",
" <td>aio-libs_aiomysql.git_hullabaloo_README</td>\n",
" <td>ran_vals</td>\n",
" <td>project_id</td>\n",
" <td>aio-libs_aiomysql.git</td>\n",
" <td>before_after:week_index</td>\n",
" <td>-0.494687</td>\n",
" <td>0.731528</td>\n",
" <td>-1.928454</td>\n",
" <td>0.939081</td>\n",
" <td>1</td>\n",
" <td>118.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>shekyan_slowhttptest</td>\n",
" <td>_shekyan_slowhttptest_commits.csv</td>\n",
" <td>shekyan_slowhttptest_hullabaloo_README.md</td>\n",
" <td>ran_vals</td>\n",
" <td>project_id</td>\n",
" <td>shekyan_slowhttptest</td>\n",
" <td>before_after:week_index</td>\n",
" <td>-0.335128</td>\n",
" <td>0.704815</td>\n",
" <td>-1.716541</td>\n",
" <td>1.046284</td>\n",
" <td>1</td>\n",
" <td>1027.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" repo_id commits_filepath \\\n",
"0 italiangrid_voms _italiangrid_voms_commits.csv \n",
"1 ultrajson_ultrajson _ultrajson_ultrajson_commits.csv \n",
"2 swipely_docker-api _swipely_docker-api_commits.csv \n",
"3 aio-libs_aiomysql.git _aio-libs_aiomysql.git_commits.csv \n",
"4 shekyan_slowhttptest _shekyan_slowhttptest_commits.csv \n",
"\n",
" fvf_filepath effect group \\\n",
"0 italiangrid_voms_hullabaloo_README.md ran_vals project_id \n",
"1 ultrajson_ultrajson_hullabaloo_README ran_vals project_id \n",
"2 swipely_docker-api_hullabaloo_README.md ran_vals project_id \n",
"3 aio-libs_aiomysql.git_hullabaloo_README ran_vals project_id \n",
"4 shekyan_slowhttptest_hullabaloo_README.md ran_vals project_id \n",
"\n",
" level term estimate std.error \\\n",
"0 italiangrid_voms before_after:week_index -0.014093 0.638314 \n",
"1 ultrajson_ultrajson before_after:week_index 0.096429 0.616497 \n",
"2 swipely_docker-api before_after:week_index 0.084690 0.612034 \n",
"3 aio-libs_aiomysql.git before_after:week_index -0.494687 0.731528 \n",
"4 shekyan_slowhttptest before_after:week_index -0.335128 0.704815 \n",
"\n",
" conf.low conf.high ranef_grouping rank \n",
"0 -1.265166 1.236980 1 2294.0 \n",
"1 -1.111884 1.304742 1 2814.0 \n",
"2 -1.114875 1.284254 1 2760.0 \n",
"3 -1.928454 0.939081 1 118.0 \n",
"4 -1.716541 1.046284 1 1027.0 "
"4247"
]
},
"execution_count": 37,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@ -402,12 +257,12 @@
"source": [
"readme_merged = pd.merge(readme_manifest, readme_ranef, on=['repo_id'], how='inner')\n",
"readme_merged = readme_merged.drop(columns='Unnamed: 0')\n",
"readme_merged.head()"
"len(readme_merged)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [