updated topic model and combo analysis
This commit is contained in:
parent
5ab4b58542
commit
d931bec7d8
@ -1514,7 +1514,7 @@ bbonev_dhcpdump,bbonev_dhcpdump_commits.csv,bbonev_dhcpdump_hullabaloo_README.md
|
|||||||
sphinx-contrib_autoprogram.git,_sphinx-contrib_autoprogram.git_commits.csv,sphinx-contrib_autoprogram.git_hullabaloo_README.rst
|
sphinx-contrib_autoprogram.git,_sphinx-contrib_autoprogram.git_commits.csv,sphinx-contrib_autoprogram.git_hullabaloo_README.rst
|
||||||
elmar_ldap-git-backup,elmar_ldap-git-backup_commits.csv,elmar_ldap-git-backup_hullabaloo_README.md
|
elmar_ldap-git-backup,elmar_ldap-git-backup_commits.csv,elmar_ldap-git-backup_hullabaloo_README.md
|
||||||
GNOME_gnome-notes,GNOME_gnome-notes_commits.csv,GNOME_gnome-notes_hullabaloo_README
|
GNOME_gnome-notes,GNOME_gnome-notes_commits.csv,GNOME_gnome-notes_hullabaloo_README
|
||||||
quodlibet_mutagen.git,quodlibet_mutagen.git_commits.csv,quodlibet_mutagen.git_hullabaloo_README.rst
|
quodlibet_mutagen.git,quodlibet_mutagen.git_commits.csv,quodlibet_mutagen.git_hullabaloo_README
|
||||||
GNOME_gnome-calculator.git,GNOME_gnome-calculator.git_commits.csv,GNOME_gnome-calculator.git_hullabaloo_README
|
GNOME_gnome-calculator.git,GNOME_gnome-calculator.git_commits.csv,GNOME_gnome-calculator.git_hullabaloo_README
|
||||||
pulseaudio_pulseaudio.git,pulseaudio_pulseaudio.git_commits.csv,pulseaudio_pulseaudio.git_hullabaloo_README.html.in
|
pulseaudio_pulseaudio.git,pulseaudio_pulseaudio.git_commits.csv,pulseaudio_pulseaudio.git_hullabaloo_README.html.in
|
||||||
rear_rear.git,_rear_rear.git_commits.csv,rear_rear.git_hullabaloo_README
|
rear_rear.git,_rear_rear.git_commits.csv,rear_rear.git_hullabaloo_README
|
||||||
@ -2743,7 +2743,7 @@ jbazik_Archive-Ar.git,_jbazik_Archive-Ar.git_commits.csv,jbazik_Archive-Ar.git_h
|
|||||||
davical-project_awl,davical-project_awl_commits.csv,davical-project_awl_hullabaloo_README
|
davical-project_awl,davical-project_awl_commits.csv,davical-project_awl_hullabaloo_README
|
||||||
phillord_assess.git,_phillord_assess.git_commits.csv,phillord_assess.git_hullabaloo_README.md
|
phillord_assess.git,_phillord_assess.git_commits.csv,phillord_assess.git_hullabaloo_README.md
|
||||||
public_git_gst123.git,public_git_gst123.git_commits.csv,public_git_gst123.git_hullabaloo_README
|
public_git_gst123.git,public_git_gst123.git_commits.csv,public_git_gst123.git_hullabaloo_README
|
||||||
pure-data_deken,_pure-data_deken_commits.csv,pure-data_deken_hullabaloo_README.plugin.txt
|
pure-data_deken,_pure-data_deken_commits.csv,pure-data_deken_hullabaloo_README.md
|
||||||
p_posixovl_posixovl,p_posixovl_posixovl_commits.csv,p_posixovl_posixovl_hullabaloo_README.rst
|
p_posixovl_posixovl,p_posixovl_posixovl_commits.csv,p_posixovl_posixovl_hullabaloo_README.rst
|
||||||
czerwonk_bird_exporter,_czerwonk_bird_exporter_commits.csv,czerwonk_bird_exporter_hullabaloo_README.md
|
czerwonk_bird_exporter,_czerwonk_bird_exporter_commits.csv,czerwonk_bird_exporter_hullabaloo_README.md
|
||||||
es128_glob-parent,_es128_glob-parent_commits.csv,es128_glob-parent_hullabaloo_README.md
|
es128_glob-parent,_es128_glob-parent_commits.csv,es128_glob-parent_hullabaloo_README.md
|
||||||
@ -3341,7 +3341,7 @@ spacetelescope_imexam,_spacetelescope_imexam_commits.csv,spacetelescope_imexam_h
|
|||||||
knavalbattle.git,knavalbattle.git_commits.csv,knavalbattle.git_hullabaloo_README
|
knavalbattle.git,knavalbattle.git_commits.csv,knavalbattle.git_hullabaloo_README
|
||||||
wagtail_django-modelcluster.git,_wagtail_django-modelcluster.git_commits.csv,wagtail_django-modelcluster.git_hullabaloo_README.md
|
wagtail_django-modelcluster.git,_wagtail_django-modelcluster.git_commits.csv,wagtail_django-modelcluster.git_hullabaloo_README.md
|
||||||
dgedit.git,dgedit.git_commits.csv,dgedit.git_hullabaloo_README
|
dgedit.git,dgedit.git_commits.csv,dgedit.git_hullabaloo_README
|
||||||
python-babel_flask-babel,_python-babel_flask-babel_commits.csv,python-babel_flask-babel_hullabaloo_README
|
python-babel_flask-babel,_python-babel_flask-babel_commits.csv,python-babel_flask-babel_hullabaloo_README.md
|
||||||
OpenShot_libopenshot-audio.git,_OpenShot_libopenshot-audio.git_commits.csv,OpenShot_libopenshot-audio.git_hullabaloo_README
|
OpenShot_libopenshot-audio.git,_OpenShot_libopenshot-audio.git_commits.csv,OpenShot_libopenshot-audio.git_hullabaloo_README
|
||||||
sfcgal_SFCGAL,sfcgal_SFCGAL_commits.csv,sfcgal_SFCGAL_hullabaloo_README.md
|
sfcgal_SFCGAL,sfcgal_SFCGAL_commits.csv,sfcgal_SFCGAL_hullabaloo_README.md
|
||||||
ekenberg_quotatool,_ekenberg_quotatool_commits.csv,ekenberg_quotatool_hullabaloo_README
|
ekenberg_quotatool,_ekenberg_quotatool_commits.csv,ekenberg_quotatool_hullabaloo_README
|
||||||
|
|
Can't render this file because it is too large.
|
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
4248
text_analysis/020325_README_file_topic_distributions.csv
Normal file
4248
text_analysis/020325_README_file_topic_distributions.csv
Normal file
File diff suppressed because it is too large
Load Diff
BIN
text_analysis/020325_README_lda.jl
Normal file
BIN
text_analysis/020325_README_lda.jl
Normal file
Binary file not shown.
@ -314,7 +314,6 @@ rsyslog_libestr.git_hullabaloo_README,77.91,2.9,0.0,0.1,3.0,0.15,10,2
|
|||||||
Yubico_yubikey-manager.git_hullabaloo_README,29.75,13.1,14.8,11.0,20.4,10.78,734,113
|
Yubico_yubikey-manager.git_hullabaloo_README,29.75,13.1,14.8,11.0,20.4,10.78,734,113
|
||||||
afewmail_afew_hullabaloo_README,47.42,12.5,28.5,9.78,32.1,47.71,3248,521
|
afewmail_afew_hullabaloo_README,47.42,12.5,28.5,9.78,32.1,47.71,3248,521
|
||||||
astro_node-expat_hullabaloo_README.markdown,62.75,6.6,4.888888888888889,12.27,11.9,8.26,562,77
|
astro_node-expat_hullabaloo_README.markdown,62.75,6.6,4.888888888888889,12.27,11.9,8.26,562,77
|
||||||
python-babel_flask-babel_hullabaloo_README,66.4,5.2,2.5,13.36,5.0,0.43,29,5
|
|
||||||
c-cube_gen.git_hullabaloo_README.md,69.28,6.2,5.5,9.65,14.4,9.86,671,115
|
c-cube_gen.git_hullabaloo_README.md,69.28,6.2,5.5,9.65,14.4,9.86,671,115
|
||||||
douardda_pyramid_multiauth_hullabaloo_README.txt,40.69,13.1,14.5,11.19,32.0,1.91,130,22
|
douardda_pyramid_multiauth_hullabaloo_README.txt,40.69,13.1,14.5,11.19,32.0,1.91,130,22
|
||||||
craneworks_python-ipcalc_hullabaloo_README.source,37.16,10.3,4.625,17.16,12.3,4.11,280,35
|
craneworks_python-ipcalc_hullabaloo_README.source,37.16,10.3,4.625,17.16,12.3,4.11,280,35
|
||||||
@ -1241,7 +1240,6 @@ git_nano.git_hullabaloo_README,63.8,8.3,13.4,8.6,22.5,38.49,2620,526
|
|||||||
PyCQA_prospector_hullabaloo_README.md,55.24,9.5,10.5,17.26,20.0,1.57,107,16
|
PyCQA_prospector_hullabaloo_README.md,55.24,9.5,10.5,17.26,20.0,1.57,107,16
|
||||||
NigelCunningham_pam-MySQL.git_hullabaloo_README,59.8,7.8,7.0,8.02,16.1,139.64,9506,1622
|
NigelCunningham_pam-MySQL.git_hullabaloo_README,59.8,7.8,7.0,8.02,16.1,139.64,9506,1622
|
||||||
biojava_biojava.git_hullabaloo_README.txt,35.64,10.8,7.375,12.62,12.8,3.92,267,41
|
biojava_biojava.git_hullabaloo_README.txt,35.64,10.8,7.375,12.62,12.8,3.92,267,41
|
||||||
pure-data_deken_hullabaloo_README.plugin.txt,53.1,10.3,8.0,8.03,25.5,29.51,2009,343
|
|
||||||
jfhbrook_pyee_hullabaloo_README.rst,61.93,7.0,5.5,10.06,12.8,24.12,1642,236
|
jfhbrook_pyee_hullabaloo_README.rst,61.93,7.0,5.5,10.06,12.8,24.12,1642,236
|
||||||
GNOME_perl-pango_hullabaloo_README,64.51,8.0,6.75,10.18,20.8,30.72,2091,349
|
GNOME_perl-pango_hullabaloo_README,64.51,8.0,6.75,10.18,20.8,30.72,2091,349
|
||||||
ceres-solver_ceres-solver.git_hullabaloo_README,62.85,6.6,5.75,14.28,11.5,2.12,144,17
|
ceres-solver_ceres-solver.git_hullabaloo_README,62.85,6.6,5.75,14.28,11.5,2.12,144,17
|
||||||
@ -1897,7 +1895,6 @@ umanwizard_libeot.git_hullabaloo_README.md,74.86,4.1,2.75,13.36,7.5,0.84,57,10
|
|||||||
xhtml2pdf_xhtml2pdf_hullabaloo_README.txt,45.83,9.0,3.7142857142857144,11.77,11.7,13.94,949,129
|
xhtml2pdf_xhtml2pdf_hullabaloo_README.txt,45.83,9.0,3.7142857142857144,11.77,11.7,13.94,949,129
|
||||||
traviscross_mtr_hullabaloo_README,68.47,6.5,6.75,9.29,16.2,16.61,1131,214
|
traviscross_mtr_hullabaloo_README,68.47,6.5,6.75,9.29,16.2,16.61,1131,214
|
||||||
GNOME_libwnck.git_hullabaloo_README,72.53,5.0,3.333333333333333,9.02,11.0,1.65,112,22
|
GNOME_libwnck.git_hullabaloo_README,72.53,5.0,3.333333333333333,9.02,11.0,1.65,112,22
|
||||||
quodlibet_mutagen.git_hullabaloo_README.rst,34.83,11.2,6.111111111111111,14.16,15.2,13.72,934,133
|
|
||||||
rvaser_bioparser_hullabaloo_README.md,36.62,8.4,0.0,19.48,1.0,0.15,10,1
|
rvaser_bioparser_hullabaloo_README.md,36.62,8.4,0.0,19.48,1.0,0.15,10,1
|
||||||
gweis_isodate_hullabaloo_README.txt,64.61,8.0,7.857142857142858,8.78,20.6,32.26,2196,392
|
gweis_isodate_hullabaloo_README.txt,64.61,8.0,7.857142857142858,8.78,20.6,32.26,2196,392
|
||||||
webpy_webpy.git_hullabaloo_README.tests,76.82,5.4,6.1,8.63,15.4,4.76,324,57
|
webpy_webpy.git_hullabaloo_README.tests,76.82,5.4,6.1,8.63,15.4,4.76,324,57
|
||||||
@ -2461,7 +2458,6 @@ panel-plugins_xfce4-timer-plugin.git_hullabaloo_README,81.7,3.5,2.7,9.71,9.6,4.6
|
|||||||
routeKIT_jbzip2.git_hullabaloo_README.md,39.23,11.5,5.6,11.66,20.3,27.01,1839,241
|
routeKIT_jbzip2.git_hullabaloo_README.md,39.23,11.5,5.6,11.66,20.3,27.01,1839,241
|
||||||
sebastianbergmann_php-token-stream_hullabaloo_README.markdown,35.34,11.0,7.375,10.25,13.7,13.78,938,127
|
sebastianbergmann_php-token-stream_hullabaloo_README.markdown,35.34,11.0,7.375,10.25,13.7,13.78,938,127
|
||||||
eugmes_fntsample.git_hullabaloo_README.rst,38.38,9.8,4.583333333333333,11.55,9.9,32.39,2205,266
|
eugmes_fntsample.git_hullabaloo_README.rst,38.38,9.8,4.583333333333333,11.55,9.9,32.39,2205,266
|
||||||
scrapy_cssselect_hullabaloo_README,31.07,12.6,11.142857142857142,10.71,20.1,40.15,2733,356
|
|
||||||
include-what-you-use_include-what-you-use_hullabaloo_README.txt,65.62,7.6,3.071428571428571,7.1,20.0,214.69,14615,2658
|
include-what-you-use_include-what-you-use_hullabaloo_README.txt,65.62,7.6,3.071428571428571,7.1,20.0,214.69,14615,2658
|
||||||
google_brotli_hullabaloo_README,59.8,7.8,6.25,11.76,16.0,1.78,121,23
|
google_brotli_hullabaloo_README,59.8,7.8,6.25,11.76,16.0,1.78,121,23
|
||||||
svgpp_svgpp.git_hullabaloo_README.md,83.32,2.9,2.0,16.52,8.0,0.4,27,5
|
svgpp_svgpp.git_hullabaloo_README.md,83.32,2.9,2.0,16.52,8.0,0.4,27,5
|
||||||
@ -2997,7 +2993,6 @@ eerimoq_bitstruct_hullabaloo_README.rst,32.6,12.0,16.25,12.71,17.9,28.41,1934,21
|
|||||||
ronf_asyncssh_hullabaloo_README,32.6,12.0,7.5,12.19,18.1,10.19,694,106
|
ronf_asyncssh_hullabaloo_README,32.6,12.0,7.5,12.19,18.1,10.19,694,106
|
||||||
cryptsetup_cryptsetup_hullabaloo_README,206.84,-15.7,-1.0,0.0,0.0,0.0,0,0
|
cryptsetup_cryptsetup_hullabaloo_README,206.84,-15.7,-1.0,0.0,0.0,0.0,0,0
|
||||||
ralovich_antpm_hullabaloo_README,83.46,4.9,8.166666666666666,8.21,18.7,9.28,632,145
|
ralovich_antpm_hullabaloo_README,83.46,4.9,8.166666666666666,8.21,18.7,9.28,632,145
|
||||||
scop_bash-completion_hullabaloo_README.md,48.7,10.0,9.0,7.41,20.0,188.65,12842,2114
|
|
||||||
residuum_PuRestJson.git_hullabaloo_README,46.98,10.6,7.5,10.69,20.6,16.42,1118,189
|
residuum_PuRestJson.git_hullabaloo_README,46.98,10.6,7.5,10.69,20.6,16.42,1118,189
|
||||||
joaotavora_yasnippet_hullabaloo_README,51.85,8.8,6.375,12.08,15.5,3.86,263,44
|
joaotavora_yasnippet_hullabaloo_README,51.85,8.8,6.375,12.08,15.5,3.86,263,44
|
||||||
astropy_astroquery.git_hullabaloo_README.rst,55.44,9.5,10.333333333333334,7.17,22.1,108.94,7416,1314
|
astropy_astroquery.git_hullabaloo_README.rst,55.44,9.5,10.333333333333334,7.17,22.1,108.94,7416,1314
|
||||||
@ -4243,7 +4238,6 @@ jpadilla_pyjwt.git_hullabaloo_README.md,41.56,10.6,7.5,11.18,16.6,12.78,870,128
|
|||||||
osslugaru_lugaru_hullabaloo_README,61.33,7.2,3.916666666666667,8.43,13.6,152.19,10360,1768
|
osslugaru_lugaru_hullabaloo_README,61.33,7.2,3.916666666666667,8.43,13.6,152.19,10360,1768
|
||||||
neurodebian_Psychtoolbox-3_hullabaloo_README.rtf,46.98,10.6,11.333333333333334,8.87,22.1,58.52,3984,600
|
neurodebian_Psychtoolbox-3_hullabaloo_README.rtf,46.98,10.6,11.333333333333334,8.87,22.1,58.52,3984,600
|
||||||
Bioconductor_DelayedArray.git_hullabaloo_README.md,-8.56,19.5,14.75,16.98,26.5,5.6,381,41
|
Bioconductor_DelayedArray.git_hullabaloo_README.md,-8.56,19.5,14.75,16.98,26.5,5.6,381,41
|
||||||
mongoengine_flask-mongoengine_hullabaloo_README.md,-7.38,17.0,13.333333333333334,10.18,13.9,54.88,3736,363
|
|
||||||
howardabrams_node-mocks-http_hullabaloo_README.md,40.14,11.2,10.166666666666666,11.85,18.2,21.01,1430,185
|
howardabrams_node-mocks-http_hullabaloo_README.md,40.14,11.2,10.166666666666666,11.85,18.2,21.01,1430,185
|
||||||
karenetheridge_Module-Manifest_hullabaloo_README,46.67,10.7,11.666666666666666,9.13,22.3,39.82,2711,498
|
karenetheridge_Module-Manifest_hullabaloo_README,46.67,10.7,11.666666666666666,9.13,22.3,39.82,2711,498
|
||||||
plotly_plotly.R.git_hullabaloo_README.md,27.08,12.1,10.666666666666666,12.19,13.4,46.99,3199,385
|
plotly_plotly.R.git_hullabaloo_README.md,27.08,12.1,10.666666666666666,12.19,13.4,46.99,3199,385
|
|
BIN
text_analysis/020325_README_vectorizer.joblib
Normal file
BIN
text_analysis/020325_README_vectorizer.joblib
Normal file
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 5,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -17,7 +17,7 @@
|
|||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"contributing_readability = pd.read_csv(\"020125_CONTRIBUTING_readability.csv\")\n",
|
"contributing_readability = pd.read_csv(\"020125_CONTRIBUTING_readability.csv\")\n",
|
||||||
"readme_readability = pd.read_csv(\"020125_README_readability.csv\")"
|
"readme_readability = pd.read_csv(\"020325_README_readability.csv\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -156,7 +156,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 20,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -165,7 +165,7 @@
|
|||||||
"214.0"
|
"214.0"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 20,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -176,7 +176,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 18,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -185,7 +185,7 @@
|
|||||||
"9.1"
|
"9.1"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 18,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 1,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -13,7 +13,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 2,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -25,7 +25,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 4,
|
"execution_count": 3,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -43,7 +43,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 8,
|
"execution_count": 4,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -72,11 +72,11 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 9,
|
"execution_count": 5,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"generate_file('020125_README_readability.csv', readme_directory)"
|
"generate_file('020325_README_readability.csv', readme_directory)"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 1,
|
"execution_count": 22,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -11,27 +11,27 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 2,
|
"execution_count": 23,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"merged_manifest = pd.read_csv('0203_readme_merged_manifest.csv')\n",
|
"merged_manifest = pd.read_csv('0203_readme_merged_manifest.csv')\n",
|
||||||
"topic_distributions = pd.read_csv('020125_README_file_topic_distributions.csv')\n",
|
"topic_distributions = pd.read_csv('020325_README_file_topic_distributions.csv')\n",
|
||||||
"readability_scores = pd.read_csv('020125_README_readability.csv')"
|
"readability_scores = pd.read_csv('020325_README_readability.csv')"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 3,
|
"execution_count": 26,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
"4248"
|
"4247"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 3,
|
"execution_count": 26,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -40,8 +40,9 @@
|
|||||||
"first_merge = readability_scores.merge(topic_distributions, on=['filename'],how=\"inner\")\n",
|
"first_merge = readability_scores.merge(topic_distributions, on=['filename'],how=\"inner\")\n",
|
||||||
"#primary_merge = first_merge.merge(readability_scores, )\n",
|
"#primary_merge = first_merge.merge(readability_scores, )\n",
|
||||||
"first_merge['fvf_filepath'] = first_merge['filename']\n",
|
"first_merge['fvf_filepath'] = first_merge['filename']\n",
|
||||||
|
"#len(first_merge)\n",
|
||||||
"second_merge = first_merge.merge(merged_manifest, on=['fvf_filepath'], how=\"inner\")\n",
|
"second_merge = first_merge.merge(merged_manifest, on=['fvf_filepath'], how=\"inner\")\n",
|
||||||
"len(second_merge)"
|
"len(second_merge)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -276,7 +277,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": 27,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -330,11 +331,11 @@
|
|||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>55.137558</td>\n",
|
" <td>55.089708</td>\n",
|
||||||
" <td>52.935</td>\n",
|
" <td>52.900</td>\n",
|
||||||
" <td>23.486399</td>\n",
|
" <td>23.486886</td>\n",
|
||||||
" <td>9.29</td>\n",
|
" <td>9.28</td>\n",
|
||||||
" <td>256.988900</td>\n",
|
" <td>256.973406</td>\n",
|
||||||
" <td>90.0</td>\n",
|
" <td>90.0</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
@ -354,7 +355,7 @@
|
|||||||
" flesch_reading_ease reading_time word_count \\\n",
|
" flesch_reading_ease reading_time word_count \\\n",
|
||||||
" mean median mean median mean \n",
|
" mean median mean median mean \n",
|
||||||
"ranef_grouping \n",
|
"ranef_grouping \n",
|
||||||
"1 55.137558 52.935 23.486399 9.29 256.988900 \n",
|
"1 55.089708 52.900 23.486886 9.28 256.973406 \n",
|
||||||
"2 46.425909 51.365 22.760642 9.53 249.534759 \n",
|
"2 46.425909 51.365 22.760642 9.53 249.534759 \n",
|
||||||
"\n",
|
"\n",
|
||||||
" \n",
|
" \n",
|
||||||
@ -364,7 +365,7 @@
|
|||||||
"2 100.0 "
|
"2 100.0 "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 7,
|
"execution_count": 27,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -381,7 +382,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 10,
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -453,31 +454,31 @@
|
|||||||
" <tbody>\n",
|
" <tbody>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>1</th>\n",
|
" <th>1</th>\n",
|
||||||
" <td>0.035938</td>\n",
|
" <td>0.095142</td>\n",
|
||||||
" <td>0.082578</td>\n",
|
" <td>0.101366</td>\n",
|
||||||
" <td>0.096817</td>\n",
|
" <td>0.084609</td>\n",
|
||||||
" <td>0.156525</td>\n",
|
" <td>0.127050</td>\n",
|
||||||
" <td>0.111305</td>\n",
|
" <td>0.024873</td>\n",
|
||||||
" <td>0.094973</td>\n",
|
" <td>0.060155</td>\n",
|
||||||
" <td>0.068736</td>\n",
|
" <td>0.088660</td>\n",
|
||||||
" <td>0.11493</td>\n",
|
" <td>0.136191</td>\n",
|
||||||
" <td>0.061243</td>\n",
|
" <td>0.060451</td>\n",
|
||||||
" <td>0.097605</td>\n",
|
" <td>0.134408</td>\n",
|
||||||
" <td>0.079349</td>\n",
|
" <td>0.087095</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" <tr>\n",
|
" <tr>\n",
|
||||||
" <th>2</th>\n",
|
" <th>2</th>\n",
|
||||||
" <td>0.024329</td>\n",
|
" <td>0.122912</td>\n",
|
||||||
" <td>0.053934</td>\n",
|
" <td>0.102389</td>\n",
|
||||||
" <td>0.095806</td>\n",
|
" <td>0.050782</td>\n",
|
||||||
" <td>0.209661</td>\n",
|
" <td>0.168774</td>\n",
|
||||||
" <td>0.101464</td>\n",
|
" <td>0.015752</td>\n",
|
||||||
" <td>0.076628</td>\n",
|
" <td>0.072154</td>\n",
|
||||||
" <td>0.082048</td>\n",
|
" <td>0.102839</td>\n",
|
||||||
" <td>0.15226</td>\n",
|
" <td>0.115994</td>\n",
|
||||||
" <td>0.064241</td>\n",
|
" <td>0.053674</td>\n",
|
||||||
" <td>0.088526</td>\n",
|
" <td>0.119278</td>\n",
|
||||||
" <td>0.051103</td>\n",
|
" <td>0.075450</td>\n",
|
||||||
" </tr>\n",
|
" </tr>\n",
|
||||||
" </tbody>\n",
|
" </tbody>\n",
|
||||||
"</table>\n",
|
"</table>\n",
|
||||||
@ -487,17 +488,17 @@
|
|||||||
" t0 t1 t2 t3 t4 t5 \\\n",
|
" t0 t1 t2 t3 t4 t5 \\\n",
|
||||||
" mean mean mean mean mean mean \n",
|
" mean mean mean mean mean mean \n",
|
||||||
"ranef_grouping \n",
|
"ranef_grouping \n",
|
||||||
"1 0.035938 0.082578 0.096817 0.156525 0.111305 0.094973 \n",
|
"1 0.095142 0.101366 0.084609 0.127050 0.024873 0.060155 \n",
|
||||||
"2 0.024329 0.053934 0.095806 0.209661 0.101464 0.076628 \n",
|
"2 0.122912 0.102389 0.050782 0.168774 0.015752 0.072154 \n",
|
||||||
"\n",
|
"\n",
|
||||||
" t6 t7 t8 t9 t10 \n",
|
" t6 t7 t8 t9 t10 \n",
|
||||||
" mean mean mean mean mean \n",
|
" mean mean mean mean mean \n",
|
||||||
"ranef_grouping \n",
|
"ranef_grouping \n",
|
||||||
"1 0.068736 0.11493 0.061243 0.097605 0.079349 \n",
|
"1 0.088660 0.136191 0.060451 0.134408 0.087095 \n",
|
||||||
"2 0.082048 0.15226 0.064241 0.088526 0.051103 "
|
"2 0.102839 0.115994 0.053674 0.119278 0.075450 "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 10,
|
"execution_count": 29,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -2,7 +2,7 @@
|
|||||||
"cells": [
|
"cells": [
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 9,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -13,17 +13,17 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 10,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"contributing_ranef = pd.read_csv(\"../mlm/data/0201_contributing_dweek_ranefs.csv\")\n",
|
"contributing_ranef = pd.read_csv(\"../mlm/data/0201_contributing_dweek_ranefs.csv\")\n",
|
||||||
"readme_ranef = pd.read_csv(\"../mlm/data/0201_readme_dweek_ranefs.csv\")"
|
"readme_ranef = pd.read_csv(\"../mlm/data/0203_readme_dweek_ranefs.csv\")"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 11,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -33,7 +33,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 12,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
@ -43,7 +43,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 35,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -197,7 +197,7 @@
|
|||||||
"4 0.246845 0.449997 2 658.0 "
|
"4 0.246845 0.449997 2 658.0 "
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 35,
|
"execution_count": 6,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -240,161 +240,16 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 37,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
"data": {
|
"data": {
|
||||||
"text/html": [
|
|
||||||
"<div>\n",
|
|
||||||
"<style scoped>\n",
|
|
||||||
" .dataframe tbody tr th:only-of-type {\n",
|
|
||||||
" vertical-align: middle;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe tbody tr th {\n",
|
|
||||||
" vertical-align: top;\n",
|
|
||||||
" }\n",
|
|
||||||
"\n",
|
|
||||||
" .dataframe thead th {\n",
|
|
||||||
" text-align: right;\n",
|
|
||||||
" }\n",
|
|
||||||
"</style>\n",
|
|
||||||
"<table border=\"1\" class=\"dataframe\">\n",
|
|
||||||
" <thead>\n",
|
|
||||||
" <tr style=\"text-align: right;\">\n",
|
|
||||||
" <th></th>\n",
|
|
||||||
" <th>repo_id</th>\n",
|
|
||||||
" <th>commits_filepath</th>\n",
|
|
||||||
" <th>fvf_filepath</th>\n",
|
|
||||||
" <th>effect</th>\n",
|
|
||||||
" <th>group</th>\n",
|
|
||||||
" <th>level</th>\n",
|
|
||||||
" <th>term</th>\n",
|
|
||||||
" <th>estimate</th>\n",
|
|
||||||
" <th>std.error</th>\n",
|
|
||||||
" <th>conf.low</th>\n",
|
|
||||||
" <th>conf.high</th>\n",
|
|
||||||
" <th>ranef_grouping</th>\n",
|
|
||||||
" <th>rank</th>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </thead>\n",
|
|
||||||
" <tbody>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>0</th>\n",
|
|
||||||
" <td>italiangrid_voms</td>\n",
|
|
||||||
" <td>_italiangrid_voms_commits.csv</td>\n",
|
|
||||||
" <td>italiangrid_voms_hullabaloo_README.md</td>\n",
|
|
||||||
" <td>ran_vals</td>\n",
|
|
||||||
" <td>project_id</td>\n",
|
|
||||||
" <td>italiangrid_voms</td>\n",
|
|
||||||
" <td>before_after:week_index</td>\n",
|
|
||||||
" <td>-0.014093</td>\n",
|
|
||||||
" <td>0.638314</td>\n",
|
|
||||||
" <td>-1.265166</td>\n",
|
|
||||||
" <td>1.236980</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>2294.0</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>1</th>\n",
|
|
||||||
" <td>ultrajson_ultrajson</td>\n",
|
|
||||||
" <td>_ultrajson_ultrajson_commits.csv</td>\n",
|
|
||||||
" <td>ultrajson_ultrajson_hullabaloo_README</td>\n",
|
|
||||||
" <td>ran_vals</td>\n",
|
|
||||||
" <td>project_id</td>\n",
|
|
||||||
" <td>ultrajson_ultrajson</td>\n",
|
|
||||||
" <td>before_after:week_index</td>\n",
|
|
||||||
" <td>0.096429</td>\n",
|
|
||||||
" <td>0.616497</td>\n",
|
|
||||||
" <td>-1.111884</td>\n",
|
|
||||||
" <td>1.304742</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>2814.0</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>2</th>\n",
|
|
||||||
" <td>swipely_docker-api</td>\n",
|
|
||||||
" <td>_swipely_docker-api_commits.csv</td>\n",
|
|
||||||
" <td>swipely_docker-api_hullabaloo_README.md</td>\n",
|
|
||||||
" <td>ran_vals</td>\n",
|
|
||||||
" <td>project_id</td>\n",
|
|
||||||
" <td>swipely_docker-api</td>\n",
|
|
||||||
" <td>before_after:week_index</td>\n",
|
|
||||||
" <td>0.084690</td>\n",
|
|
||||||
" <td>0.612034</td>\n",
|
|
||||||
" <td>-1.114875</td>\n",
|
|
||||||
" <td>1.284254</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>2760.0</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>3</th>\n",
|
|
||||||
" <td>aio-libs_aiomysql.git</td>\n",
|
|
||||||
" <td>_aio-libs_aiomysql.git_commits.csv</td>\n",
|
|
||||||
" <td>aio-libs_aiomysql.git_hullabaloo_README</td>\n",
|
|
||||||
" <td>ran_vals</td>\n",
|
|
||||||
" <td>project_id</td>\n",
|
|
||||||
" <td>aio-libs_aiomysql.git</td>\n",
|
|
||||||
" <td>before_after:week_index</td>\n",
|
|
||||||
" <td>-0.494687</td>\n",
|
|
||||||
" <td>0.731528</td>\n",
|
|
||||||
" <td>-1.928454</td>\n",
|
|
||||||
" <td>0.939081</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>118.0</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" <tr>\n",
|
|
||||||
" <th>4</th>\n",
|
|
||||||
" <td>shekyan_slowhttptest</td>\n",
|
|
||||||
" <td>_shekyan_slowhttptest_commits.csv</td>\n",
|
|
||||||
" <td>shekyan_slowhttptest_hullabaloo_README.md</td>\n",
|
|
||||||
" <td>ran_vals</td>\n",
|
|
||||||
" <td>project_id</td>\n",
|
|
||||||
" <td>shekyan_slowhttptest</td>\n",
|
|
||||||
" <td>before_after:week_index</td>\n",
|
|
||||||
" <td>-0.335128</td>\n",
|
|
||||||
" <td>0.704815</td>\n",
|
|
||||||
" <td>-1.716541</td>\n",
|
|
||||||
" <td>1.046284</td>\n",
|
|
||||||
" <td>1</td>\n",
|
|
||||||
" <td>1027.0</td>\n",
|
|
||||||
" </tr>\n",
|
|
||||||
" </tbody>\n",
|
|
||||||
"</table>\n",
|
|
||||||
"</div>"
|
|
||||||
],
|
|
||||||
"text/plain": [
|
"text/plain": [
|
||||||
" repo_id commits_filepath \\\n",
|
"4247"
|
||||||
"0 italiangrid_voms _italiangrid_voms_commits.csv \n",
|
|
||||||
"1 ultrajson_ultrajson _ultrajson_ultrajson_commits.csv \n",
|
|
||||||
"2 swipely_docker-api _swipely_docker-api_commits.csv \n",
|
|
||||||
"3 aio-libs_aiomysql.git _aio-libs_aiomysql.git_commits.csv \n",
|
|
||||||
"4 shekyan_slowhttptest _shekyan_slowhttptest_commits.csv \n",
|
|
||||||
"\n",
|
|
||||||
" fvf_filepath effect group \\\n",
|
|
||||||
"0 italiangrid_voms_hullabaloo_README.md ran_vals project_id \n",
|
|
||||||
"1 ultrajson_ultrajson_hullabaloo_README ran_vals project_id \n",
|
|
||||||
"2 swipely_docker-api_hullabaloo_README.md ran_vals project_id \n",
|
|
||||||
"3 aio-libs_aiomysql.git_hullabaloo_README ran_vals project_id \n",
|
|
||||||
"4 shekyan_slowhttptest_hullabaloo_README.md ran_vals project_id \n",
|
|
||||||
"\n",
|
|
||||||
" level term estimate std.error \\\n",
|
|
||||||
"0 italiangrid_voms before_after:week_index -0.014093 0.638314 \n",
|
|
||||||
"1 ultrajson_ultrajson before_after:week_index 0.096429 0.616497 \n",
|
|
||||||
"2 swipely_docker-api before_after:week_index 0.084690 0.612034 \n",
|
|
||||||
"3 aio-libs_aiomysql.git before_after:week_index -0.494687 0.731528 \n",
|
|
||||||
"4 shekyan_slowhttptest before_after:week_index -0.335128 0.704815 \n",
|
|
||||||
"\n",
|
|
||||||
" conf.low conf.high ranef_grouping rank \n",
|
|
||||||
"0 -1.265166 1.236980 1 2294.0 \n",
|
|
||||||
"1 -1.111884 1.304742 1 2814.0 \n",
|
|
||||||
"2 -1.114875 1.284254 1 2760.0 \n",
|
|
||||||
"3 -1.928454 0.939081 1 118.0 \n",
|
|
||||||
"4 -1.716541 1.046284 1 1027.0 "
|
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"execution_count": 37,
|
"execution_count": 13,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"output_type": "execute_result"
|
"output_type": "execute_result"
|
||||||
}
|
}
|
||||||
@ -402,12 +257,12 @@
|
|||||||
"source": [
|
"source": [
|
||||||
"readme_merged = pd.merge(readme_manifest, readme_ranef, on=['repo_id'], how='inner')\n",
|
"readme_merged = pd.merge(readme_manifest, readme_ranef, on=['repo_id'], how='inner')\n",
|
||||||
"readme_merged = readme_merged.drop(columns='Unnamed: 0')\n",
|
"readme_merged = readme_merged.drop(columns='Unnamed: 0')\n",
|
||||||
"readme_merged.head()"
|
"len(readme_merged)"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 38,
|
"execution_count": 14,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
|
Loading…
Reference in New Issue
Block a user