{ "cells": [
 { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [
   "import pandas as pd\n",
   "import os\n",
   "import shutil"
  ] },
 { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [
   "# Load the two ranef CSVs: one for CONTRIBUTING files, one for README files.\n",
   "contributing_ranef = pd.read_csv(\"../mlm/data/0201_contributing_dweek_ranefs.csv\")\n",
   "readme_ranef = pd.read_csv(\"../mlm/data/0201_readme_dweek_ranefs.csv\")"
  ] },
 { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [
   "# Copy `level` into `repo_id` (in place) so both ranef tables carry the key\n",
   "# that the manifest merges further down join on.\n",
   "for ranef_table in (contributing_ranef, readme_ranef):\n",
   "    ranef_table['repo_id'] = ranef_table['level']"
  ] },
 { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [
   "# Load the manifest CSVs, again one per file type.\n",
   "contributing_manifest = pd.read_csv(\"../all_fvf_CONTRIBUTING_manifest-link.csv\")\n",
   "readme_manifest = pd.read_csv(\"../all_fvf_README_manifest-link.csv\")"
  ] },
 { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repo_idcommits_filepathfvf_filepatheffectgroupleveltermestimatestd.errorconf.lowconf.highranef_groupingrank
0aio-libs_aiomysql.git_aio-libs_aiomysql.git_commits.csvaio-libs_aiomysql.git_hullabaloo_CONTRIBUTING.rstran_valsproject_idaio-libs_aiomysql.gitbefore_after:week_index-0.1424760.116488-0.3707890.0858381222.0
1qTox_qTox_qTox_qTox_commits.csvqTox_qTox_hullabaloo_CONTRIBUTING.mdran_valsproject_idqTox_qToxbefore_after:week_index0.3856890.0484330.2907630.4806152684.0
2gohugoio_hugo.git_gohugoio_hugo.git_commits.csvgohugoio_hugo.git_hullabaloo_CONTRIBUTING.mdran_valsproject_idgohugoio_hugo.gitbefore_after:week_index0.3345180.0531720.2303020.4387332647.0
3ycm-core_ycmd_ycm-core_ycmd_commits.csvycm-core_ycmd_hullabaloo_CONTRIBUTING.mdran_valsproject_idycm-core_ycmdbefore_after:week_index-0.0006050.094662-0.1861390.1849281328.0
4aio-libs_aiohttp.git_aio-libs_aiohttp.git_commits.csvaio-libs_aiohttp.git_hullabaloo_CONTRIBUTING.rstran_valsproject_idaio-libs_aiohttp.gitbefore_after:week_index0.3484210.0518260.2468450.4499972658.0
\n", "
" ], "text/plain": [
      " repo_id commits_filepath \\\n",
      "0 aio-libs_aiomysql.git _aio-libs_aiomysql.git_commits.csv \n",
      "1 qTox_qTox _qTox_qTox_commits.csv \n",
      "2 gohugoio_hugo.git _gohugoio_hugo.git_commits.csv \n",
      "3 ycm-core_ycmd _ycm-core_ycmd_commits.csv \n",
      "4 aio-libs_aiohttp.git _aio-libs_aiohttp.git_commits.csv \n",
      "\n",
      " fvf_filepath effect group \\\n",
      "0 aio-libs_aiomysql.git_hullabaloo_CONTRIBUTING.rst ran_vals project_id \n",
      "1 qTox_qTox_hullabaloo_CONTRIBUTING.md ran_vals project_id \n",
      "2 gohugoio_hugo.git_hullabaloo_CONTRIBUTING.md ran_vals project_id \n",
      "3 ycm-core_ycmd_hullabaloo_CONTRIBUTING.md ran_vals project_id \n",
      "4 aio-libs_aiohttp.git_hullabaloo_CONTRIBUTING.rst ran_vals project_id \n",
      "\n",
      " level term estimate std.error \\\n",
      "0 aio-libs_aiomysql.git before_after:week_index -0.142476 0.116488 \n",
      "1 qTox_qTox before_after:week_index 0.385689 0.048433 \n",
      "2 gohugoio_hugo.git before_after:week_index 0.334518 0.053172 \n",
      "3 ycm-core_ycmd before_after:week_index -0.000605 0.094662 \n",
      "4 aio-libs_aiohttp.git before_after:week_index 0.348421 0.051826 \n",
      "\n",
      " conf.low conf.high ranef_grouping rank \n",
      "0 -0.370789 0.085838 1 222.0 \n",
      "1 0.290763 0.480615 2 684.0 \n",
      "2 0.230302 0.438733 2 647.0 \n",
      "3 -0.186139 0.184928 1 328.0 \n",
      "4 0.246845 0.449997 2 658.0 "
     ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ],
   "source": [
    "# Inner-join the CONTRIBUTING manifest with its ranef rows on repo_id, so only\n",
    "# repo_ids present in both tables survive, then drop the 'Unnamed: 0' column\n",
    "# (presumably a saved index column from one of the CSVs -- confirm which).\n",
    "contributing_merged = (\n",
    "    contributing_manifest\n",
    "    .merge(contributing_ranef, on=['repo_id'], how='inner')\n",
    "    .drop(columns='Unnamed: 0')\n",
    ")\n",
    "contributing_merged.head()"
   ] },
 { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [
    "# Write the merged table into the current working directory; index=False\n",
    "# leaves the row index out of the file.\n",
    "contributing_merged.to_csv(path_or_buf='0203_contributing_merged_manifest.csv', index=False)"
   ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Empty DataFrame\n", "Columns: [repo_id, 
commits_filepath, fvf_filepath]\n",
      "Index: []\n"
     ] } ],
   "source": [
    "# Sanity check before the README merge: list every manifest row whose repo_id\n",
    "# occurs more than once (keep=False flags all members of a duplicate group).\n",
    "# An empty frame means repo_id is unique in readme_manifest, so the merge on\n",
    "# repo_id cannot fan out because of the manifest side.\n",
    "duplicates = readme_manifest[readme_manifest.duplicated(subset=['repo_id'], keep=False)]\n",
    "print(duplicates)"
   ] },
 { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repo_idcommits_filepathfvf_filepatheffectgroupleveltermestimatestd.errorconf.lowconf.highranef_groupingrank
0italiangrid_voms_italiangrid_voms_commits.csvitaliangrid_voms_hullabaloo_README.mdran_valsproject_iditaliangrid_vomsbefore_after:week_index-0.0140930.638314-1.2651661.23698012294.0
1ultrajson_ultrajson_ultrajson_ultrajson_commits.csvultrajson_ultrajson_hullabaloo_READMEran_valsproject_idultrajson_ultrajsonbefore_after:week_index0.0964290.616497-1.1118841.30474212814.0
2swipely_docker-api_swipely_docker-api_commits.csvswipely_docker-api_hullabaloo_README.mdran_valsproject_idswipely_docker-apibefore_after:week_index0.0846900.612034-1.1148751.28425412760.0
3aio-libs_aiomysql.git_aio-libs_aiomysql.git_commits.csvaio-libs_aiomysql.git_hullabaloo_READMEran_valsproject_idaio-libs_aiomysql.gitbefore_after:week_index-0.4946870.731528-1.9284540.9390811118.0
4shekyan_slowhttptest_shekyan_slowhttptest_commits.csvshekyan_slowhttptest_hullabaloo_README.mdran_valsproject_idshekyan_slowhttptestbefore_after:week_index-0.3351280.704815-1.7165411.04628411027.0
\n", "
" ], "text/plain": [
      " repo_id commits_filepath \\\n",
      "0 italiangrid_voms _italiangrid_voms_commits.csv \n",
      "1 ultrajson_ultrajson _ultrajson_ultrajson_commits.csv \n",
      "2 swipely_docker-api _swipely_docker-api_commits.csv \n",
      "3 aio-libs_aiomysql.git _aio-libs_aiomysql.git_commits.csv \n",
      "4 shekyan_slowhttptest _shekyan_slowhttptest_commits.csv \n",
      "\n",
      " fvf_filepath effect group \\\n",
      "0 italiangrid_voms_hullabaloo_README.md ran_vals project_id \n",
      "1 ultrajson_ultrajson_hullabaloo_README ran_vals project_id \n",
      "2 swipely_docker-api_hullabaloo_README.md ran_vals project_id \n",
      "3 aio-libs_aiomysql.git_hullabaloo_README ran_vals project_id \n",
      "4 shekyan_slowhttptest_hullabaloo_README.md ran_vals project_id \n",
      "\n",
      " level term estimate std.error \\\n",
      "0 italiangrid_voms before_after:week_index -0.014093 0.638314 \n",
      "1 ultrajson_ultrajson before_after:week_index 0.096429 0.616497 \n",
      "2 swipely_docker-api before_after:week_index 0.084690 0.612034 \n",
      "3 aio-libs_aiomysql.git before_after:week_index -0.494687 0.731528 \n",
      "4 shekyan_slowhttptest before_after:week_index -0.335128 0.704815 \n",
      "\n",
      " conf.low conf.high ranef_grouping rank \n",
      "0 -1.265166 1.236980 1 2294.0 \n",
      "1 -1.111884 1.304742 1 2814.0 \n",
      "2 -1.114875 1.284254 1 2760.0 \n",
      "3 -1.928454 0.939081 1 118.0 \n",
      "4 -1.716541 1.046284 1 1027.0 "
     ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ],
   "source": [
    "# Inner-join the README manifest with its ranef rows on repo_id, so only\n",
    "# repo_ids present in both tables survive, then drop the 'Unnamed: 0' column\n",
    "# (presumably a saved index column from one of the CSVs -- confirm which).\n",
    "readme_merged = (\n",
    "    readme_manifest\n",
    "    .merge(readme_ranef, on=['repo_id'], how='inner')\n",
    "    .drop(columns='Unnamed: 0')\n",
    ")\n",
    "readme_merged.head()"
   ] },
 { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [
    "# Write the merged table into the current working directory; index=False\n",
    "# leaves the row index out of the file.\n",
    "readme_merged.to_csv(path_or_buf='0203_readme_merged_manifest.csv', index=False)"
   ] } ],
 "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", 
"mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }