{ "cells": [
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [
  "import pandas as pd\n",
  "import os\n",
  "import shutil" ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [
  "# Random-effect estimate tables (the *_ranefs.csv exports): one for\n",
  "# CONTRIBUTING files and one for README files.\n",
  "contributing_ranef = pd.read_csv(\n",
  "    \"../mlm/data/0201_contributing_dweek_ranefs.csv\"\n",
  ")\n",
  "readme_ranef = pd.read_csv(\n",
  "    \"../mlm/data/0203_readme_dweek_ranefs.csv\"\n",
  ")" ] },
 { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [
  "# Copy the `level` column into `repo_id`, the column the merges later in\n",
  "# this notebook join on.\n",
  "contributing_ranef = contributing_ranef.assign(repo_id=contributing_ranef['level'])\n",
  "readme_ranef = readme_ranef.assign(repo_id=readme_ranef['level'])" ] },
 { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [
  "# Manifest tables for the CONTRIBUTING and README files; these are the left\n",
  "# side of the merges on `repo_id`.\n",
  "contributing_manifest = pd.read_csv(\n",
  "    \"../all_fvf_CONTRIBUTING_manifest-link.csv\"\n",
  ")\n",
  "readme_manifest = pd.read_csv(\n",
  "    \"../all_fvf_README_manifest-link.csv\"\n",
  ")" ] },
 { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
repo_idcommits_filepathfvf_filepatheffectgroupleveltermestimatestd.errorconf.lowconf.highranef_groupingrank
0aio-libs_aiomysql.git_aio-libs_aiomysql.git_commits.csvaio-libs_aiomysql.git_hullabaloo_CONTRIBUTING.rstran_valsproject_idaio-libs_aiomysql.gitbefore_after:week_index-0.1424760.116488-0.3707890.0858381222.0
1qTox_qTox_qTox_qTox_commits.csvqTox_qTox_hullabaloo_CONTRIBUTING.mdran_valsproject_idqTox_qToxbefore_after:week_index0.3856890.0484330.2907630.4806152684.0
2gohugoio_hugo.git_gohugoio_hugo.git_commits.csvgohugoio_hugo.git_hullabaloo_CONTRIBUTING.mdran_valsproject_idgohugoio_hugo.gitbefore_after:week_index0.3345180.0531720.2303020.4387332647.0
3ycm-core_ycmd_ycm-core_ycmd_commits.csvycm-core_ycmd_hullabaloo_CONTRIBUTING.mdran_valsproject_idycm-core_ycmdbefore_after:week_index-0.0006050.094662-0.1861390.1849281328.0
4aio-libs_aiohttp.git_aio-libs_aiohttp.git_commits.csvaio-libs_aiohttp.git_hullabaloo_CONTRIBUTING.rstran_valsproject_idaio-libs_aiohttp.gitbefore_after:week_index0.3484210.0518260.2468450.4499972658.0
\n", "
" ], "text/plain": [
  " repo_id commits_filepath \\\n",
  "0 aio-libs_aiomysql.git _aio-libs_aiomysql.git_commits.csv \n",
  "1 qTox_qTox _qTox_qTox_commits.csv \n",
  "2 gohugoio_hugo.git _gohugoio_hugo.git_commits.csv \n",
  "3 ycm-core_ycmd _ycm-core_ycmd_commits.csv \n",
  "4 aio-libs_aiohttp.git _aio-libs_aiohttp.git_commits.csv \n",
  "\n",
  " fvf_filepath effect group \\\n",
  "0 aio-libs_aiomysql.git_hullabaloo_CONTRIBUTING.rst ran_vals project_id \n",
  "1 qTox_qTox_hullabaloo_CONTRIBUTING.md ran_vals project_id \n",
  "2 gohugoio_hugo.git_hullabaloo_CONTRIBUTING.md ran_vals project_id \n",
  "3 ycm-core_ycmd_hullabaloo_CONTRIBUTING.md ran_vals project_id \n",
  "4 aio-libs_aiohttp.git_hullabaloo_CONTRIBUTING.rst ran_vals project_id \n",
  "\n",
  " level term estimate std.error \\\n",
  "0 aio-libs_aiomysql.git before_after:week_index -0.142476 0.116488 \n",
  "1 qTox_qTox before_after:week_index 0.385689 0.048433 \n",
  "2 gohugoio_hugo.git before_after:week_index 0.334518 0.053172 \n",
  "3 ycm-core_ycmd before_after:week_index -0.000605 0.094662 \n",
  "4 aio-libs_aiohttp.git before_after:week_index 0.348421 0.051826 \n",
  "\n",
  " conf.low conf.high ranef_grouping rank \n",
  "0 -0.370789 0.085838 1 222.0 \n",
  "1 0.290763 0.480615 2 684.0 \n",
  "2 0.230302 0.438733 2 647.0 \n",
  "3 -0.186139 0.184928 1 328.0 \n",
  "4 0.246845 0.449997 2 658.0 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Inner-join the CONTRIBUTING manifest with its random-effect estimates on\n",
  "# `repo_id` (repos missing from either table are dropped), then discard the\n",
  "# leftover 'Unnamed: 0' column (presumably a row index saved in an input CSV).\n",
  "contributing_merged = (\n",
  "    contributing_manifest\n",
  "    .merge(contributing_ranef, on='repo_id', how='inner')\n",
  "    .drop(columns='Unnamed: 0')\n",
  ")\n",
  "contributing_merged.head()" ] },
 { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [
  "# Save the joined table to the working directory without the row index.\n",
  "contributing_merged.to_csv(\n",
  "    '0203_contributing_merged_manifest.csv',\n",
  "    index=False,\n",
  ")" ] },
 { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Empty DataFrame\n", "Columns: [repo_id, 
commits_filepath, fvf_filepath]\n", "Index: []\n" ] } ], "source": [
  "# Sanity check before the merge in the next cell: list every README-manifest\n",
  "# row whose `repo_id` occurs more than once. An empty frame means the join key\n",
  "# is unique on the manifest side, so the merge cannot fan out because of it.\n",
  "duplicates = readme_manifest[readme_manifest.duplicated(subset=['repo_id'], keep=False)]\n",
  "print(duplicates)" ] },
 { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4247" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Inner-join the README manifest with its random-effect estimates on\n",
  "# `repo_id`, discard the leftover 'Unnamed: 0' column (presumably a row index\n",
  "# saved in an input CSV), and display how many rows survived the join.\n",
  "readme_merged = pd.merge(readme_manifest, readme_ranef, on=['repo_id'], how='inner')\n",
  "readme_merged = readme_merged.drop(columns='Unnamed: 0')\n",
  "len(readme_merged)" ] },
 { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [
  "# Save the joined table to the working directory without the row index.\n",
  "readme_merged.to_csv('0203_readme_merged_manifest.csv', index=False)" ] } ],
 "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } },
 "nbformat": 4, "nbformat_minor": 2 }