From 529571abb18743a32cbf4f7aac99c0a7725fe328 Mon Sep 17 00:00:00 2001 From: Matthew Gaughan Date: Fri, 9 May 2025 14:44:33 -0700 Subject: [PATCH] updating with initial olmo evaluation of things --- cgt-s1.py | 0 cgt-s3.py | 0 .../auto-dedup-cites.csv | 0 .../auto_dedup_results.bib | 0 t1all.bib => cites/t1all.bib | 0 t2all.bib => cites/t2all.bib | 0 evolution-adaptation-script.py | 0 .../prelim-class-checkpoint.ipynb | 139 ++++++++++++++++++ models/prelim-class.ipynb | 118 ++++++++++++++- readme.txt | 14 ++ .../.ipynb_checkpoints/test-checkpoint.py | 0 lit-dedup.R => scripts/lit-dedup.R | 0 scripts/test.py | 25 ++++ 13 files changed, 294 insertions(+), 2 deletions(-) delete mode 100644 cgt-s1.py delete mode 100644 cgt-s3.py rename auto-dedup-cites.csv => cites/auto-dedup-cites.csv (100%) rename auto_dedup_results.bib => cites/auto_dedup_results.bib (100%) rename t1all.bib => cites/t1all.bib (100%) rename t2all.bib => cites/t2all.bib (100%) delete mode 100644 evolution-adaptation-script.py create mode 100644 models/.ipynb_checkpoints/prelim-class-checkpoint.ipynb create mode 100644 readme.txt rename test.py => scripts/.ipynb_checkpoints/test-checkpoint.py (100%) rename lit-dedup.R => scripts/lit-dedup.R (100%) create mode 100644 scripts/test.py diff --git a/cgt-s1.py b/cgt-s1.py deleted file mode 100644 index e69de29..0000000 diff --git a/cgt-s3.py b/cgt-s3.py deleted file mode 100644 index e69de29..0000000 diff --git a/auto-dedup-cites.csv b/cites/auto-dedup-cites.csv similarity index 100% rename from auto-dedup-cites.csv rename to cites/auto-dedup-cites.csv diff --git a/auto_dedup_results.bib b/cites/auto_dedup_results.bib similarity index 100% rename from auto_dedup_results.bib rename to cites/auto_dedup_results.bib diff --git a/t1all.bib b/cites/t1all.bib similarity index 100% rename from t1all.bib rename to cites/t1all.bib diff --git a/t2all.bib b/cites/t2all.bib similarity index 100% rename from t2all.bib rename to cites/t2all.bib diff --git 
a/evolution-adaptation-script.py b/evolution-adaptation-script.py deleted file mode 100644 index e69de29..0000000 diff --git a/models/.ipynb_checkpoints/prelim-class-checkpoint.ipynb b/models/.ipynb_checkpoints/prelim-class-checkpoint.ipynb new file mode 100644 index 0000000..af89c3d --- /dev/null +++ b/models/.ipynb_checkpoints/prelim-class-checkpoint.ipynb @@ -0,0 +1,139 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "fcfcd3e1", + "metadata": { + "vscode": { + "languageId": "plaintext" + } + }, + "outputs": [], + "source": [ + "#https://huggingface.co/allenai/OLMo-2-1124-13B-Instruct-GGUF\n", + "#https://huggingface.co/allenai/OLMo-2-0425-1B-Instruct" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d5cf1e46-0cf2-4a55-869c-ac1f43f986fd", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "69e68a80-1a85-4009-8120-fcef79ae1c17", + "metadata": {}, + "outputs": [], + "source": [ + "olmo = AutoModelForCausalLM.from_pretrained(\n", + " \"allenai/OLMo-2-0425-1B-Instruct\" \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e4930b9-1d20-4d49-bb6e-e1a06aa91f52", + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(\"allenai/OLMo-2-0425-1B-Instruct\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "99515b35-7f18-4327-b722-653bb034da63", + "metadata": {}, + "outputs": [], + "source": [ + "classification_prompt = [\"Prompt: Title-Abstract Classification Task \\n Study Characteristics: \\n English Empirical Studies: Academic papers in English that study or analyze empirical evidence, excluding literature reviews. 
\\n FOSS Focus: Research focused primarily on the domain of free and open source software (FOSS) projects.\\n FOSS Project Evolution: Research specifically examining longitudinal changes to FOSS projects. \\n FOSS Project Adaptation: Research centered on intentional changes made by FOSS projects to better align themselves with their broader environment. \\n Task Instructions: For each of the four study characteristics listed above (1-4), indicate the presence of the characteristic in the given title-abstract pair with a 'yes' or 'no' label. \\n\\n **Title: Underproduction: An Approach for Measuring Risk in Open Source Software** \\n **Abstract: The widespread adoption of Free/Libre and Open Source Software (FLOSS) implies that the maintenance of widely used software components relies on volunteer effort. We identify 'underproduction' as a new risk when labor supply and demand are misaligned. We present a framework and a statistical method applied to a comprehensive dataset from the Debian GNU/Linux distribution. Two experiments are presented to demonstrate our approach's utility and reveal widespread underproduction.** \\n\\n Processing Instructions for AI: \\n Read the title and abstract carefully. Measure the relevance of each study characteristic against the provided title and abstract. Please respond with a 'yes' or 'no' for each characteristic. \\n Note: Your responses should be based solely on the given title and abstract. 
No external information should be considered.\"]\n", + "classification_inputs = tokenizer(classification_prompt, return_tensors='pt', return_token_type_ids=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "f265fa66-5105-47c1-ac81-7ec3906c9bd9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#do_sample = False sets it to deterministic sampling \n", + "trial_response = olmo.generate(\n", + " **classification_inputs,\n", + " max_new_tokens=256,\n", + " do_sample=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8fc2f1c0-5865-4209-b18c-909937e69f05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: Title-Abstract Classification Task \n", + " Study Characteristics: \n", + " English Empirical Studies: Academic papers in English that study or analyze empirical evidence, excluding literature reviews. \n", + " FOSS Focus: Research focused primarily on the domain of free and open source software (FOSS) projects.\n", + " FOSS Project Evolution: Research specifically examining longitudinal changes to FOSS projects. \n", + " FOSS Project Adaptation: Research centered on intentional changes made by FOSS projects to better align themselves with their broader environment. \n", + " Task Instructions: For each of the four study characteristics listed above (1-4), indicate the presence of the characteristic in the given title-abstract pair with a 'yes' or 'no' label. \n", + "\n", + " **Title: Underproduction: An Approach for Measuring Risk in Open Source Software** \n", + " **Abstract: The widespread adoption of Free/Libre and Open Source Software (FLOSS) implies that the maintenance of widely used software components relies on volunteer effort. We identify 'underproduction' as a new risk when labor supply and demand are misaligned. 
We present a framework and a statistical method applied to a comprehensive dataset from the Debian GNU/Linux distribution. Two experiments are presented to demonstrate our approach's utility and reveal widespread underproduction.** \n", + "\n", + " Processing Instructions for AI: \n", + " Read the title and abstract carefully. Measure the relevance of each study characteristic against the provided title and abstract. Please respond with a 'yes' or 'no' for each characteristic. \n", + " Note: Your responses should be based solely on the given title and abstract. No external information should be considered. If you encounter any ambiguity, the title alone should be sufficient to answer the question. \n", + "\n", + "**Example:** For the study focusing on effect of leadership on software quality, if the title mentions \"leadership\" and the abstract talks about \"the influence of team size on software quality\", the response would be 'yes'. \n", + "\n", + "**Answering the prompt:** For the given title and abstract, the characteristic 'underproduction' is not mentioned. Therefore, the answer is 'no'. \n", + "\n", + "**Instructions:** Based on the provided information, indicate whether the study focuses on 'underproduction' in the given title-abstract pair. 
\n", + "**Answer:** no\n" + ] + } + ], + "source": [ + "print(tokenizer.batch_decode(trial_response, skip_special_tokens=True)[0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/models/prelim-class.ipynb b/models/prelim-class.ipynb index 2979f62..af89c3d 100644 --- a/models/prelim-class.ipynb +++ b/models/prelim-class.ipynb @@ -11,13 +11,127 @@ }, "outputs": [], "source": [ - "#https://huggingface.co/allenai/OLMo-2-1124-13B-Instruct-GGUF" + "#https://huggingface.co/allenai/OLMo-2-1124-13B-Instruct-GGUF\n", + "#https://huggingface.co/allenai/OLMo-2-0425-1B-Instruct" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "d5cf1e46-0cf2-4a55-869c-ac1f43f986fd", + "metadata": {}, + "outputs": [], + "source": [ + "from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "69e68a80-1a85-4009-8120-fcef79ae1c17", + "metadata": {}, + "outputs": [], + "source": [ + "olmo = AutoModelForCausalLM.from_pretrained(\n", + " \"allenai/OLMo-2-0425-1B-Instruct\" \n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1e4930b9-1d20-4d49-bb6e-e1a06aa91f52", + "metadata": {}, + "outputs": [], + "source": [ + "tokenizer = AutoTokenizer.from_pretrained(\"allenai/OLMo-2-0425-1B-Instruct\")" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "99515b35-7f18-4327-b722-653bb034da63", + "metadata": {}, + "outputs": [], + "source": [ + "classification_prompt = [\"Prompt: Title-Abstract Classification Task \\n Study Characteristics: 
\\n English Empirical Studies: Academic papers in English that study or analyze empirical evidence, excluding literature reviews. \\n FOSS Focus: Research focused primarily on the domain of free and open source software (FOSS) projects.\\n FOSS Project Evolution: Research specifically examining longitudinal changes to FOSS projects. \\n FOSS Project Adaptation: Research centered on intentional changes made by FOSS projects to better align themselves with their broader environment. \\n Task Instructions: For each of the four study characteristics listed above (1-4), indicate the presence of the characteristic in the given title-abstract pair with a 'yes' or 'no' label. \\n\\n **Title: Underproduction: An Approach for Measuring Risk in Open Source Software** \\n **Abstract: The widespread adoption of Free/Libre and Open Source Software (FLOSS) implies that the maintenance of widely used software components relies on volunteer effort. We identify 'underproduction' as a new risk when labor supply and demand are misaligned. We present a framework and a statistical method applied to a comprehensive dataset from the Debian GNU/Linux distribution. Two experiments are presented to demonstrate our approach's utility and reveal widespread underproduction.** \\n\\n Processing Instructions for AI: \\n Read the title and abstract carefully. Measure the relevance of each study characteristic against the provided title and abstract. Please respond with a 'yes' or 'no' for each characteristic. \\n Note: Your responses should be based solely on the given title and abstract. 
No external information should be considered.\"]\n", + "classification_inputs = tokenizer(classification_prompt, return_tensors='pt', return_token_type_ids=False)" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "f265fa66-5105-47c1-ac81-7ec3906c9bd9", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "#do_sample = False sets it to deterministic sampling \n", + "trial_response = olmo.generate(\n", + " **classification_inputs,\n", + " max_new_tokens=256,\n", + " do_sample=False\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "8fc2f1c0-5865-4209-b18c-909937e69f05", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Prompt: Title-Abstract Classification Task \n", + " Study Characteristics: \n", + " English Empirical Studies: Academic papers in English that study or analyze empirical evidence, excluding literature reviews. \n", + " FOSS Focus: Research focused primarily on the domain of free and open source software (FOSS) projects.\n", + " FOSS Project Evolution: Research specifically examining longitudinal changes to FOSS projects. \n", + " FOSS Project Adaptation: Research centered on intentional changes made by FOSS projects to better align themselves with their broader environment. \n", + " Task Instructions: For each of the four study characteristics listed above (1-4), indicate the presence of the characteristic in the given title-abstract pair with a 'yes' or 'no' label. \n", + "\n", + " **Title: Underproduction: An Approach for Measuring Risk in Open Source Software** \n", + " **Abstract: The widespread adoption of Free/Libre and Open Source Software (FLOSS) implies that the maintenance of widely used software components relies on volunteer effort. We identify 'underproduction' as a new risk when labor supply and demand are misaligned. 
We present a framework and a statistical method applied to a comprehensive dataset from the Debian GNU/Linux distribution. Two experiments are presented to demonstrate our approach's utility and reveal widespread underproduction.** \n", + "\n", + " Processing Instructions for AI: \n", + " Read the title and abstract carefully. Measure the relevance of each study characteristic against the provided title and abstract. Please respond with a 'yes' or 'no' for each characteristic. \n", + " Note: Your responses should be based solely on the given title and abstract. No external information should be considered. If you encounter any ambiguity, the title alone should be sufficient to answer the question. \n", + "\n", + "**Example:** For the study focusing on effect of leadership on software quality, if the title mentions \"leadership\" and the abstract talks about \"the influence of team size on software quality\", the response would be 'yes'. \n", + "\n", + "**Answering the prompt:** For the given title and abstract, the characteristic 'underproduction' is not mentioned. Therefore, the answer is 'no'. \n", + "\n", + "**Instructions:** Based on the provided information, indicate whether the study focuses on 'underproduction' in the given title-abstract pair. 
\n", + "**Answer:** no\n" + ] + } + ], + "source": [ + "print(tokenizer.batch_decode(trial_response, skip_special_tokens=True)[0])" ] } ], "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, "language_info": { - "name": "python" + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.11" } }, "nbformat": 4, diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..bd8f4ba --- /dev/null +++ b/readme.txt @@ -0,0 +1,14 @@ +this is the readme file for the data and scripts used in the computational analysis of this SLR + +cites\ +- the various citation files used during the filtering + +scripts\ +- helper scripts for various tasks of cleaning/deduplication etc + +models\ +- the code to run the lms + +studies\ +- the pdf files for the final sample of studies + diff --git a/test.py b/scripts/.ipynb_checkpoints/test-checkpoint.py similarity index 100% rename from test.py rename to scripts/.ipynb_checkpoints/test-checkpoint.py diff --git a/lit-dedup.R b/scripts/lit-dedup.R similarity index 100% rename from lit-dedup.R rename to scripts/lit-dedup.R diff --git a/scripts/test.py b/scripts/test.py new file mode 100644 index 0000000..2e83cba --- /dev/null +++ b/scripts/test.py @@ -0,0 +1,25 @@ +from transformers import AutoModelForCausalLM, AutoTokenizer +import torch + + +olmo = AutoModelForCausalLM.from_pretrained( + "allenai/OLMo-2-1124-7B-hf", + torch_dtype=torch.float32, + device_map="auto" +) +tokenizer = AutoTokenizer.from_pretrained("allenai/OLMo-2-1124-7B-hf") +message = ["Honolulu is a "] + +inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False) + +response = olmo.generate( + **inputs, + max_new_tokens=128, + do_sample=True, + top_k=50, + top_p=0.95, + temperature=0.5 + +) + 
+print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])