# adaptation-slr/models/p1-categorization.py

from transformers import AutoModelForCausalLM, AutoTokenizer, OlmoForCausalLM
import torch

# Load the tokenizer and the causal language model from the same checkpoint,
# placing the model on the GPU when CUDA is available and on the CPU otherwise.
_CHECKPOINT = "allenai/OLMo-2-0425-1B-Instruct"

if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

tokenizer = AutoTokenizer.from_pretrained(_CHECKPOINT)
olmo = AutoModelForCausalLM.from_pretrained(_CHECKPOINT)
olmo = olmo.to(device)

# Build the classification prompt from three parts joined by newlines:
#   1. a short lead-in sentence,
#   2. the paper record (title + abstract) wrapped in single quotes,
#   3. the instructions listing the four YES/NO characteristics.
#
# The long literals are written as adjacent string pieces (one per sentence or
# characteristic); Python concatenates them at compile time, so the resulting
# text is a single string.
#
# NOTE(review): the instruction text contains typos ("Egnlish", "this an
# academic papers") and an unclosed parenthesis after the "(e.g." example.
# They are part of the text sent to the model and are kept verbatim here.
first_sentence = "Given the following data:"

data_prompt = (
    "'Title - Underproduction: An Approach for Measuring Risk in Open Source Software \n "
    "Abstract - The widespread adoption of Free/Libre and Open Source Software (FLOSS) means that the ongoing maintenance of many widely used software components relies on the collaborative effort of volunteers who set their own priorities and choose their own tasks. "
    "We argue that this has created a new form of risk that we call 'underproduction' which occurs when the supply of software engineering labor becomes out of alignment with the demand of people who rely on the software produced. "
    "We present a conceptual framework for identifying relative underproduction in software as well as a statistical method for applying our framework to a comprehensive dataset from the Debian GNU/Linux distribution that includes 21,902 source packages and the full history of 461,656 bugs. "
    "We draw on this application to present two experiments: (1) a demonstration of how our technique can be used to identify at-risk software packages in a large FLOSS repository and (2) a validation of these results using an alternate indicator of package risk. "
    "Our analysis demonstrates both the utility of our approach and reveals the existence of widespread underproduction in a range of widely-installed software components in Debian.'"
)

third_prompt = (
    "please categorize it based on the following numbered characteristics: \n\n "
    "1: YES/NO (Characteristic 1. This is an English language empirical study, this an academic papers written in Egnlish that studies or analyzes evidence. Literature reviews are not empirical studies.)  \n "
    "2: YES/NO (Characteristic 2. This focuses on FOSS projects, the focus of the research work is on the domain of free and open source software projects.) \n "
    "3: YES/NO (Characteristic 3. This studies FOSS evolution, the data focuses on longitudinal changes to free and open source projects over time.) \n "
    "4: YES/NO (Characteristic 4. This studies FOSS adaptation, the data focuses on intentional changes made by free and open source software projects to better align themselves with their broader environment.) \n\n "
    "Only respond with the appropriate number followed by 'YES' if the characteristic is present in the provided data or 'NO' if it is not (e.g. '1: NO; 2: YES;'. Do not provide any additional information."
)

# Final prompt: lead-in, data, and instructions, each on its own line.
prompt = "\n".join([first_sentence, data_prompt, third_prompt])

# Tokenize the assembled prompt into PyTorch tensors and move them to the same
# device as the model. token_type_ids are explicitly not requested, so they are
# not forwarded to generate() via **inputs.
# NOTE(review): the checkpoint name says "Instruct", yet the prompt is tokenized
# as raw text rather than through the tokenizer's chat template
# (tokenizer.apply_chat_template) — confirm this is intended.
inputs = tokenizer(prompt, return_tensors='pt', return_token_type_ids=False).to(device)

# do_sample=False turns sampling off, so no random draws are made while
# decoding; generation is capped at 256 newly generated tokens.
response = olmo.generate(**inputs, max_new_tokens=256, do_sample=False)

# Decode the first sequence of the batch (a single prompt was passed in),
# dropping special tokens.
# NOTE(review): for a causal LM, generate() presumably returns the prompt
# tokens followed by the continuation, so the saved text would contain the
# prompt as well as the model's answer — confirm before parsing the file.
response_txt = tokenizer.batch_decode(response, skip_special_tokens=True)[0]

# Write with an explicit UTF-8 encoding: without it open() uses the locale's
# preferred encoding, and non-ASCII characters in the generated text can raise
# UnicodeEncodeError on systems whose locale is not UTF-8.
with open('/home/nws8519/git/adaptation-slr/trial-output.txt', 'w', encoding='utf-8') as file:
    file.write(response_txt)