"""Sample a text continuation from OLMo-2-7B using Hugging Face transformers."""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

MODEL_NAME = "allenai/OLMo-2-1124-7B-hf"

# Load the model in full precision; device_map="auto" lets accelerate place
# layers on whatever devices are available (GPU if present, else CPU).
olmo = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float32,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

message = ["Honolulu is a "]
# return_token_type_ids=False: OLMo's tokenizer does not use token type ids,
# and generate() would reject them as an unexpected keyword.
inputs = tokenizer(message, return_tensors='pt', return_token_type_ids=False)
# Move input tensors onto the model's device so generate() does not hit a
# CPU/GPU device mismatch; this is a no-op when everything is on CPU.
# NOTE(review): with a model sharded across several devices, olmo.device is
# the first shard's device — the standard pattern from the transformers docs.
inputs = inputs.to(olmo.device)

# Stochastic decoding: up to 128 new tokens, combining top-k (50) and
# nucleus (top_p=0.95) filtering at temperature 0.5.
response = olmo.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=0.5,
)
# batch_decode returns one string per batch row; we submitted a single prompt.
print(tokenizer.batch_decode(response, skip_special_tokens=True)[0])