```python
import os
import sys

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model path: default to the current directory, or take the first CLI argument.
model_id = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the weights directly in bfloat16 and move the model to the GPU.
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16).to("cuda")

prompt = "Lily picked up a flower."
inputs = tokenizer(prompt, return_tensors="pt", return_token_type_ids=False).to("cuda")

# Generate up to 80 new tokens; generate() returns a (batch, seq_len) tensor,
# so take the single sequence from the batch before decoding.
out = model.generate(**inputs, max_new_tokens=80)[0]
print(tokenizer.decode(out))
```
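The script takes the model location as its first argument, falling back to the current working directory, so you can run it from inside a checkpoint folder with no arguments, or point it at a local path or Hugging Face Hub id. Assuming the file is saved as `generate.py` (a hypothetical name, not given in the source): `python generate.py ./my-checkpoint`.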