from transformers import AutoModelForCausalLM, AutoTokenizer
from PIL import Image
import requests
import torch
# Hub repository that hosts both the model weights and the tokenizer files.
model_id = "anananan116/TinyVLM"

# Load the checkpoint in float16. `trust_remote_code=True` is required because
# the repository ships its own modeling code rather than a built-in class.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    trust_remote_code=True,
    torch_dtype=torch.float16,
)
# Move the weights to the GPU and switch the module to evaluation mode.
model = model.to('cuda').eval()

tokenizer = AutoTokenizer.from_pretrained(model_id)
# `<IMGPLH>` is the image placeholder; it is replaced by image embeddings.
# The prompt must contain exactly one `<IMGPLH>` per input image.
prompt = "Here's an image:<IMGPLH>Describe this image."

# Download the sample image. The timeout keeps the script from hanging forever
# on a stalled connection, and raise_for_status() surfaces an HTTP error as a
# clear exception instead of letting PIL choke on an error page.
response = requests.get(
    'https://github.com/anananan116/TinyVLM/blob/main/test.png?raw=true',
    stream=True,
    timeout=30,
)
response.raise_for_status()
image = Image.open(response.raw)

# Prompts and images are passed as parallel lists (a batch of one here). The
# returned mapping is read via the keys 'input_ids', 'attention_mask' and
# 'encoded_image' when calling generate().
inputs = model.prepare_input_ids_for_generation([prompt], [image], tokenizer)
# Gradients are not needed for inference, so run generation without autograd.
with torch.no_grad():
    generation_kwargs = {
        # Token ids and mask are moved to the GPU explicitly.
        "input_ids": inputs['input_ids'].to("cuda"),
        "attention_mask": inputs['attention_mask'].to("cuda"),
        # Forwarded exactly as returned by the input-preparation step.
        # NOTE(review): presumably already on the model's device - confirm.
        "encoded_image": inputs["encoded_image"],
        "max_new_tokens": 128,
        "do_sample": True,
    }
    outputs = model.generate(**generation_kwargs)

# Convert the generated ids back to text, omitting special tokens.
output_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)
- Downloads last month
- 48
Inference Providers
NEW
This model isn't deployed by any Inference Provider.
🙋 Ask for provider support