HW05_03 / app.py
Pichayada's picture
Create app.py
f4d48ca verified
raw
history blame
901 Bytes
def generate_caption(image):
    """Generate a caption for ``image`` using the ``<CAPTION>`` task.

    Relies on the names ``Image``, ``florence_processor``, ``florence_model``
    and ``device``, which are defined outside this function.

    Args:
        image: A ``PIL.Image.Image`` instance, or any object accepted by
            ``Image.fromarray`` (e.g. a NumPy array), which is converted
            to a PIL image first.

    Returns:
        The value stored under the ``"<CAPTION>"`` key of the processor's
        post-processed output (presumably the caption string -- confirm
        against the processor in use).
    """
    if not isinstance(image, Image.Image):
        image = Image.fromarray(image)
    # Inputs may arrive as grayscale, palette or RGBA images; convert them
    # so the processor always receives a three-channel RGB image.
    if image.mode != "RGB":
        image = image.convert("RGB")

    task = "<CAPTION>"  # plain caption task
    inputs = florence_processor(
        text=task, images=image, return_tensors="pt"
    ).to(device)

    generated_ids = florence_model.generate(
        input_ids=inputs["input_ids"],
        pixel_values=inputs["pixel_values"],
        max_new_tokens=1024,
        early_stopping=False,
        do_sample=False,  # deterministic beam search, no sampling
        num_beams=3,
    )

    # NOTE(review): special tokens are kept in the decoded text, presumably
    # because post_process_generation parses them -- confirm.
    generated_text = florence_processor.batch_decode(
        generated_ids, skip_special_tokens=False
    )[0]
    parsed_answer = florence_processor.post_process_generation(
        generated_text,
        task=task,
        image_size=(image.width, image.height),
    )

    prompt = parsed_answer[task]
    # f-string instead of "+" so a non-str result cannot raise TypeError
    # in the log line after generation has already succeeded.
    print(f"\n\nGeneration completed!:{prompt}")
    return prompt