update app
Browse files
app.py
CHANGED
|
@@ -222,15 +222,6 @@ model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
|
|
| 222 |
torch_dtype=torch.float16
|
| 223 |
).to(device).eval()
|
| 224 |
|
| 225 |
-
# Load Nanonets-OCR2-1.5B-exp
|
| 226 |
-
MODEL_ID_Y = "nanonets/Nanonets-OCR2-1.5B-exp"
|
| 227 |
-
processor_y = AutoProcessor.from_pretrained(MODEL_ID_Y, trust_remote_code=True)
|
| 228 |
-
model_y = Qwen2VLForConditionalGeneration.from_pretrained(
|
| 229 |
-
MODEL_ID_Y,
|
| 230 |
-
trust_remote_code=True,
|
| 231 |
-
torch_dtype=torch.float16
|
| 232 |
-
).to(device).eval()
|
| 233 |
-
|
| 234 |
def downsample_video(video_path):
|
| 235 |
"""
|
| 236 |
Downsamples the video to evenly spaced frames.
|
|
@@ -279,9 +270,6 @@ def generate_image(model_name: str, text: str, image: Image.Image,
|
|
| 279 |
elif model_name == "olmOCR-7B-0725":
|
| 280 |
processor = processor_w
|
| 281 |
model = model_w
|
| 282 |
-
elif model_name == "Nanonets-OCR2-1.5B-exp":
|
| 283 |
-
processor = processor_y
|
| 284 |
-
model = model_y
|
| 285 |
else:
|
| 286 |
yield "Invalid model selected.", "Invalid model selected."
|
| 287 |
return
|
|
@@ -344,9 +332,6 @@ def generate_video(model_name: str, text: str, video_path: str,
|
|
| 344 |
elif model_name == "olmOCR-7B-0725":
|
| 345 |
processor = processor_w
|
| 346 |
model = model_w
|
| 347 |
-
elif model_name == "Nanonets-OCR2-1.5B-exp":
|
| 348 |
-
processor = processor_y
|
| 349 |
-
model = model_y
|
| 350 |
else:
|
| 351 |
yield "Invalid model selected.", "Invalid model selected."
|
| 352 |
return
|
|
@@ -439,7 +424,7 @@ with gr.Blocks(css=css, theme=thistle_theme) as demo:
|
|
| 439 |
markdown_output = gr.Markdown(label="(Result.Md)")
|
| 440 |
|
| 441 |
model_choice = gr.Radio(
|
| 442 |
-
choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
|
| 443 |
"Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
|
| 444 |
label="Select Model",
|
| 445 |
value="Nanonets-OCR2-3B"
|
|
|
|
| 222 |
torch_dtype=torch.float16
|
| 223 |
).to(device).eval()
|
| 224 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 225 |
def downsample_video(video_path):
|
| 226 |
"""
|
| 227 |
Downsamples the video to evenly spaced frames.
|
|
|
|
| 270 |
elif model_name == "olmOCR-7B-0725":
|
| 271 |
processor = processor_w
|
| 272 |
model = model_w
|
|
|
|
|
|
|
|
|
|
| 273 |
else:
|
| 274 |
yield "Invalid model selected.", "Invalid model selected."
|
| 275 |
return
|
|
|
|
| 332 |
elif model_name == "olmOCR-7B-0725":
|
| 333 |
processor = processor_w
|
| 334 |
model = model_w
|
|
|
|
|
|
|
|
|
|
| 335 |
else:
|
| 336 |
yield "Invalid model selected.", "Invalid model selected."
|
| 337 |
return
|
|
|
|
| 424 |
markdown_output = gr.Markdown(label="(Result.Md)")
|
| 425 |
|
| 426 |
model_choice = gr.Radio(
|
| 427 |
+
choices=["Nanonets-OCR2-3B", "olmOCR-7B-0725", "RolmOCR-7B",
|
| 428 |
"Aya-Vision-8B", "Qwen2-VL-OCR-2B"],
|
| 429 |
label="Select Model",
|
| 430 |
value="Nanonets-OCR2-3B"
|