#!/usr/bin/env python
# -*- coding: utf-8 -*-
# NOTE: web-viewer residue from the page this file was extracted from
# ("Spaces: Running on Zero", "File size: 1,972 Bytes", commit ced4fcf,
# and a 1-70 line-number gutter) was folded into this comment; it is not
# part of the program.
"""
OCR text line-level detection example using Rex Omni (polygon format)
"""
import matplotlib.pyplot as plt
import torch
from PIL import Image
from rex_omni import RexOmniVisualize, RexOmniWrapper
def main(
    model_path="/comp_robot/jiangqing/projects/2023/research/R1/QwenSFTOfficial/open_source/IDEA-Research/Rex-Omni",
    image_path="tutorials/ocr_example/test_images/ocr.png",
    output_path="tutorials/ocr_example/test_images/ocr_polygon_visualize.jpg",
):
    """Run text line-level OCR detection (polygon format) on a single image.

    Builds a ``RexOmniWrapper``, runs its ``"ocr_polygon"`` task with the
    single category ``"text line"``, and, when the first result reports
    success, renders the predictions with ``RexOmniVisualize`` and saves the
    rendered image. On failure the reported error is printed instead.

    Args:
        model_path: Value passed to ``RexOmniWrapper`` as ``model_path``.
            The default is the example's placeholder; replace it with your
            actual model path.
        image_path: Image file to open; it is converted to RGB before
            inference.
        output_path: Destination file for the saved visualization.

    Returns:
        None. Progress and outcome are reported via ``print``.
    """
    # NOTE(review): the leading "π" / "β" characters in some messages below
    # look like mis-decoded emoji. They are kept as-is where the original
    # glyph cannot be recovered from this file -- confirm against upstream.
    print("π Initializing Rex Omni model...")

    # Create wrapper with custom parameters
    rex_model = RexOmniWrapper(
        model_path=model_path,
        backend="transformers",  # Choose "transformers" or "vllm"
        max_tokens=2048,
        temperature=0.0,
        top_p=0.05,
        top_k=1,
        repetition_penalty=1.05,
    )

    # Load image
    image = Image.open(image_path).convert("RGB")
    # This literal was split across two physical lines after a "β", which is
    # what a mis-decoded "✅" produces; restored as a one-line string.
    print("✅ Image loaded successfully!")
    print(f"π Image size: {image.size}")

    # OCR text line-level detection in polygon format
    categories = ["text line"]
    print("π Performing text line-level OCR detection (polygon format)...")
    results = rex_model.inference(
        images=image, task="ocr_polygon", categories=categories
    )

    # Guard the [0] lookup below: an empty result would otherwise surface as
    # an unexplained IndexError.
    if not results:
        print("Inference failed: no results returned")
        return

    # Process results (a single image was submitted, so only the first entry
    # is inspected)
    result = results[0]
    if not result["success"]:
        print(f"β Inference failed: {result['error']}")
        return

    predictions = result["extracted_predictions"]
    vis_image = RexOmniVisualize(
        image=image,
        predictions=predictions,
        font_size=15,
        draw_width=5,
        show_labels=True,
    )

    # Save visualization
    vis_image.save(output_path)
    # Same repair as the "Image loaded" message: rejoined split literal.
    print(f"✅ Polygon OCR visualization saved to: {output_path}")
# Run the example only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()