Rex-Omni / tutorials /pointing_example /gui_pointing_example.py
Mountchicken's picture
Upload 35 files
ced4fcf verified
raw
history blame
1.85 kB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
GUI pointing example using Rex Omni
"""
import matplotlib.pyplot as plt
import torch
from PIL import Image
from rex_omni import RexOmniVisualize, RexOmniWrapper
def main():
# Model path - replace with your actual model path
model_path = "IDEA-Research/Rex-Omni"
print("πŸš€ Initializing Rex Omni model...")
# Create wrapper with custom parameters
rex_model = RexOmniWrapper(
model_path=model_path,
backend="transformers", # Choose "transformers" or "vllm"
max_tokens=2048,
temperature=0.0,
top_p=0.05,
top_k=1,
repetition_penalty=1.05,
)
# Load image
image_path = "tutorials/detection_example/test_images/gui.png" # Replace with your image path
image = Image.open(image_path).convert("RGB")
print(f"βœ… Image loaded successfully!")
print(f"πŸ“ Image size: {image.size}")
# GUI pointing - find specific UI element
categories = ["element 'pause current song'"]
print("πŸ–±οΈ Performing GUI pointing...")
results = rex_model.inference(images=image, task="pointing", categories=categories)
# Process results
result = results[0]
if result["success"]:
predictions = result["extracted_predictions"]
vis_image = RexOmniVisualize(
image=image,
predictions=predictions,
font_size=50,
draw_width=15,
show_labels=True,
)
# Save visualization
output_path = (
"tutorials/pointing_example/test_images/gui_pointing_visualize.jpg"
)
vis_image.save(output_path)
print(f"βœ… GUI pointing visualization saved to: {output_path}")
else:
print(f"❌ Inference failed: {result['error']}")
if __name__ == "__main__":
main()