Spaces:

Hammedalmodel
/

handwritten_to_text

Runtime error

App Files Files Community

Hammedalmodel committed on Jan 20

Commit

9f8dfc0

verified ·

1 Parent(s): 3387487

Update app.py

Browse files

Files changed (1) hide show

app.py +51 -49

app.py CHANGED Viewed

@@ -1,8 +1,12 @@
 from transformers import MllamaForConditionalGeneration, AutoProcessor
 from PIL import Image
 import torch
-import gradio as gr
-import spaces
 # Initialize model and processor
 ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
@@ -12,52 +16,50 @@ model = MllamaForConditionalGeneration.from_pretrained(
 ).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
-@spaces.GPU
-def extract_text(image):
-    # Convert image to RGB
-    image = Image.open(image).convert("RGB")
-    # Create message structure
-    messages = [
-        {
-            "role": "user",
-            "content": [
-                {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
-                {"type": "image"}
-            ]
-        }
-    ]
-    # Process input
-    texts = processor.apply_chat_template(messages, add_generation_prompt=True)
-    inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")
-    # Generate output
-    outputs = model.generate(**inputs, max_new_tokens=250)
-    result = processor.decode(outputs[0], skip_special_tokens=True)
-    print(result)
-    # Clean up the output to remove the prompt and assistant text
-    if "assistant" in result.lower():
-        result = result[result.lower().find("assistant") + len("assistant"):].strip()
-    # Remove any remaining conversation markers
-    result = result.replace("user", "").replace("Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output", "").strip()
-    print(result)
-    return result
-# Create Gradio interface
-demo = gr.Interface(
-    fn=extract_text,
-    inputs=gr.Image(type="filepath", label="Upload Image"),
-    outputs=gr.Textbox(label="Extracted Text"),
-    title="Handwritten Text Extractor",
-    description="Upload an image containing handwritten text to extract its content.",
-)
-# Launch the app
-demo.launch(debug=True)

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
 from transformers import MllamaForConditionalGeneration, AutoProcessor
 from PIL import Image
 import torch
+import requests
+from io import BytesIO
+app = FastAPI()
 # Initialize model and processor
 ckpt = "unsloth/Llama-3.2-11B-Vision-Instruct"
 ).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt)
+class ImageRequest(BaseModel):
+    """Request body for the /extract_text endpoint."""
+    # Despite the name, this value is passed to requests.get(), so it must be a URL
+    # of the image to download, not a local filesystem path.
+    image_path: str
+@app.post("/extract_text")
+async def extract_text(request: ImageRequest):
+    """Download the image at ``request.image_path`` and return its handwritten text.
+
+    Args:
+        request: Request body whose ``image_path`` is the URL of the image.
+
+    Returns:
+        A dict with a single ``"text"`` key holding the extracted text,
+        wrapped in a leading and trailing newline.
+
+    Raises:
+        HTTPException: 400 if the image cannot be fetched from the URL,
+            500 if decoding the image or running the model fails.
+    """
+    # NOTE(review): the URL comes straight from the client, so this endpoint can be
+    # pointed at internal hosts (SSRF). Restrict allowed hosts before exposing it.
+    # NOTE(review): requests.get() and model.generate() are blocking calls inside an
+    # async handler; they stall the event loop for the duration of the request.
+    try:
+        # A timeout keeps a stalled remote host from hanging the request forever.
+        response = requests.get(request.image_path, timeout=30)
+    except requests.RequestException as e:
+        raise HTTPException(status_code=400, detail="Failed to fetch image from URL") from e
+    # Raised outside the generic handler below so the 400 is not rewritten as a 500.
+    if response.status_code != 200:
+        raise HTTPException(status_code=400, detail="Failed to fetch image from URL")
+    try:
+        # Open image from bytes
+        image = Image.open(BytesIO(response.content)).convert("RGB")
+        # Create message structure
+        messages = [
+            {
+                "role": "user",
+                "content": [
+                    {"type": "text", "text": "Extract handwritten text from the image and output only the extracted text without any additional description or commentary in output"},
+                    {"type": "image"}
+                ]
+            }
+        ]
+        # Process input
+        texts = processor.apply_chat_template(messages, add_generation_prompt=True)
+        inputs = processor(text=texts, images=[image], return_tensors="pt").to("cuda")
+        # Generate output
+        outputs = model.generate(**inputs, max_new_tokens=250)
+        # generate() returns the prompt tokens followed by the completion. Decoding
+        # only the completion avoids string surgery on "user"/"assistant", which
+        # would also delete those words from genuinely extracted text.
+        prompt_length = inputs["input_ids"].shape[-1]
+        result = processor.decode(outputs[0][prompt_length:], skip_special_tokens=True).strip()
+        return {"text": f"\n{result}\n"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# Start the API server with uvicorn when this file is executed directly.
+if __name__ == "__main__":
+    import uvicorn
+    # Listen on all network interfaces, port 7860.
+    uvicorn.run(app, host="0.0.0.0", port=7860)