# CaptchaOCR / app.py
# mohakkapoor4
# Remove fixed height constraint for CAPTCHA image display in UI.
# 99f9a39
import os
import random
import gradio as gr
from PIL import Image
import torch
# Import your inference module
import inference as inf
from src.generateCaptcha import generate_captcha
from src.config import cfg # sizes, charset, dirs
# Prefer the GPU when CUDA is available; otherwise run on the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Load the checkpoint once at import time so every request reuses the same
# model object (presumably a torch.nn.Module -- confirm in inference.py),
# then move it to DEVICE and call .eval() on it.
MODEL = inf.load_model("checkpoints/best_model.pth")
MODEL = MODEL.to(DEVICE)
MODEL = MODEL.eval()

# Generated CAPTCHA images are written here; create the directory up front
# (no error if it already exists).
os.makedirs(cfg.RESULT_DIR, exist_ok=True)
def random_text():
    """Return a random CAPTCHA string built from ``cfg.chars``.

    The length is drawn uniformly from ``cfg.CAPTCHA_LEN_LOWER_LIMIT`` to
    ``cfg.CAPTCHA_LEN_UPPER_LIMIT`` (both ends inclusive), and characters are
    sampled with replacement.
    """
    length = random.randint(
        cfg.CAPTCHA_LEN_LOWER_LIMIT, cfg.CAPTCHA_LEN_UPPER_LIMIT
    )
    picked = random.choices(cfg.chars, k=length)
    return "".join(picked)
def calculate_accuracy(prediction, target):
    """Return the position-wise character accuracy as a percentage string.

    Characters are compared index by index. The score is the number of
    matching positions divided by the length of the *longer* string, so a
    prediction that is too short or too long is penalised rather than being
    scored only on the overlapping prefix (e.g. "ab" vs "abcd" is 50%, not
    100%).

    Args:
        prediction: Predicted CAPTCHA text (may be empty or ``None``).
        target: Ground-truth CAPTCHA text (may be empty or ``None``).

    Returns:
        ``"0%"`` when either input is empty or ``None``; otherwise the
        accuracy formatted with one decimal place, e.g. ``"66.7%"``.
    """
    if not prediction or not target:
        return "0%"

    # zip stops at the shorter string, so only overlapping positions can match.
    matches = sum(p == t for p, t in zip(prediction, target))
    # Missing or extra characters count as errors.
    total = max(len(prediction), len(target))
    return f"{matches / total * 100:.1f}%"
def ui_generate():
    """Create a new CAPTCHA, save it under ``cfg.RESULT_DIR``, and return UI values.

    Returns:
        A 4-tuple ``(image, text, saved_path, solve_button_update)``: the
        generated image, the text it encodes, the path it was saved to, and a
        ``gr.update`` that makes the Solve button interactive with the
        "primary" variant.
    """
    captcha_text = random_text()
    # Random 4-digit suffix so repeated texts are unlikely to share a filename.
    suffix = random.randint(1000, 9999)

    image = generate_captcha(captcha_text, width=cfg.W_max, height=cfg.H)

    save_path = os.path.join(cfg.RESULT_DIR, f"{captcha_text}_{suffix}.png")
    image.save(save_path)

    # An image now exists, so the Solve button can be enabled.
    return (
        image,
        captcha_text,
        save_path,
        gr.update(interactive=True, variant="primary"),
    )
def ui_solve(path_hint: str, ground_truth: str):
    """Run the OCR model on the saved CAPTCHA and score it against the truth.

    Args:
        path_hint: Filesystem path of the generated CAPTCHA image.
        ground_truth: The text the CAPTCHA was generated from.

    Returns:
        ``(accuracy, prediction)``. When ``path_hint`` is empty or does not
        point to an existing file, the accuracy is ``"0%"`` and the second
        element is a message asking the user to generate a CAPTCHA first.
    """
    image_missing = not path_hint or not os.path.exists(path_hint)
    if image_missing:
        return "0%", "No image generated yet. Click Generate CAPTCHA first."

    model_input = inf.preprocess_image(path_hint, (cfg.W_max, cfg.H))
    prediction = inf.predict_captcha(MODEL, model_input, DEVICE)
    score = calculate_accuracy(prediction, ground_truth)
    return score, prediction
# Page layout: buttons and ground truth on top, the CAPTCHA image below,
# then accuracy and prediction side by side.
# NOTE(review): nesting is reconstructed from the layout comments; the exact
# placement of the hidden path_box (inside or outside the image row) should
# be confirmed against the deployed app.
with gr.Blocks(title="CAPTCHA OCR (checkpoint)") as demo:
    gr.Markdown("## CAPTCHA OCR ")

    with gr.Row():
        # Left side: the two action buttons stacked in a single column.
        with gr.Column(scale=1):
            gen_btn = gr.Button("Generate CAPTCHA", variant="primary")
            # Solve starts disabled; ui_generate enables it through gr.update.
            solve_btn = gr.Button("Solve", interactive=False, variant="secondary")
        # Right side: read-only ground-truth text.
        gt_out = gr.Textbox(label="Ground Truth", interactive=False, text_align="center")

    with gr.Row():
        img_out = gr.Image(label="Generated CAPTCHA", type="pil")

    # Hidden textbox that carries the saved image path from Generate to Solve.
    path_box = gr.Textbox(label="Internal Path", interactive=False, visible=False)

    # Result row: accuracy on the left, predicted text on the right.
    with gr.Row():
        accuracy_out = gr.Textbox(label="Character Accuracy", interactive=False, text_align="center")
        pred_out = gr.Textbox(label="Prediction", interactive=False, text_align="center")

    # Output order must match ui_generate's return tuple:
    # (image, text, saved path, Solve-button update).
    gen_btn.click(fn=ui_generate, outputs=[img_out, gt_out, path_box, solve_btn])

    # Solve reads only the hidden path and the ground truth (no upload input).
    solve_btn.click(fn=ui_solve, inputs=[path_box, gt_out], outputs=[accuracy_out, pred_out])


if __name__ == "__main__":
    demo.launch()