Create app.py

app.py (new file):
from __future__ import annotations
import os, io, re, json, time, mimetypes, tempfile, string
from typing import List, Union, Tuple, Any, Iterable

from PIL import Image
import pandas as pd
import gradio as gr
import google.generativeai as genai
import requests

# ================== CONFIG ==================
# Do NOT hardcode the key. Require the GOOGLE_API_KEY environment variable instead.
DEFAULT_API_KEY = ""  # Leave blank; set the key via the environment if needed.

INTERNAL_MODEL_MAP = {
    "Gemini 2.5 Flash": "gemini-2.5-flash",
    "Gemini 2.5 Pro": "gemini-2.5-pro",
}
EXTERNAL_MODEL_NAME = "prithivMLmods/Camel-Doc-OCR-062825 (External)"

try:
    RESAMPLE = Image.Resampling.LANCZOS  # Pillow >= 10
except AttributeError:
    RESAMPLE = Image.LANCZOS  # Pillow < 10

# ================== HELPERS ==================
import fitz  # PyMuPDF

def pdf_to_images(pdf_bytes: bytes) -> list[Image.Image]:
    doc = fitz.open(stream=pdf_bytes, filetype="pdf")
    pages = []
    for p in doc:
        pix = p.get_pixmap(dpi=200)
        img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
        pages.append(img)
    return pages

def ensure_rgb(im: Image.Image) -> Image.Image:
    return im.convert("RGB") if im.mode != "RGB" else im

def _read_file_bytes(upload: Union[str, os.PathLike, dict, object] | None) -> bytes:
    if upload is None:
        raise ValueError("No file uploaded.")
    if isinstance(upload, (str, os.PathLike)):
        with open(upload, "rb") as f:
            return f.read()
    if isinstance(upload, dict) and "path" in upload:
        with open(upload["path"], "rb") as f:
            return f.read()
    if hasattr(upload, "read"):
        return upload.read()
    raise TypeError(f"Unsupported file object: {type(upload)}")

def _make_previews(file_bytes: bytes, max_side: int = 2000) -> List[Image.Image]:
    """Return a list of RGB PIL.Image objects, resized so the longer side fits max_side."""
    if len(file_bytes) >= 4 and file_bytes[:4] == b"%PDF":
        pages = pdf_to_images(file_bytes)
    else:
        pages = [Image.open(io.BytesIO(file_bytes))]
    out = []
    for im in pages:
        im = ensure_rgb(im)
        if max_side:
            w, h = im.size
            scale = min(max_side / float(w), max_side / float(h), 1.0)
            if scale < 1.0:
                im = im.resize((max(1, int(w*scale)), max(1, int(h*scale))), RESAMPLE)
        out.append(im)
    return out

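# Sizing example (illustrative, not part of the original logic): a 4000x3000 page with
# max_side=2000 gives scale = min(2000/4000, 2000/3000, 1.0) = 0.5, so the preview is
# resized to 2000x1500; pages already within max_side keep their size (scale clamps at 1.0).
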
def _guess_name_and_mime(file, file_bytes: bytes) -> Tuple[str, str]:
    if isinstance(file, (str, os.PathLike)):
        filename = os.path.basename(str(file))
    elif isinstance(file, dict) and "name" in file:
        filename = os.path.basename(file["name"])
    elif isinstance(file, dict) and "path" in file:
        filename = os.path.basename(file["path"])
    else:
        filename = "upload.bin"
    mime, _ = mimetypes.guess_type(filename)
    if not mime:
        if len(file_bytes) >= 4 and file_bytes[:4] == b"%PDF":
            mime = "application/pdf"
            if not filename.lower().endswith(".pdf"):
                filename += ".pdf"
        else:
            mime = "image/png"
    return filename, mime

def _extract_json_from_message(msg: str):
    """Strip a ```json ... ``` fence if present. Return (parsed_obj_or_None, cleaned_string)."""
    s = (msg or "").strip()
    s = re.sub(r"^\s*```(?:json)?\s*", "", s, flags=re.IGNORECASE)
    s = re.sub(r"\s*```\s*$", "", s)
    try:
        return json.loads(s), s
    except Exception:
        return None, s

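# Example (illustrative) of the fence stripping above:
#   _extract_json_from_message('```json\n{"total": 3}\n```')  -> ({"total": 3}, '{"total": 3}')
#   _extract_json_from_message('plain text')                   -> (None, 'plain text')
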
def _pretty_message(msg: str) -> str:
    obj, s = _extract_json_from_message(msg)
    return json.dumps(obj, ensure_ascii=False, indent=2) if obj is not None else s

def _safe_text_from_gemini(resp):
    try:
        return resp.text
    except Exception:
        pass
    texts = []
    for c in getattr(resp, "candidates", []) or []:
        content = getattr(c, "content", None)
        parts = getattr(content, "parts", None) if content else None
        if not parts:
            continue
        for p in parts:
            t = getattr(p, "text", None)
            if t:
                texts.append(t)
    return "\n".join(texts).strip()

def _wait_file_active(file_obj, timeout_s: int = 60) -> object:
    """Wait until the file uploaded to Gemini reaches the ACTIVE state, with timeout and backoff."""
    start = time.time()
    delay = 0.5
    while hasattr(file_obj, "state") and getattr(file_obj.state, "name", "") == "PROCESSING":
        if time.time() - start > timeout_s:
            raise TimeoutError("Upload processing timeout.")
        time.sleep(delay)
        delay = min(delay * 1.5, 2.0)
        file_obj = genai.get_file(file_obj.name)
    if not hasattr(file_obj, "state") or file_obj.state.name != "ACTIVE":
        st = getattr(file_obj, "state", None)
        raise RuntimeError(f"Upload failed or not active. State={getattr(st, 'name', 'UNKNOWN')}")
    return file_obj

# ---------- JSON → Excel (schema-agnostic) ----------
def _flatten_dict(d: dict, parent_key: str = "", sep: str = ".") -> dict:
    """Flatten a nested dict into one level: {'a': {'b': 1}} -> {'a.b': 1}"""
    items = []
    for k, v in (d or {}).items():
        new_key = f"{parent_key}{sep}{k}" if parent_key else str(k)
        if isinstance(v, dict):
            items.extend(_flatten_dict(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)

def _sanitize_sheet_name(name: str, used: set[str]) -> str:
    # Excel sheet names: at most 31 chars and none of []:*?/\
    invalid = set(r'[]:*?/\'' + '"')
    clean = "".join(ch for ch in name if ch not in invalid)
    clean = clean.strip()
    if not clean:
        clean = "sheet"
    clean = clean[:31]
    # ensure the name is unique within the workbook
    base, idx = clean, 1
    while clean in used:
        suffix = f"_{idx}"
        clean = base[: (31 - len(suffix))] + suffix
        idx += 1
    used.add(clean)
    return clean

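# Example (illustrative): _sanitize_sheet_name("line_items: 2024/Q1", used=set())
# drops the characters Excel forbids and returns "line_items 2024Q1"; a second call with
# the same name and the same `used` set returns "line_items 2024Q1_1" to stay unique.
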
def _to_excel_generic(data: Any, path: str) -> str:
    """
    Rules:
    - If data is a list[dict] -> one "data" sheet (json_normalize)
    - If data is a dict:
        + Build one "summary" sheet from the scalar/dict fields (flattened)
        + For each field that is a list:
            · list[dict]   -> one sheet named after the key (normalized)
            · list[scalar] -> one sheet with a single 'value' column
            · list[mixed]  -> serialized into a 'value' column of strings
    """
    with pd.ExcelWriter(path) as writer:
        used_names = set()

        def add_df(df: pd.DataFrame, sheet: str):
            sheetname = _sanitize_sheet_name(sheet, used_names)
            df.to_excel(writer, index=False, sheet_name=sheetname)

        if isinstance(data, list):
            # generic list
            try:
                df = pd.json_normalize(data, sep=".")
            except Exception:
                df = pd.DataFrame({"value": [json.dumps(x, ensure_ascii=False) for x in data]})
            add_df(df, "data")
            return path

        if isinstance(data, dict):
            scalars = {}
            list_sheets: list[tuple[str, pd.DataFrame]] = []

            for k, v in data.items():
                if isinstance(v, list):
                    if len(v) == 0:
                        list_sheets.append((k, pd.DataFrame()))
                    elif isinstance(v[0], dict):
                        try:
                            df = pd.json_normalize(v, sep=".")
                        except Exception:
                            df = pd.DataFrame({"value": [json.dumps(x, ensure_ascii=False) for x in v]})
                        list_sheets.append((k, df))
                    elif not isinstance(v[0], (list, dict)):
                        df = pd.DataFrame({"value": v})
                        list_sheets.append((k, df))
                    else:
                        df = pd.DataFrame({"value": [json.dumps(x, ensure_ascii=False) for x in v]})
                        list_sheets.append((k, df))
                elif isinstance(v, dict):
                    scalars.update(_flatten_dict({k: v}))
                else:
                    scalars[k] = v

            # summary sheet
            if len(scalars) > 0:
                add_df(pd.DataFrame([scalars]), "summary")

            # each list -> one sheet
            for k, df in list_sheets:
                add_df(df, k if k else "list")

            # a dict containing only lists (no summary) is fine: only the list sheets are written
            return path

        # any other type: serialize into a single 'value' column
        add_df(pd.DataFrame({"value": [json.dumps(data, ensure_ascii=False)]}), "data")
        return path

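# Worked example of the mapping above (hypothetical OCR output, not a fixed schema):
#   {"invoice_no": "INV-001", "seller": {"name": "ACME"}, "items": [{"sku": "A", "qty": 2}]}
# produces a "summary" sheet with columns invoice_no and seller.name, plus an "items"
# sheet with columns sku and qty, e.g.:
#   _to_excel_generic(parsed, "/tmp/out.xlsx")
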
# ================== HANDLERS ==================
def preview_process(file):
    """Return a list of temporary PNG paths for the Gallery (more stable than a list of PIL images)."""
    if file is None:
        return []
    try:
        file_bytes = _read_file_bytes(file)
        images = _make_previews(file_bytes, max_side=2000)
        paths = []
        for i, im in enumerate(images):
            fd, path = tempfile.mkstemp(suffix=f"_preview_{i}.png")
            os.close(fd)
            im.save(path, format="PNG")
            paths.append(path)
        return paths
    except Exception as e:
        print(f"Preview error: {e}")
        return []

# -------- Internal (Gemini) - Base (single pass, no thinking) --------
def run_process_internal_base(file_bytes, filename, mime, question, model_choice,
                              temperature, top_p):
    api_key = os.environ.get("GOOGLE_API_KEY", DEFAULT_API_KEY)
    if not api_key:
        return "ERROR: Missing GOOGLE_API_KEY.", None
    genai.configure(api_key=api_key)

    model_name = INTERNAL_MODEL_MAP.get(model_choice, INTERNAL_MODEL_MAP["Gemini 2.5 Flash"])
    gen_config = {"temperature": float(temperature), "top_p": float(top_p)}
    model = genai.GenerativeModel(model_name=model_name, generation_config=gen_config)

    uploaded = None
    tmp_path = None
    try:
        if file_bytes:
            suffix = os.path.splitext(filename)[1] or ".bin"
            with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
                tmp.write(file_bytes)
                tmp_path = tmp.name
            uploaded = genai.upload_file(path=tmp_path, mime_type=mime)
            uploaded = _wait_file_active(uploaded, timeout_s=60)

        user_prompt = (question or "").strip()
        if not user_prompt:
            user_prompt = (
                "Perform high-quality OCR on the provided file. If PDF: read all pages in order. "
                "Return clean plain text. If structure is obvious (tables, key:value), preserve it. "
                "If you can, output JSON that captures the structure."
            )

        # Call the model
        if uploaded:
            resp = model.generate_content([user_prompt, uploaded])
        else:
            resp = model.generate_content(user_prompt)

        # Take the LLM message as-is (pretty-printed if it is JSON)
        answer_raw = _safe_text_from_gemini(resp)
        message = _pretty_message(answer_raw)

        # Parse JSON (if any) for export. No schema validation.
        parsed_obj, _ = _extract_json_from_message(answer_raw)

        return message, parsed_obj
    finally:
        if tmp_path and os.path.exists(tmp_path):
            try:
                os.remove(tmp_path)
            except Exception:
                pass
        try:
            if uploaded and hasattr(uploaded, "name"):
                genai.delete_file(uploaded.name)
        except Exception:
            pass

+
# -------- External API --------
|
| 307 |
+
def run_process_external(file_bytes, filename, mime, question, api_url,
|
| 308 |
+
temperature, top_p):
|
| 309 |
+
if not api_url or not str(api_url).strip():
|
| 310 |
+
return "ERROR: Missing external API endpoint (hãy dán URL).", None
|
| 311 |
+
try:
|
| 312 |
+
user_prompt = (question or "").strip()
|
| 313 |
+
if not user_prompt:
|
| 314 |
+
user_prompt = (
|
| 315 |
+
"Perform high-quality OCR on the provided file. If PDF: read all pages in order. "
|
| 316 |
+
"Return clean plain text. If structure is obvious (tables, key:value), preserve it. "
|
| 317 |
+
"If you can, output JSON that captures the structure."
|
| 318 |
+
)
|
| 319 |
+
|
| 320 |
+
data = {"prompt": user_prompt, "temperature": str(temperature), "top_p": str(top_p)}
|
| 321 |
+
|
| 322 |
+
if file_bytes:
|
| 323 |
+
files = {"file": (filename, file_bytes, mime)}
|
| 324 |
+
r = requests.post(api_url, files=files, data=data, timeout=60)
|
| 325 |
+
else:
|
| 326 |
+
r = requests.post(api_url, json=data, timeout=60)
|
| 327 |
+
|
| 328 |
+
if r.status_code >= 400:
|
| 329 |
+
return f"ERROR: External API HTTP {r.status_code}: {r.text[:300]}", None
|
| 330 |
+
|
| 331 |
+
answer = None
|
| 332 |
+
try:
|
| 333 |
+
j = r.json()
|
| 334 |
+
answer = j.get("message") or j.get("text") or j.get("data")
|
| 335 |
+
if isinstance(answer, (dict, list)):
|
| 336 |
+
answer = json.dumps(answer, ensure_ascii=False)
|
| 337 |
+
except Exception:
|
| 338 |
+
answer = r.text
|
| 339 |
+
|
| 340 |
+
answer = (answer or "").strip()
|
| 341 |
+
message = _pretty_message(answer)
|
| 342 |
+
parsed_obj, _ = _extract_json_from_message(answer)
|
| 343 |
+
|
| 344 |
+
return message, parsed_obj
|
| 345 |
+
except Exception as e:
|
| 346 |
+
return f"ERROR: {type(e).__name__}: {str(e) or repr(e)}", None
|
| 347 |
+
|
| 348 |
+
# -------- Router --------
|
| 349 |
+
def run_process(file, question, model_choice, temperature, top_p, external_api_url):
|
| 350 |
+
"""
|
| 351 |
+
Router (không Agent, không thinking):
|
| 352 |
+
- Nếu chọn External model -> run_process_external
|
| 353 |
+
- Ngược lại -> Gemini nội bộ (Base 1 lượt)
|
| 354 |
+
"""
|
| 355 |
+
try:
|
| 356 |
+
has_file = file is not None
|
| 357 |
+
file_bytes = filename = mime = None
|
| 358 |
+
if has_file:
|
| 359 |
+
file_bytes = _read_file_bytes(file)
|
| 360 |
+
filename, mime = _guess_name_and_mime(file, file_bytes)
|
| 361 |
+
|
| 362 |
+
if model_choice == EXTERNAL_MODEL_NAME:
|
| 363 |
+
return run_process_external(
|
| 364 |
+
file_bytes=file_bytes, filename=filename, mime=mime,
|
| 365 |
+
question=question, api_url=external_api_url,
|
| 366 |
+
temperature=temperature, top_p=top_p
|
| 367 |
+
)
|
| 368 |
+
|
| 369 |
+
return run_process_internal_base(
|
| 370 |
+
file_bytes=file_bytes, filename=filename, mime=mime,
|
| 371 |
+
question=question, model_choice=model_choice,
|
| 372 |
+
temperature=temperature, top_p=top_p
|
| 373 |
+
)
|
| 374 |
+
except Exception as e:
|
| 375 |
+
return f"ERROR: {type(e).__name__}: {str(e) or repr(e)}", None
|
| 376 |
+
|
| 377 |
+
def on_export_excel(parsed_obj):
|
| 378 |
+
try:
|
| 379 |
+
if not parsed_obj:
|
| 380 |
+
return None # UI sẽ không hiện file nếu None
|
| 381 |
+
tmp_path = os.path.join(tempfile.gettempdir(), f"export_{int(time.time())}.xlsx")
|
| 382 |
+
return _to_excel_generic(parsed_obj, tmp_path)
|
| 383 |
+
except Exception as e:
|
| 384 |
+
print(f"Export error: {e}")
|
| 385 |
+
return None
|
| 386 |
+
|
| 387 |
+
def clear_all():
|
| 388 |
+
# file, preview, output_text, question, model, parsed_state, download,
|
| 389 |
+
# temperature, top_p, external_api_url
|
| 390 |
+
return (
|
| 391 |
+
None, [], "", "",
|
| 392 |
+
"Gemini 2.5 Flash", None, None,
|
| 393 |
+
0.2, 0.95, ""
|
| 394 |
+
)
|
| 395 |
+
|
# ================== UI ==================
def _toggle_external_visibility(selected: str):
    return gr.update(visible=(selected == EXTERNAL_MODEL_NAME))

def main():
    custom_css = """
    .gradio-container { max-width: 1400px !important; margin: 0 auto; }
    #main-row { display: flex; gap: 20px; align-items: flex-start; }
    #left-column { flex: 1; min-width: 400px; max-width: 600px; }
    #right-column { flex: 1; min-width: 400px; }
    #file-upload { border: 2px dashed #d1d5db; border-radius: 12px; padding: 20px; text-align: center; transition: border-color 0.3s ease; }
    #file-upload:hover { border-color: #3b82f6; }
    #preview-gallery { max-height: 600px; overflow-y: auto; border: 1px solid #e5e7eb; border-radius: 12px; background: #f9fafb; padding: 10px; }
    #preview-gallery .grid { grid-template-columns: 1fr !important; gap: 10px !important; }
    #preview-gallery img { width: 100% !important; height: auto !important; object-fit: contain !important; background: white; }
    #controls-section { background: #f8fafc; padding: 20px; border-radius: 12px; margin-bottom: 20px; }
    #results-section { background: #ffffff; border: 1px solid #e5e7eb; border-radius: 12px; padding: 20px; }
    #llm-output { max-height: 500px; overflow-y: auto; font-family: monospace; font-size: 13px; }
    .primary-button { background: linear-gradient(90deg, #3b82f6, #1d4ed8) !important; color: white !important; border: none !important; border-radius: 8px !important; padding: 10px 20px !important; font-weight: 500 !important; }
    .primary-button:hover { transform: translateY(-1px) !important; box-shadow: 0 4px 12px rgba(59, 130, 246, 0.3) !important; }
    .secondary-button { background: #f3f4f6 !important; color: #374151 !important; border: 1px solid #d1d5db !important; border-radius: 8px !important; padding: 8px 16px !important; }
    @media (max-width: 1024px) { #main-row { flex-direction: column; } #left-column, #right-column { min-width: 100%; max-width: 100%; } }
    """

    with gr.Blocks(title="OCR Multi-Agent System", css=custom_css, theme=gr.themes.Soft()) as demo:
        gr.HTML("""
        <div style="text-align: center; padding: 20px 0; margin-bottom: 30px;">
            <h1 style="color:#1f2937; font-size: 2.5rem; font-weight: bold; margin-bottom: 8px;">📄 OCR Extraction (LLM-first)</h1>
            <p style="color:#6b7280; font-size: 1.1rem; margin: 0;">Upload PDF/images → LLM produces raw text/JSON → Export Excel (schema-agnostic)</p>
        </div>
        """)

        last_parsed_state = gr.State(value=None)

        with gr.Row(elem_id="main-row"):
            # Left
            with gr.Column(elem_id="left-column"):
                gr.Markdown("### 📁 Upload Document")
                file = gr.File(
                    label="Choose PDF or Image file",
                    file_types=[".pdf", ".png", ".jpg", ".jpeg", ".tiff", ".bmp"],
                    type="filepath",
                    elem_id="file-upload"
                )
                gr.Markdown("### 👁️ Document Preview")
                preview = gr.Gallery(columns=1, height=None, show_label=False, elem_id="preview-gallery", allow_preview=True)

            # Right
            with gr.Column(elem_id="right-column"):
                with gr.Group(elem_id="controls-section"):
                    gr.Markdown("### ⚙️ Processing Options")
                    with gr.Row():
                        model_choice = gr.Dropdown(
                            choices=[*INTERNAL_MODEL_MAP.keys(), EXTERNAL_MODEL_NAME],
                            value="Gemini 2.5 Flash",
                            label="Model"
                        )

                    with gr.Row():
                        temperature = gr.Slider(0.0, 2.0, value=0.2, step=0.05, label="temperature")
                        top_p = gr.Slider(0.0, 1.0, value=0.95, step=0.01, label="top_p")

                    external_api_url = gr.Textbox(
                        label="External API endpoint (URL)",
                        placeholder="https://your-host/path/to/ocr",
                        visible=False
                    )

                    question = gr.Textbox(
                        label="Custom Prompt (optional)",
                        placeholder="Leave blank for default OCR; or ask model to output JSON by your own schema...",
                        lines=3
                    )
                    with gr.Row():
                        run_btn = gr.Button("🚀 Process Document", elem_classes=["primary-button"])
                        clear_btn = gr.Button("🗑️ Clear All", elem_classes=["secondary-button"])

                with gr.Group(elem_id="results-section"):
                    gr.Markdown("### 📊 LLM Message (raw/pretty)")
                    output_text = gr.Code(label="LLM Message", language="json", elem_id="llm-output")
                    with gr.Row():
                        export_btn = gr.Button("⬇️ Export to Excel", elem_classes=["secondary-button"])
                        download_file = gr.File(label="Download Excel", interactive=False)

        # Events
        file.change(preview_process, inputs=[file], outputs=[preview])
        model_choice.change(_toggle_external_visibility, inputs=[model_choice], outputs=[external_api_url])

        run_btn.click(
            run_process,
            inputs=[file, question, model_choice, temperature, top_p, external_api_url],
            outputs=[output_text, last_parsed_state]
        )

        export_btn.click(on_export_excel, inputs=[last_parsed_state], outputs=[download_file])

        clear_btn.click(
            clear_all,
            inputs=[],
            outputs=[file, preview, output_text, question, model_choice, last_parsed_state,
                     download_file, temperature, top_p, external_api_url]
        )

    return demo

demo = main()

if __name__ == "__main__":
    demo.launch()
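# Minimal local smoke test (commented out; assumes GOOGLE_API_KEY is exported and that a
# sample file exists at the hypothetical path below — adjust both before trying it):
#   msg, parsed = run_process("sample.pdf", "", "Gemini 2.5 Flash", 0.2, 0.95, "")
#   print(msg)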