Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -47,8 +47,8 @@ Please analyze the freight rate table in the file I provide and convert it into
|
|
| 47 |
"transit": "...",
|
| 48 |
"transit_time": "...",
|
| 49 |
"weight_breaks": {
|
| 50 |
-
"
|
| 51 |
-
"
|
| 52 |
"45kg": ...,
|
| 53 |
"100kg": ...,
|
| 54 |
"300kg": ...,
|
|
@@ -429,6 +429,8 @@ def run_process_internal_base_v2(file_bytes, filename, mime, question, model_cho
|
|
| 429 |
user_prompt = (question or "").strip() or PROMPT_FREIGHT_JSON
|
| 430 |
all_json_results, all_text_results = [], []
|
| 431 |
|
|
|
|
|
|
|
| 432 |
for i in range(0, len(pages), batch_size):
|
| 433 |
batch = pages[i:i+batch_size]
|
| 434 |
uploaded = []
|
|
@@ -438,16 +440,34 @@ def run_process_internal_base_v2(file_bytes, filename, mime, question, model_cho
|
|
| 438 |
up = genai.upload_file(path=tmp.name, mime_type="image/png")
|
| 439 |
up = _wait_file_active(up)
|
| 440 |
uploaded.append(up)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 441 |
try:
|
| 442 |
-
|
| 443 |
-
|
| 444 |
-
|
| 445 |
-
|
| 446 |
-
|
| 447 |
-
|
| 448 |
-
|
| 449 |
-
|
| 450 |
-
|
|
|
|
|
|
|
| 451 |
finally:
|
| 452 |
for up in uploaded:
|
| 453 |
try: genai.delete_file(up.name)
|
|
|
|
| 47 |
"transit": "...",
|
| 48 |
"transit_time": "...",
|
| 49 |
"weight_breaks": {
|
| 50 |
+
"m": ...,
|
| 51 |
+
"n": ...,
|
| 52 |
"45kg": ...,
|
| 53 |
"100kg": ...,
|
| 54 |
"300kg": ...,
|
|
|
|
| 429 |
user_prompt = (question or "").strip() or PROMPT_FREIGHT_JSON
|
| 430 |
all_json_results, all_text_results = [], []
|
| 431 |
|
| 432 |
+
previous_header_json = None
|
| 433 |
+
|
| 434 |
for i in range(0, len(pages), batch_size):
|
| 435 |
batch = pages[i:i+batch_size]
|
| 436 |
uploaded = []
|
|
|
|
| 440 |
up = genai.upload_file(path=tmp.name, mime_type="image/png")
|
| 441 |
up = _wait_file_active(up)
|
| 442 |
uploaded.append(up)
|
| 443 |
+
|
| 444 |
+
# build dynamic prompt
|
| 445 |
+
if previous_header_json:
|
| 446 |
+
context_prompt = (
|
| 447 |
+
f"{user_prompt}\n"
|
| 448 |
+
"The previous page had this table structure:\n"
|
| 449 |
+
f"{json.dumps(previous_header_json, ensure_ascii=False, indent=2)}\n"
|
| 450 |
+
"If this page has no header, assume it continues with the same structure."
|
| 451 |
+
)
|
| 452 |
+
else:
|
| 453 |
+
context_prompt = user_prompt
|
| 454 |
+
|
| 455 |
+
resp = model.generate_content([f"{context_prompt}\n(This is batch {i//batch_size+1})"] + uploaded)
|
| 456 |
+
text = _safe_text_from_gemini(resp)
|
| 457 |
+
json_text = _coerce_only_json(text)
|
| 458 |
+
|
| 459 |
try:
|
| 460 |
+
parsed = json.loads(json_text)
|
| 461 |
+
all_json_results.append(parsed)
|
| 462 |
+
|
| 463 |
+
# ✅ capture header context from the first batch only (reused as context for every later batch)
|
| 464 |
+
if i == 0:
|
| 465 |
+
# only the "charges[0].weight_breaks" part needs to be kept as the header structure
|
| 466 |
+
first_charge = (parsed.get("charges") or [{}])[0]
|
| 467 |
+
if "weight_breaks" in first_charge:
|
| 468 |
+
previous_header_json = first_charge["weight_breaks"]
|
| 469 |
+
except Exception:
|
| 470 |
+
all_text_results.append(text)
|
| 471 |
finally:
|
| 472 |
for up in uploaded:
|
| 473 |
try: genai.delete_file(up.name)
|