Upload app.py
Browse files
app.py
CHANGED
|
@@ -358,54 +358,86 @@ def analyze_solution(question: str, solution: str):
|
|
| 358 |
return final_verdict
|
| 359 |
|
| 360 |
def classify_solution_stream(question: str, solution: str):
|
| 361 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 362 |
if not question.strip() or not solution.strip():
|
| 363 |
-
|
|
|
|
| 364 |
return
|
| 365 |
|
|
|
|
| 366 |
if not models_ready():
|
| 367 |
-
|
|
|
|
| 368 |
msg = load_model()
|
| 369 |
if not models_ready():
|
| 370 |
-
|
|
|
|
| 371 |
return
|
|
|
|
| 372 |
|
| 373 |
try:
|
| 374 |
-
# Stage 1
|
| 375 |
-
|
|
|
|
|
|
|
| 376 |
conceptual = run_conceptual_check(question, solution, classifier_model, classifier_tokenizer)
|
| 377 |
-
|
| 378 |
-
|
| 379 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
-
# Stage 2 — computational
|
| 382 |
-
yield "⏳ Working…", "Running computational check…", "🧮 **Stage 2:** extracting & evaluating equations…"
|
| 383 |
computational = run_computational_check(solution, gemma_model, gemma_tokenizer)
|
| 384 |
|
| 385 |
-
# Final verdict
|
| 386 |
if computational["error"]:
|
|
|
|
|
|
|
|
|
|
|
|
|
| 387 |
classification = "computational_error"
|
| 388 |
explanation = (
|
| 389 |
-
|
| 390 |
-
f
|
| 391 |
-
f"The correct calculation should be: {
|
| 392 |
)
|
| 393 |
-
status = "🟥 **Final:** computational error detected."
|
| 394 |
else:
|
| 395 |
-
|
| 396 |
-
|
|
|
|
| 397 |
explanation = "All calculations are correct and the overall logic appears to be sound."
|
| 398 |
-
status = "🟩 **Final:** correct."
|
| 399 |
else:
|
| 400 |
-
classification =
|
| 401 |
-
explanation =
|
| 402 |
-
|
|
|
|
|
|
|
| 403 |
|
| 404 |
-
yield
|
|
|
|
| 405 |
|
| 406 |
except Exception as e:
|
| 407 |
logger.exception("inference failed")
|
| 408 |
-
|
|
|
|
|
|
|
| 409 |
|
| 410 |
# ---------------- UI: streaming, no progress bars ----------------
|
| 411 |
with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
|
|
|
|
| 358 |
return final_verdict
|
| 359 |
|
| 360 |
def classify_solution_stream(question: str, solution: str):
    """Yield progressive ``(classification, explanation, status_markdown)`` tuples.

    The third element is a Markdown bulleted checklist that grows as the
    pipeline advances; the entry for the step currently running is
    rewritten in place once that step finishes:

        ⏳ Stage 1 ...            (conceptual check running)
        ✅ Stage 1 ... done
        ⏳ Stage 2 ...            (computational check running)
        ✅ / 🟥 Stage 2 ... result

    Blank input and a failed model load each end the stream early with a
    single explanatory tuple. An exception raised while the two stages run
    is logged and reported as a final "Runtime error" tuple.
    """
    def as_bullets(entries):
        # One Markdown bullet per checklist entry; placeholder text when empty.
        bullets = [f"- {line}" for line in entries]
        return "\n".join(bullets) if bullets else "*(idle)*"

    busy = "⏳ Working…"
    checklist = []

    # Guard: both fields must contain something other than whitespace.
    if not (question.strip() and solution.strip()):
        checklist.append("⚠️ Provide a question and a solution.")
        yield "Please fill in both fields", "", as_bullets(checklist)
        return

    # Models are loaded on first use rather than up front.
    if not models_ready():
        checklist.append("⏳ Loading models…")
        yield busy, "", as_bullets(checklist)
        msg = load_model()
        if models_ready():
            checklist[-1] = "✅ Models loaded."
        else:
            # Surface whatever load_model() reported and stop the stream.
            checklist[-1] = f"🟥 Failed to load models — {msg}"
            yield "Models not loaded", "", as_bullets(checklist)
            return

    try:
        # ---- Stage 1: conceptual check ----
        checklist.append("⏳ **Stage 1: Conceptual check**")
        yield busy, "Starting conceptual check…", as_bullets(checklist)

        stage_one = run_conceptual_check(
            question, solution, classifier_model, classifier_tokenizer
        )
        pred = stage_one["prediction"]
        conf = stage_one["probabilities"][pred]
        # Overwrite the in-progress entry with the stage's outcome.
        checklist[-1] = f"✅ **Stage 1: Conceptual check** — prediction **{pred}** (p={conf:.2%})"
        yield busy, f"Stage 1: {pred} (p={conf:.2%}). Now checking calculations…", as_bullets(checklist)

        # ---- Stage 2: computational check ----
        checklist.append("⏳ **Stage 2: Computational check**")
        yield busy, "Running computational check…", as_bullets(checklist)

        stage_two = run_computational_check(solution, gemma_model, gemma_tokenizer)

        # ---- Final verdict ----
        # A computational error takes precedence over the stage-1 prediction.
        if not stage_two["error"]:
            checklist[-1] = "✅ **Stage 2: Computational check** — no arithmetic issues found."
            if pred == "correct":
                verdict = "correct"
                details = "All calculations are correct and the overall logic appears to be sound."
            else:
                verdict = "conceptual_error"
                details = (
                    "All calculations are correct, but there appears to be a conceptual error "
                    "in the logic or setup of the solution."
                )
        else:
            line_txt = stage_two["line_text"]
            corr = stage_two["correct_calc"]
            checklist[-1] = f"🟥 **Stage 2: Computational check** — error on line “{line_txt}” (correct: `{corr}`)"
            verdict = "computational_error"
            details = (
                "A calculation error was found.\n"
                f'On the line: "{line_txt}"\n'
                f"The correct calculation should be: {corr}"
            )

        # Last update: both result fields plus the completed checklist.
        yield verdict, details, as_bullets(checklist)

    except Exception as e:
        # Boundary handler: log the traceback and report the failure to the
        # consumer of the stream instead of raising.
        logger.exception("inference failed")
        checklist.append(f"🟥 Exception during inference: **{type(e).__name__}** — {e}")
        yield "Runtime error", f"{type(e).__name__}: {e}", as_bullets(checklist)
|
| 440 |
+
|
| 441 |
|
| 442 |
# ---------------- UI: streaming, no progress bars ----------------
|
| 443 |
with gr.Blocks(title="Math Solution Classifier", theme=gr.themes.Soft()) as app:
|