RishiRP committed (verified)
Commit db991b8 · 1 Parent(s): e5618ba

Update app.py

Files changed (1):
  1. app.py +196 -60

app.py CHANGED
@@ -209,21 +209,35 @@ def clean_transcript(text: str) -> str:
    s = re.sub(r"\n{3,}", "\n\n", s).strip()
    return s

-def read_text_from_file(file: gr.File) -> str:
-    if not file or not file.name:
+def read_text_file_any(file_input) -> str:
+    """Works for gr.File(type='filepath') and raw strings/Path and file-like."""
+    if not file_input:
        return ""
-    name = file.name.lower()
-    data = file.read()
-    if name.endswith(".json"):
+    # filepath string
+    if isinstance(file_input, (str, Path)):
        try:
-            obj = json.loads(data.decode("utf-8", errors="ignore"))
-            if isinstance(obj, dict) and "transcript" in obj:
-                return str(obj["transcript"])
-            return json.dumps(obj, ensure_ascii=False)
+            return Path(file_input).read_text(encoding="utf-8", errors="ignore")
        except Exception:
-            return data.decode("utf-8", errors="ignore")
-    else:
+            return ""
+    # gr.File object or file-like
+    try:
+        data = file_input.read()
        return data.decode("utf-8", errors="ignore")
+    except Exception:
+        return ""
+
+def read_json_file_any(file_input) -> Optional[dict]:
+    if not file_input:
+        return None
+    if isinstance(file_input, (str, Path)):
+        try:
+            return json.loads(Path(file_input).read_text(encoding="utf-8", errors="ignore"))
+        except Exception:
+            return None
+    try:
+        return json.loads(file_input.read().decode("utf-8", errors="ignore"))
+    except Exception:
+        return None

def truncate_tokens(tokenizer, text: str, max_tokens: int) -> str:
    toks = tokenizer(text, add_special_tokens=False)["input_ids"]
@@ -270,30 +284,47 @@ class ModelWrapper:

    @torch.inference_mode()
    def generate(self, system_prompt: str, user_prompt: str) -> str:
+        # Build inputs as input_ids=... (avoid **tensor bug)
        if hasattr(self.tokenizer, "apply_chat_template"):
-            msgs = [{"role": "system", "content": system_prompt},
-                    {"role": "user", "content": user_prompt}]
-            inputs = self.tokenizer.apply_chat_template(
-                msgs, add_generation_prompt=True, return_tensors="pt"
-            ).to(self.model.device)
+            messages = [
+                {"role": "system", "content": system_prompt},
+                {"role": "user", "content": user_prompt},
+            ]
+            input_ids = self.tokenizer.apply_chat_template(
+                messages,
+                tokenize=True,
+                add_generation_prompt=True,
+                return_tensors="pt",
+            )
+            input_ids = input_ids.to(self.model.device)
+            gen_kwargs = dict(
+                input_ids=input_ids,
+                generation_config=GEN_CONFIG,
+                eos_token_id=self.tokenizer.eos_token_id,
+                pad_token_id=self.tokenizer.pad_token_id,
+            )
        else:
-            text = f"<s>[SYSTEM]\n{system_prompt}\n[/SYSTEM]\n[USER]\n{user_prompt}\n[/USER]\n"
-            inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
-
-        with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
-            out_ids = self.model.generate(
-                **inputs,
+            enc = self.tokenizer(
+                f"<s>[SYSTEM]\n{system_prompt}\n[/SYSTEM]\n[USER]\n{user_prompt}\n[/USER]\n",
+                return_tensors="pt"
+            ).to(self.model.device)
+            gen_kwargs = dict(
+                **enc,
                generation_config=GEN_CONFIG,
                eos_token_id=self.tokenizer.eos_token_id,
                pad_token_id=self.tokenizer.pad_token_id,
            )
+
+        with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
+            out_ids = self.model.generate(**gen_kwargs)
        return self.tokenizer.decode(out_ids[0], skip_special_tokens=True)

_MODEL_CACHE: Dict[str, ModelWrapper] = {}
def get_model(repo_id: str, hf_token: Optional[str], load_in_4bit: bool) -> ModelWrapper:
    key = f"{repo_id}::{'4bit' if (load_in_4bit and DEVICE=='cuda') else 'full'}"
    if key not in _MODEL_CACHE:
-        m = ModelWrapper(repo_id, hf_token, load_in_4bit); m.load()
+        m = ModelWrapper(repo_id, hf_token, load_in_4bit)
+        m.load()
        _MODEL_CACHE[key] = m
    return _MODEL_CACHE[key]

@@ -303,8 +334,6 @@ def get_model(repo_id: str, hf_token: Optional[str], load_in_4bit: bool) -> ModelWrapper:
def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> float:
    ALLOWED_LABELS = OFFICIAL_LABELS
    LABEL_TO_IDX = {label: idx for idx, label in enumerate(ALLOWED_LABELS)}
-    FN_PENALTY = 2.0
-    FP_PENALTY = 1.0

    def _process_sample_labels(sample_labels: List[str], sample_name: str) -> List[str]:
        if not isinstance(sample_labels, list):
@@ -315,13 +344,10 @@ def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> float:
                raise ValueError(f"{sample_name} contains non-string: {label} (type: {type(label)})")
            if label in seen:
                raise ValueError(f"{sample_name} contains duplicate label: '{label}'")
-            seen.add(label); uniq.append(label)
-        valid = []
-        for label in uniq:
            if label not in ALLOWED_LABELS:
                raise ValueError(f"{sample_name} contains invalid label: '{label}'. Allowed: {ALLOWED_LABELS}")
-            valid.append(label)
-        return valid
+            seen.add(label); uniq.append(label)
+        return uniq

    if len(y_true) != len(y_pred):
        raise ValueError(f"y_true and y_pred must have same length. Got {len(y_true)} vs {len(y_pred)}")
@@ -339,13 +365,37 @@ def evaluate_predictions(y_true: List[List[str]], y_pred: List[List[str]]) -> float:
        for label in _process_sample_labels(sample_labels, f"y_pred[{i}]"):
            y_pred_binary[i, LABEL_TO_IDX[label]] = 1

-    fn = np.sum((y_true_binary == 1) & (y_pred_binary == 0), axis=1)
-    fp = np.sum((y_true_binary == 0) & (y_pred_binary == 1), axis=1)
+    fn = np.sum((y_true_binary == 1) & (y_pred_binary == 0), axis=1)  # penalty 2x
+    fp = np.sum((y_true_binary == 0) & (y_pred_binary == 1), axis=1)  # penalty 1x
    weighted = 2.0 * fn + 1.0 * fp
    max_err = 2.0 * np.sum(y_true_binary, axis=1) + 1.0 * (n_labels - np.sum(y_true_binary, axis=1))
    per_sample = np.where(max_err > 0, 1.0 - (weighted / max_err), 1.0)
    return float(max(0.0, min(1.0, np.mean(per_sample))))

+# =========================
+# Fallback: keyword heuristics if model returns empty
+# =========================
+def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
+    low = text.lower()
+    labels = []
+    tasks = []
+    for lab in allowed:
+        hits = []
+        for kw in LABEL_KEYWORDS.get(lab, []):
+            if kw.lower() in low:
+                # capture small evidence window
+                i = low.find(kw.lower())
+                start = max(0, i - 40); end = min(len(text), i + len(kw) + 40)
+                hits.append(text[start:end].strip())
+        if hits:
+            labels.append(lab)
+            tasks.append({
+                "label": lab,
+                "explanation": "Keyword match in transcript.",
+                "evidence": hits[0]
+            })
+    return {"labels": normalize_labels(labels), "tasks": tasks}
+
# =========================
# Inference helpers
# =========================
@@ -358,34 +408,44 @@ def build_keyword_context(allowed: List[str]) -> str:

def run_single(
    transcript_text: str,
-    transcript_file: gr.File,
+    transcript_file,  # filepath or file-like
+    gt_json_text: str,
+    gt_json_file,  # filepath or file-like
    use_cleaning: bool,
+    use_keyword_fallback: bool,
    allowed_labels_text: str,
    model_repo: str,
    use_4bit: bool,
    max_input_tokens: int,
    hf_token: str,
-) -> Tuple[str, str, str, str]:
+) -> Tuple[str, str, str, str, str, str]:

    t0 = _now_ms()

-    raw_text = read_text_from_file(transcript_file) if transcript_file else (transcript_text or "")
-    raw_text = (raw_text or "").strip()
+    # Transcript
+    raw_text = ""
+    if transcript_file:
+        raw_text = read_text_file_any(transcript_file)
+    raw_text = (raw_text or transcript_text or "").strip()
    if not raw_text:
-        return "", "", "No transcript provided.", json.dumps({"labels": [], "tasks": []}, indent=2)
+        return "", "", "No transcript provided.", "", "", ""

    text = clean_transcript(raw_text) if use_cleaning else raw_text

+    # Allowed labels
    user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
    allowed = normalize_labels(user_allowed or OFFICIAL_LABELS)

+    # Model
    try:
        model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
    except Exception as e:
-        return "", "", f"Model load failed: {e}", json.dumps({"labels": [], "tasks": []}, indent=2)
+        return "", "", f"Model load failed: {e}", "", "", ""

+    # Truncate
    trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)

+    # Build prompt
    allowed_list_str = "\n".join(f"- {l}" for l in allowed)
    keyword_ctx = build_keyword_context(allowed)
    user_prompt = USER_PROMPT_TEMPLATE.format(
@@ -394,25 +454,38 @@ def run_single(
        keyword_context=keyword_ctx,
    )

+    # Generate
    t1 = _now_ms()
    try:
        out = model.generate(SYSTEM_PROMPT, user_prompt)
    except Exception as e:
-        return "", "", f"Generation error: {e}", json.dumps({"labels": [], "tasks": []}, indent=2)
+        return "", "", f"Generation error: {e}", "", "", ""
    t2 = _now_ms()

    parsed = robust_json_extract(out)
    filtered = restrict_to_allowed(parsed, allowed)

+    # Fallback if empty
+    if use_keyword_fallback and not filtered.get("labels"):
+        fb = keyword_fallback(trunc, allowed)
+        if fb["labels"]:
+            filtered = fb
+
+    # Diagnostics
    diag = "\n".join([
        f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
        f"Model: {model_repo}",
        f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
+        f"Keyword fallback: {'Yes' if use_keyword_fallback else 'No'}",
        f"Tokens (input, approx): ≤ {max_input_tokens}",
        f"Latency: prep {t1-t0} ms, gen {t2-t1} ms, total {t2-t0} ms",
        f"Allowed labels: {', '.join(allowed)}",
    ])

+    # Context preview shown in UI
+    context_preview = "Allowed Labels:\n" + "\n".join(f"- {l}" for l in allowed) + "\n\nKeyword cues:\n" + keyword_ctx
+
+    # Summary
    labs = filtered.get("labels", [])
    tasks = filtered.get("tasks", [])
    summary = "Detected labels:\n" + ("\n".join(f"- {l}" for l in labs) if labs else "(none)")
@@ -424,20 +497,59 @@ def run_single(
    else:
        summary += "\n\nTasks: (none)"

-    return summary, json.dumps(filtered, indent=2, ensure_ascii=False), diag, out.strip()
+    json_out = json.dumps(filtered, indent=2, ensure_ascii=False)
+
+    # Optional single-file scoring if GT provided
+    metrics = ""
+    true_labels = None
+    if gt_json_file or (gt_json_text and gt_json_text.strip()):
+        truth_obj = None
+        if gt_json_file:
+            truth_obj = read_json_file_any(gt_json_file)
+        if (not truth_obj) and gt_json_text:
+            try:
+                truth_obj = json.loads(gt_json_text)
+            except Exception:
+                pass
+        if isinstance(truth_obj, dict) and isinstance(truth_obj.get("labels"), list):
+            true_labels = [x for x in truth_obj["labels"] if x in OFFICIAL_LABELS]
+            pred_labels = labs
+            try:
+                score = evaluate_predictions([true_labels], [pred_labels])
+                tp = len(set(true_labels) & set(pred_labels))
+                fp = len(set(pred_labels) - set(true_labels))
+                fn = len(set(true_labels) - set(pred_labels))
+                recall = tp / (tp + fn) if (tp + fn) else 1.0
+                precision = tp / (tp + fp) if (tp + fp) else 1.0
+                f1 = (2 * precision * recall / (precision + recall)) if (precision + recall) else 1.0
+                metrics = (
+                    f"Weighted score: {score:.3f}\n"
+                    f"Recall: {recall:.3f} | Precision: {precision:.3f} | F1: {f1:.3f}\n"
+                    f"TP={tp} FP={fp} FN={fn}\n"
+                    f"Truth: {', '.join(true_labels)}"
+                )
+            except Exception as e:
+                metrics = f"Scoring error: {e}"
+        else:
+            metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
+
+    return summary, json_out, diag, out.strip(), context_preview, metrics

# =========================
# Batch mode (ZIP with transcripts + truths)
# =========================
-def read_zip(fileobj: io.BytesIO, exdir: Path) -> List[Path]:
+def read_zip_from_path(path: str, exdir: Path) -> List[Path]:
    exdir.mkdir(parents=True, exist_ok=True)
-    with zipfile.ZipFile(fileobj) as zf:
+    with open(path, "rb") as f:
+        data = f.read()
+    with zipfile.ZipFile(io.BytesIO(data)) as zf:
        zf.extractall(exdir)
    return [p for p in exdir.rglob("*") if p.is_file()]

def run_batch(
-    zip_file: gr.File,
+    zip_path,  # filepath string
    use_cleaning: bool,
+    use_keyword_fallback: bool,
    model_repo: str,
    use_4bit: bool,
    max_input_tokens: int,
@@ -445,24 +557,20 @@ def run_batch(
    limit_files: int,
) -> Tuple[str, str, pd.DataFrame, str]:

-    if not zip_file:
+    if not zip_path:
        return ("No ZIP provided.", "", pd.DataFrame(), "")

    work = Path("/tmp/batch")
    if work.exists():
        for p in sorted(work.rglob("*"), reverse=True):
-            try:
-                p.unlink()
-            except Exception:
-                pass
-        try:
-            work.rmdir()
-        except Exception:
-            pass
+            try: p.unlink()
+            except Exception: pass
+        try: work.rmdir()
+        except Exception: pass
    work.mkdir(parents=True, exist_ok=True)

-    data = zip_file.read()
-    files = read_zip(io.BytesIO(data), work)
+    # Unzip
+    files = read_zip_from_path(zip_path, work)

    txts: Dict[str, Path] = {}
    gts: Dict[str, Path] = {}
@@ -508,6 +616,12 @@ def run_batch(

        parsed = robust_json_extract(out)
        filtered = restrict_to_allowed(parsed, allowed)
+
+        if use_keyword_fallback and not filtered.get("labels"):
+            fb = keyword_fallback(trunc, allowed)
+            if fb["labels"]:
+                filtered = fb
+
        pred_labels = filtered.get("labels", [])
        y_pred.append(pred_labels)

@@ -543,6 +657,7 @@ def run_batch(
        f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
        f"Model: {model_repo}",
        f"Input cleaned: {'Yes' if use_cleaning else 'No'}",
+        f"Keyword fallback: {'Yes' if use_keyword_fallback else 'No'}",
        f"Tokens (input, approx): ≤ {max_input_tokens}",
        f"Batch time: {_now_ms()-t_start} ms",
    ]
@@ -563,7 +678,6 @@ def run_batch(
    # save CSV for download
    out_csv = Path("/tmp/batch_results.csv")
    df.to_csv(out_csv, index=False, encoding="utf-8")
-
    return ("Batch done.", diag_str, df, str(out_csv))

# =========================
@@ -585,16 +699,31 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
    with gr.Tab("Single transcript"):
        with gr.Row():
            with gr.Column(scale=3):
+                gr.Markdown("### Transcript")
                file = gr.File(
                    label="Drag & drop transcript (.txt / .md / .json)",
                    file_types=[".txt", ".md", ".json"],
                    type="filepath",
                )
-                text = gr.Textbox(label="Or paste transcript", lines=14)
+                text = gr.Textbox(label="Or paste transcript", lines=10)
+
+                gr.Markdown("### Ground truth JSON (optional)")
+                gt_file = gr.File(
+                    label="Upload ground truth JSON (expects {'labels': [...]})",
+                    file_types=[".json"],
+                    type="filepath",
+                )
+                gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{"labels": ["schedule_meeting"]}')
+
                use_cleaning = gr.Checkbox(
                    label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)",
                    value=True,
                )
+                use_keyword_fallback = gr.Checkbox(
+                    label="Keyword fallback if model returns empty",
+                    value=True,
+                )
+
                labels_text = gr.Textbox(
                    label="Allowed Labels (one per line; empty = official list)",
                    value="",
@@ -613,11 +742,17 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
        with gr.Row():
            diag = gr.Textbox(label="Diagnostics", lines=8)
            raw = gr.Textbox(label="Raw Model Output", lines=8)
+        with gr.Row():
+            context_used = gr.Code(label="Effective context used this run (labels + keyword cues)", language="markdown")
+            single_metrics = gr.Textbox(label="Single-file metrics (if ground truth provided)", lines=6)

        run_btn.click(
            fn=run_single,
-            inputs=[text, file, use_cleaning, labels_text, repo, use_4bit, max_tokens, hf_token],
-            outputs=[summary, json_out, diag, raw],
+            inputs=[
+                text, file, gt_text, gt_file, use_cleaning, use_keyword_fallback,
+                labels_text, repo, use_4bit, max_tokens, hf_token
+            ],
+            outputs=[summary, json_out, diag, raw, context_used, single_metrics],
        )

    with gr.Tab("Batch evaluation"):
@@ -625,6 +760,7 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
            with gr.Column(scale=3):
                zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
                use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
+                use_keyword_fallback_b = gr.Checkbox(label="Keyword fallback if model returns empty", value=True)
            with gr.Column(scale=2):
                repo_b = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
                use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
@@ -636,15 +772,15 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
        with gr.Row():
            status = gr.Textbox(label="Status", lines=1)
            diag_b = gr.Textbox(label="Batch diagnostics & metrics", lines=12)
-
        df_out = gr.Dataframe(label="Per-file results (TP/FP/FN, latency)", interactive=False)
        csv_out = gr.File(label="Download CSV", interactive=False)

        run_batch_btn.click(
            fn=run_batch,
-            inputs=[zip_in, use_cleaning_b, repo_b, use_4bit_b, max_tokens_b, hf_token_b, limit_files],
+            inputs=[zip_in, use_cleaning_b, use_keyword_fallback_b, repo_b, use_4bit_b, max_tokens_b, hf_token_b, limit_files],
            outputs=[status, diag_b, df_out, csv_out],
        )

if __name__ == "__main__":
+    demo = demo  # to satisfy some runtimes
    demo.launch()
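
As a quick illustration of the keyword fallback introduced in this commit, here is a self-contained sketch with a made-up LABEL_KEYWORDS map and transcript; the label names and keywords below are placeholders only (in app.py the real keyword table is used and the returned labels also pass through normalize_labels):

# Standalone sketch of the keyword-fallback heuristic; illustrative data only.
from typing import Any, Dict, List

LABEL_KEYWORDS: Dict[str, List[str]] = {
    "schedule_meeting": ["schedule a meeting", "set up a call"],
    "update_crm": ["update the crm", "log the account"],
}

def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
    low = text.lower()
    labels: List[str] = []
    tasks: List[Dict[str, str]] = []
    for lab in allowed:
        hits = []
        for kw in LABEL_KEYWORDS.get(lab, []):
            if kw.lower() in low:
                # capture a small evidence window around the first match
                i = low.find(kw.lower())
                start, end = max(0, i - 40), min(len(text), i + len(kw) + 40)
                hits.append(text[start:end].strip())
        if hits:
            labels.append(lab)
            tasks.append({"label": lab,
                          "explanation": "Keyword match in transcript.",
                          "evidence": hits[0]})
    return {"labels": labels, "tasks": tasks}

if __name__ == "__main__":
    sample = "Advisor: let's schedule a meeting for Tuesday and update the CRM afterwards."
    print(keyword_fallback(sample, list(LABEL_KEYWORDS)))
    # both illustrative labels fire, each with a short evidence snippet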