RishiRP commited on
Commit
954d97c
·
verified ·
1 Parent(s): db991b8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -41
app.py CHANGED
@@ -45,6 +45,7 @@ OFFICIAL_LABELS = [
45
  "update_kyc_purpose_of_businessrelation",
46
  "update_kyc_total_assets",
47
  ]
 
48
 
49
  # Per-label keyword cues (static prompt context to improve recall)
50
  LABEL_KEYWORDS: Dict[str, List[str]] = {
@@ -213,13 +214,11 @@ def read_text_file_any(file_input) -> str:
213
  """Works for gr.File(type='filepath') and raw strings/Path and file-like."""
214
  if not file_input:
215
  return ""
216
- # filepath string
217
  if isinstance(file_input, (str, Path)):
218
  try:
219
  return Path(file_input).read_text(encoding="utf-8", errors="ignore")
220
  except Exception:
221
  return ""
222
- # gr.File object or file-like
223
  try:
224
  data = file_input.read()
225
  return data.decode("utf-8", errors="ignore")
@@ -284,7 +283,7 @@ class ModelWrapper:
284
 
285
  @torch.inference_mode()
286
  def generate(self, system_prompt: str, user_prompt: str) -> str:
287
- # Build inputs as input_ids=... (avoid **tensor bug)
288
  if hasattr(self.tokenizer, "apply_chat_template"):
289
  messages = [
290
  {"role": "system", "content": system_prompt},
@@ -382,10 +381,11 @@ def keyword_fallback(text: str, allowed: List[str]) -> Dict[str, Any]:
382
  for lab in allowed:
383
  hits = []
384
  for kw in LABEL_KEYWORDS.get(lab, []):
385
- if kw.lower() in low:
 
386
  # capture small evidence window
387
- i = low.find(kw.lower())
388
- start = max(0, i - 40); end = min(len(text), i + len(kw) + 40)
389
  hits.append(text[start:end].strip())
390
  if hits:
391
  labels.append(lab)
@@ -418,7 +418,7 @@ def run_single(
418
  use_4bit: bool,
419
  max_input_tokens: int,
420
  hf_token: str,
421
- ) -> Tuple[str, str, str, str, str, str]:
422
 
423
  t0 = _now_ms()
424
 
@@ -428,11 +428,11 @@ def run_single(
428
  raw_text = read_text_file_any(transcript_file)
429
  raw_text = (raw_text or transcript_text or "").strip()
430
  if not raw_text:
431
- return "", "", "No transcript provided.", "", "", ""
432
 
433
  text = clean_transcript(raw_text) if use_cleaning else raw_text
434
 
435
- # Allowed labels
436
  user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
437
  allowed = normalize_labels(user_allowed or OFFICIAL_LABELS)
438
 
@@ -440,7 +440,7 @@ def run_single(
440
  try:
441
  model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
442
  except Exception as e:
443
- return "", "", f"Model load failed: {e}", "", "", ""
444
 
445
  # Truncate
446
  trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
@@ -459,7 +459,7 @@ def run_single(
459
  try:
460
  out = model.generate(SYSTEM_PROMPT, user_prompt)
461
  except Exception as e:
462
- return "", "", f"Generation error: {e}", "", "", ""
463
  t2 = _now_ms()
464
 
465
  parsed = robust_json_extract(out)
@@ -482,10 +482,16 @@ def run_single(
482
  f"Allowed labels: {', '.join(allowed)}",
483
  ])
484
 
485
- # Context preview shown in UI
486
- context_preview = "Allowed Labels:\n" + "\n".join(f"- {l}" for l in allowed) + "\n\nKeyword cues:\n" + keyword_ctx
 
 
 
 
 
 
487
 
488
- # Summary
489
  labs = filtered.get("labels", [])
490
  tasks = filtered.get("tasks", [])
491
  summary = "Detected labels:\n" + ("\n".join(f"- {l}" for l in labs) if labs else "(none)")
@@ -496,7 +502,6 @@ def run_single(
496
  )
497
  else:
498
  summary += "\n\nTasks: (none)"
499
-
500
  json_out = json.dumps(filtered, indent=2, ensure_ascii=False)
501
 
502
  # Optional single-file scoring if GT provided
@@ -533,7 +538,7 @@ def run_single(
533
  else:
534
  metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
535
 
536
- return summary, json_out, diag, out.strip(), context_preview, metrics
537
 
538
  # =========================
539
  # Batch mode (ZIP with transcripts + truths)
@@ -569,7 +574,6 @@ def run_batch(
569
  except Exception: pass
570
  work.mkdir(parents=True, exist_ok=True)
571
 
572
- # Unzip
573
  files = read_zip_from_path(zip_path, work)
574
 
575
  txts: Dict[str, Path] = {}
@@ -642,7 +646,7 @@ def run_batch(
642
 
643
  rows.append({
644
  "file": stem,
645
- "true_labels": ", ".join(gt_labels),
646
  "pred_labels": ", ".join(pred_labels),
647
  "TP": len(tp), "FP": len(fp), "FN": len(fn),
648
  "gen_ms": t1 - t0
@@ -689,32 +693,43 @@ MODEL_CHOICES = [
689
  "mistralai/Mistral-7B-Instruct-v0.3",
690
  ]
691
 
692
- with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
693
- gr.Markdown("# Talk2Task Task Extraction (UBS Challenge)")
694
- gr.Markdown(
695
- "Extract challenge labels from transcripts. False negatives are penalised more than false positives "
696
- "in the official score, so the app biases for recall."
697
- )
 
 
 
 
 
 
 
 
698
 
699
  with gr.Tab("Single transcript"):
700
  with gr.Row():
701
  with gr.Column(scale=3):
702
- gr.Markdown("### Transcript")
703
  file = gr.File(
704
  label="Drag & drop transcript (.txt / .md / .json)",
705
  file_types=[".txt", ".md", ".json"],
706
  type="filepath",
707
  )
708
  text = gr.Textbox(label="Or paste transcript", lines=10)
 
709
 
710
- gr.Markdown("### Ground truth JSON (optional)")
711
  gt_file = gr.File(
712
  label="Upload ground truth JSON (expects {'labels': [...]})",
713
  file_types=[".json"],
714
  type="filepath",
715
  )
716
- gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{"labels": ["schedule_meeting"]}')
 
717
 
 
718
  use_cleaning = gr.Checkbox(
719
  label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)",
720
  value=True,
@@ -723,28 +738,51 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
723
  label="Keyword fallback if model returns empty",
724
  value=True,
725
  )
 
726
 
 
727
  labels_text = gr.Textbox(
728
- label="Allowed Labels (one per line; empty = official list)",
729
- value="",
730
  lines=8,
731
  )
 
 
 
732
  with gr.Column(scale=2):
 
733
  repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
734
  use_4bit = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
735
  max_tokens = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
736
  hf_token = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
737
  run_btn = gr.Button("Run Extraction", variant="primary")
 
 
 
 
 
 
 
 
738
 
739
  with gr.Row():
740
- summary = gr.Textbox(label="Summary", lines=12)
741
- json_out = gr.Code(label="Strict JSON Output", language="json")
742
- with gr.Row():
743
- diag = gr.Textbox(label="Diagnostics", lines=8)
744
- raw = gr.Textbox(label="Raw Model Output", lines=8)
745
- with gr.Row():
746
- context_used = gr.Code(label="Effective context used this run (labels + keyword cues)", language="markdown")
747
- single_metrics = gr.Textbox(label="Single-file metrics (if ground truth provided)", lines=6)
 
 
 
 
 
 
 
 
 
748
 
749
  run_btn.click(
750
  fn=run_single,
@@ -752,28 +790,38 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
752
  text, file, gt_text, gt_file, use_cleaning, use_keyword_fallback,
753
  labels_text, repo, use_4bit, max_tokens, hf_token
754
  ],
755
- outputs=[summary, json_out, diag, raw, context_used, single_metrics],
756
  )
757
 
 
 
 
 
758
  with gr.Tab("Batch evaluation"):
759
  with gr.Row():
760
  with gr.Column(scale=3):
 
761
  zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
762
  use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
763
  use_keyword_fallback_b = gr.Checkbox(label="Keyword fallback if model returns empty", value=True)
 
764
  with gr.Column(scale=2):
 
765
  repo_b = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
766
  use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
767
  max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
768
  hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
769
  limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
770
  run_batch_btn = gr.Button("Run Batch", variant="primary")
 
771
 
772
  with gr.Row():
 
773
  status = gr.Textbox(label="Status", lines=1)
774
  diag_b = gr.Textbox(label="Batch diagnostics & metrics", lines=12)
775
- df_out = gr.Dataframe(label="Per-file results (TP/FP/FN, latency)", interactive=False)
776
- csv_out = gr.File(label="Download CSV", interactive=False)
 
777
 
778
  run_batch_btn.click(
779
  fn=run_batch,
@@ -782,5 +830,4 @@ with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
782
  )
783
 
784
  if __name__ == "__main__":
785
- demo = demo # to satisfy some runtimes
786
  demo.launch()
 
45
  "update_kyc_purpose_of_businessrelation",
46
  "update_kyc_total_assets",
47
  ]
48
+ OFFICIAL_LABELS_TEXT = "\n".join(OFFICIAL_LABELS)
49
 
50
  # Per-label keyword cues (static prompt context to improve recall)
51
  LABEL_KEYWORDS: Dict[str, List[str]] = {
 
214
  """Works for gr.File(type='filepath') and raw strings/Path and file-like."""
215
  if not file_input:
216
  return ""
 
217
  if isinstance(file_input, (str, Path)):
218
  try:
219
  return Path(file_input).read_text(encoding="utf-8", errors="ignore")
220
  except Exception:
221
  return ""
 
222
  try:
223
  data = file_input.read()
224
  return data.decode("utf-8", errors="ignore")
 
283
 
284
  @torch.inference_mode()
285
  def generate(self, system_prompt: str, user_prompt: str) -> str:
286
+ # Build inputs as input_ids=... (avoid **tensor bug from earlier)
287
  if hasattr(self.tokenizer, "apply_chat_template"):
288
  messages = [
289
  {"role": "system", "content": system_prompt},
 
381
  for lab in allowed:
382
  hits = []
383
  for kw in LABEL_KEYWORDS.get(lab, []):
384
+ k = kw.lower()
385
+ if k in low:
386
  # capture small evidence window
387
+ i = low.find(k)
388
+ start = max(0, i - 40); end = min(len(text), i + len(k) + 40)
389
  hits.append(text[start:end].strip())
390
  if hits:
391
  labels.append(lab)
 
418
  use_4bit: bool,
419
  max_input_tokens: int,
420
  hf_token: str,
421
+ ) -> Tuple[str, str, str, str, str, str, str]:
422
 
423
  t0 = _now_ms()
424
 
 
428
  raw_text = read_text_file_any(transcript_file)
429
  raw_text = (raw_text or transcript_text or "").strip()
430
  if not raw_text:
431
+ return "", "", "No transcript provided.", "", "", "", ""
432
 
433
  text = clean_transcript(raw_text) if use_cleaning else raw_text
434
 
435
+ # Allowed labels (pre-filled defaults)
436
  user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
437
  allowed = normalize_labels(user_allowed or OFFICIAL_LABELS)
438
 
 
440
  try:
441
  model = get_model(model_repo, (hf_token or "").strip() or None, use_4bit)
442
  except Exception as e:
443
+ return "", "", f"Model load failed: {e}", "", "", "", ""
444
 
445
  # Truncate
446
  trunc = truncate_tokens(model.tokenizer, text, max_input_tokens)
 
459
  try:
460
  out = model.generate(SYSTEM_PROMPT, user_prompt)
461
  except Exception as e:
462
+ return "", "", f"Generation error: {e}", "", "", "", ""
463
  t2 = _now_ms()
464
 
465
  parsed = robust_json_extract(out)
 
482
  f"Allowed labels: {', '.join(allowed)}",
483
  ])
484
 
485
+ # Context & instructions preview shown in UI
486
+ context_preview = (
487
+ "### Allowed Labels\n"
488
+ + "\n".join(f"- {l}" for l in allowed)
489
+ + "\n\n### Keyword cues per label\n"
490
+ + keyword_ctx
491
+ )
492
+ instructions_preview = "```\n" + SYSTEM_PROMPT + "\n```"
493
 
494
+ # Summary & JSON
495
  labs = filtered.get("labels", [])
496
  tasks = filtered.get("tasks", [])
497
  summary = "Detected labels:\n" + ("\n".join(f"- {l}" for l in labs) if labs else "(none)")
 
502
  )
503
  else:
504
  summary += "\n\nTasks: (none)"
 
505
  json_out = json.dumps(filtered, indent=2, ensure_ascii=False)
506
 
507
  # Optional single-file scoring if GT provided
 
538
  else:
539
  metrics = "Ground truth JSON missing or invalid; expected {'labels': [...]}."
540
 
541
+ return summary, json_out, diag, out.strip(), context_preview, instructions_preview, metrics
542
 
543
  # =========================
544
  # Batch mode (ZIP with transcripts + truths)
 
574
  except Exception: pass
575
  work.mkdir(parents=True, exist_ok=True)
576
 
 
577
  files = read_zip_from_path(zip_path, work)
578
 
579
  txts: Dict[str, Path] = {}
 
646
 
647
  rows.append({
648
  "file": stem,
649
+ "true_labels": ", ".join(gt_labels),
650
  "pred_labels": ", ".join(pred_labels),
651
  "TP": len(tp), "FP": len(fp), "FN": len(fn),
652
  "gen_ms": t1 - t0
 
693
  "mistralai/Mistral-7B-Instruct-v0.3",
694
  ]
695
 
696
+ custom_css = """
697
+ :root { --radius: 14px; }
698
+ .gradio-container { font-family: Inter, ui-sans-serif, system-ui; }
699
+ .card { border: 1px solid rgba(255,255,255,.08); border-radius: var(--radius); padding: 14px 16px; background: rgba(255,255,255,.02); box-shadow: 0 1px 10px rgba(0,0,0,.12) inset; }
700
+ .header { font-weight: 700; font-size: 22px; margin-bottom: 4px; }
701
+ .subtle { color: rgba(255,255,255,.65); font-size: 14px; margin-bottom: 12px; }
702
+ hr.sep { border: none; border-top: 1px solid rgba(255,255,255,.08); margin: 10px 0 16px; }
703
+ .accordion-title { font-weight: 600; }
704
+ .gr-button { border-radius: 12px !important; }
705
+ """
706
+
707
+ with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, fill_height=True) as demo:
708
+ gr.Markdown("<div class='header'>Talk2Task — Task Extraction (UBS Challenge)</div>")
709
+ gr.Markdown("<div class='subtle'>False negatives are penalised 2× more than false positives in the official score. This UI biases for recall, shows the exact instructions & context, and supports single or batch evaluation.</div>")
710
 
711
  with gr.Tab("Single transcript"):
712
  with gr.Row():
713
  with gr.Column(scale=3):
714
+ gr.Markdown("<div class='card'><div class='header'>Transcript</div>", elem_id="card1")
715
  file = gr.File(
716
  label="Drag & drop transcript (.txt / .md / .json)",
717
  file_types=[".txt", ".md", ".json"],
718
  type="filepath",
719
  )
720
  text = gr.Textbox(label="Or paste transcript", lines=10)
721
+ gr.Markdown("<hr class='sep'/>")
722
 
723
+ gr.Markdown("<div class='header'>Ground truth JSON (optional)</div>", elem_id="card1b")
724
  gt_file = gr.File(
725
  label="Upload ground truth JSON (expects {'labels': [...]})",
726
  file_types=[".json"],
727
  type="filepath",
728
  )
729
+ gt_text = gr.Textbox(label="Or paste ground truth JSON", lines=6, placeholder='{\"labels\": [\"schedule_meeting\"]}')
730
+ gr.Markdown("</div>") # close card
731
 
732
+ gr.Markdown("<div class='card'><div class='header'>Preprocessing & heuristics</div>", elem_id="card2")
733
  use_cleaning = gr.Checkbox(
734
  label="Apply default cleaning (remove disclaimers, timestamps, speakers, footers)",
735
  value=True,
 
738
  label="Keyword fallback if model returns empty",
739
  value=True,
740
  )
741
+ gr.Markdown("</div>")
742
 
743
+ gr.Markdown("<div class='card'><div class='header'>Allowed labels</div>", elem_id="card3")
744
  labels_text = gr.Textbox(
745
+ label="Allowed Labels (one per line)",
746
+ value=OFFICIAL_LABELS_TEXT, # prefilled
747
  lines=8,
748
  )
749
+ reset_btn = gr.Button("Reset to official labels")
750
+ gr.Markdown("</div>")
751
+
752
  with gr.Column(scale=2):
753
+ gr.Markdown("<div class='card'><div class='header'>Model & run</div>", elem_id="card4")
754
  repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
755
  use_4bit = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
756
  max_tokens = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
757
  hf_token = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
758
  run_btn = gr.Button("Run Extraction", variant="primary")
759
+ gr.Markdown("</div>")
760
+
761
+ gr.Markdown("<div class='card'><div class='header'>Outputs</div>", elem_id="card5")
762
+ summary = gr.Textbox(label="Summary", lines=12)
763
+ json_out = gr.Code(label="Strict JSON Output", language="json")
764
+ diag = gr.Textbox(label="Diagnostics", lines=8)
765
+ raw = gr.Textbox(label="Raw Model Output", lines=8)
766
+ gr.Markdown("</div>")
767
 
768
  with gr.Row():
769
+ with gr.Column():
770
+ with gr.Accordion("Instructions used (system prompt)", open=False):
771
+ instr_md = gr.Markdown("")
772
+ with gr.Column():
773
+ with gr.Accordion("Context used (allowed labels + keyword cues)", open=True):
774
+ context_md = gr.Markdown("")
775
+
776
+ # reset button behavior
777
+ def _reset_labels():
778
+ return OFFICIAL_LABELS_TEXT
779
+ reset_btn.click(fn=_reset_labels, inputs=None, outputs=labels_text)
780
+
781
+ # single run
782
+ def _pack_context_md(allowed: str) -> str:
783
+ allowed_list = [ln.strip() for ln in (allowed or OFFICIAL_LABELS_TEXT).splitlines() if ln.strip()]
784
+ ctx = build_keyword_context(allowed_list)
785
+ return "### Allowed Labels\n" + "\n".join(f"- {l}" for l in allowed_list) + "\n\n### Keyword cues per label\n" + ctx
786
 
787
  run_btn.click(
788
  fn=run_single,
 
790
  text, file, gt_text, gt_file, use_cleaning, use_keyword_fallback,
791
  labels_text, repo, use_4bit, max_tokens, hf_token
792
  ],
793
+ outputs=[summary, json_out, diag, raw, context_md, instr_md, gr.Textbox(visible=False)],
794
  )
795
 
796
+ # also keep instructions visible at initial load
797
+ instr_md.value = "```\n" + SYSTEM_PROMPT + "\n```"
798
+ context_md.value = _pack_context_md(OFFICIAL_LABELS_TEXT)
799
+
800
  with gr.Tab("Batch evaluation"):
801
  with gr.Row():
802
  with gr.Column(scale=3):
803
+ gr.Markdown("<div class='card'><div class='header'>ZIP input</div>", elem_id="card6")
804
  zip_in = gr.File(label="ZIP with transcripts (.txt) and truths (.json)", file_types=[".zip"], type="filepath")
805
  use_cleaning_b = gr.Checkbox(label="Apply default cleaning", value=True)
806
  use_keyword_fallback_b = gr.Checkbox(label="Keyword fallback if model returns empty", value=True)
807
+ gr.Markdown("</div>")
808
  with gr.Column(scale=2):
809
+ gr.Markdown("<div class='card'><div class='header'>Model & run</div>", elem_id="card7")
810
  repo_b = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
811
  use_4bit_b = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
812
  max_tokens_b = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
813
  hf_token_b = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
814
  limit_files = gr.Slider(label="Process at most N files (0 = all)", minimum=0, maximum=2000, step=10, value=0)
815
  run_batch_btn = gr.Button("Run Batch", variant="primary")
816
+ gr.Markdown("</div>")
817
 
818
  with gr.Row():
819
+ gr.Markdown("<div class='card'><div class='header'>Batch outputs</div>", elem_id="card8")
820
  status = gr.Textbox(label="Status", lines=1)
821
  diag_b = gr.Textbox(label="Batch diagnostics & metrics", lines=12)
822
+ df_out = gr.Dataframe(label="Per-file results (TP/FP/FN, latency)", interactive=False)
823
+ csv_out = gr.File(label="Download CSV", interactive=False)
824
+ gr.Markdown("</div>")
825
 
826
  run_batch_btn.click(
827
  fn=run_batch,
 
830
  )
831
 
832
  if __name__ == "__main__":
 
833
  demo.launch()