RishiRP committed on
Commit 38169c5 · verified · 1 Parent(s): 34720ab

Update app.py

Files changed (1):
  app.py +159 -178

app.py CHANGED
@@ -1,17 +1,15 @@
import os, io, re, sys, time, json, zipfile, statistics
from pathlib import Path
- from typing import List, Dict, Tuple, Union, Optional

import gradio as gr
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

- # --- ZeroGPU support ---------------------------------------------------------
- # If the 'spaces' package is available (on Spaces), we use @spaces.GPU.
- # Locally / on CPU hardware, we create a no-op decorator so the code still runs.
try:
-     import spaces  # provided in HF Spaces runtime
except Exception:
    class _DummySpaces:
        def GPU(self, *args, **kwargs):
@@ -19,16 +17,22 @@ except Exception:
            return deco
    spaces = _DummySpaces()

- # --- Auth token for gated models --------------------------------------------
HF_TOKEN = (
    os.getenv("HF_TOKEN")
    or os.getenv("HUGGINGFACE_HUB_TOKEN")
    or os.getenv("HUGGINGFACEHUB_API_TOKEN")
)

- # =======================
- # Label Set / Scoring
- # =======================
ALLOWED_LABELS = [
    "plan_contact",
    "schedule_meeting",
@@ -39,7 +43,7 @@ ALLOWED_LABELS = [
    "update_kyc_purpose_of_businessrelation",
    "update_kyc_total_assets",
]
- LABEL_TO_IDX = {l:i for i,l in enumerate(ALLOWED_LABELS)}
FN_PENALTY = 2.0
FP_PENALTY = 1.0

@@ -48,7 +52,7 @@ def safe_json_load(s: str):
        return json.loads(s)
    except Exception:
        pass
-     m = re.search(r'\{.*\}', s, re.S)
    if m:
        try:
            return json.loads(m.group(0))
@@ -62,29 +66,29 @@ def _coerce_labels_list(x):
        for it in x:
            if isinstance(it, str): out.append(it)
            elif isinstance(it, dict):
-                 for k in ("label","value","task","category","name"):
                    v = it.get(k)
                    if isinstance(v, str):
                        out.append(v); break
                else:
                    if isinstance(it.get("labels"), list):
                        out += [s for s in it["labels"] if isinstance(s, str)]
-         seen=set(); norm=[]
        for s in out:
            if s not in seen:
                norm.append(s); seen.add(s)
        return norm
    if isinstance(x, dict):
-         for k in ("expected_labels","labels","targets","y_true"):
            if k in x: return _coerce_labels_list(x[k])
        if "one_hot" in x and isinstance(x["one_hot"], dict):
-             return [k for k,v in x["one_hot"].items() if v]
    return []

def classic_metrics(pred_labels, exp_labels):
-     pred_labels = [str(x) for x in (pred_labels or []) if isinstance(x, (str,int,float,bool))]
-     exp_labels = [str(x) for x in (exp_labels or []) if isinstance(x, (str,int,float,bool))]
-     pred = set(pred_labels); gold = set(exp_labels)
    if not pred and not gold:
        return True, 1.0, 1.0, 1.0, 1.0
    inter = pred & gold; union = pred | gold
@@ -108,9 +112,7 @@ def ubs_score_one(true_labels, pred_labels) -> float:
    score = 1.0 if max_err == 0 else (1.0 - (weighted / max_err))
    return float(max(0.0, min(1.0, score)))

- # =======================
- # Lightweight Preprocess
- # =======================
EMAIL_RX = re.compile(r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b', re.I)
TIME_RX = re.compile(r'\b(\d{1,2}:\d{2}\b|\b\d{1,2}\s?(am|pm)\b|\bafternoon\b|\bmorning\b|\bevening\b)', re.I)
DATE_RX = re.compile(r'\b(jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec)\b|\b\d{1,2}[/-]\d{1,2}([/-]\d{2,4})?\b|\b20\d{2}\b', re.I)
@@ -201,11 +203,9 @@ def shrink_to_token_cap_by_lines(text: str, soft_cap_tokens: int, tokenizer,
    ids = tokenizer(text, return_tensors=None, add_special_tokens=False).input_ids
    est = len(ids)
    threshold = int(soft_cap_tokens * apply_only_if_ratio)
-     if est <= threshold:
-         return text
    parts = text.splitlines()
-     if len(parts) <= min_lines_keep:
-         return text

    keep_flags=[]
    for ln in parts:
@@ -230,15 +230,13 @@ def shrink_to_token_cap_by_lines(text: str, soft_cap_tokens: int, tokenizer,
    candidate2_tokens = len(tokenizer(candidate2, return_tensors=None, add_special_tokens=False).input_ids)
    candidate = candidate if cand_tokens <= candidate2_tokens else candidate2

-     if len(candidate.splitlines()) < min_lines_keep:
-         return text
    return candidate

def enforce_rules(labels, transcript_text):
    labels = set(labels or [])
    if (TIME_RX.search(transcript_text) or DATE_RX.search(transcript_text)) and MEET_RX.search(transcript_text):
-         labels.add("schedule_meeting")
-         labels.discard("plan_contact")
    if EMAIL_RX.search(transcript_text) and re.search(r'\b(update|new|set|change|confirm(ed)?|for all communication)\b', transcript_text, re.I):
        labels.add("update_contact_info_non_postal")
    kyc_rx = re.compile(r'\b(kyc|aml|compliance|employer|occupation|purpose of (relationship|account)|source of (wealth|funds)|net worth|total assets)\b', re.I)
@@ -246,9 +244,7 @@ def enforce_rules(labels, transcript_text):
        labels.discard("update_kyc_activity")
    return sorted(labels)

- # =======================
- # HF Model Wrapper
- # =======================
class HFModel:
    def __init__(self, repo_id: str, load_4bit: bool, dtype: str, trust_remote_code: bool):
        self.repo_id = repo_id
@@ -260,19 +256,16 @@ class HFModel:
        self.model = None
        if load_4bit:
            try:
-                 quant = BitsAndBytesConfig(
-                     load_in_4bit=True,
-                     bnb_4bit_use_double_quant=True,
-                     bnb_4bit_compute_dtype=torch_dtype,
-                     bnb_4bit_quant_type="nf4"
                )
                self.model = AutoModelForCausalLM.from_pretrained(
                    repo_id, device_map="auto", trust_remote_code=trust_remote_code,
-                     quantization_config=quant, torch_dtype=torch_dtype, token=HF_TOKEN
                )
            except Exception as e:
                print(f"[WARN] 4-bit load failed for {repo_id}: {e}\nFalling back to normal load...", file=sys.stderr)
-
        if self.model is None:
            self.model = AutoModelForCausalLM.from_pretrained(
                repo_id, device_map="auto", trust_remote_code=trust_remote_code,
@@ -282,9 +275,6 @@ class HFModel:
        self.max_context = getattr(self.model.config, "max_position_embeddings", None) \
            or getattr(self.model.config, "max_sequence_length", None) or 8192

-     def encode_len(self, text: str) -> int:
-         return len(self.tokenizer(text, return_tensors=None, add_special_tokens=False).input_ids)
-
    def apply_chat_template(self, system_text: str, user_text: str) -> str:
        if getattr(self.tokenizer, "chat_template", None):
            messages = [{"role":"system","content":system_text},
@@ -300,78 +290,63 @@ class HFModel:
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        t0 = time.perf_counter()
        out = self.model.generate(
-             **inputs,
-             max_new_tokens=max_new_tokens,
-             do_sample=False,
-             temperature=None,
-             top_p=None,
-             eos_token_id=self.tokenizer.eos_token_id,
        )
        latency_ms = int((time.perf_counter() - t0) * 1000)
        text = self.tokenizer.decode(out[0], skip_special_tokens=True)
-         if text.startswith(prompt):
-             text = text[len(prompt):]
        return latency_ms, text, prompt

- # Cache
MODEL_CACHE: Dict[str, HFModel] = {}
-
def get_model(repo_id: str, load_4bit: bool, dtype: str, trust_remote_code: bool):
    if repo_id not in MODEL_CACHE:
        MODEL_CACHE[repo_id] = HFModel(repo_id, load_4bit=load_4bit, dtype=dtype, trust_remote_code=trust_remote_code)
    return MODEL_CACHE[repo_id]

- # =======================
- # ZeroGPU-decorated generator
- # =======================
- @spaces.GPU(duration=180)  # required by ZeroGPU; no-op on CPU
def gpu_generate(repo_id: str, system_text: str, user_text: str,
                 load_4bit: bool, dtype: str, trust_remote_code: bool):
    hf = get_model(repo_id, load_4bit=load_4bit, dtype=dtype, trust_remote_code=trust_remote_code)
-     return hf.generate_json(system_text.strip(), user_text.strip(), max_new_tokens=256)

- # =======================
- # Utility (ZIP I/O)
- # =======================
def _read_zip_bytes(dataset_zip: Union[bytes, str, dict, None]) -> bytes:
-     if dataset_zip is None:
-         raise ValueError("No ZIP provided")
-     if isinstance(dataset_zip, bytes):
-         return dataset_zip
    if isinstance(dataset_zip, str):
-         with open(dataset_zip, "rb") as f:
-             return f.read()
    if isinstance(dataset_zip, dict) and "path" in dataset_zip:
-         with open(dataset_zip["path"], "rb") as f:
-             return f.read()
    path = getattr(dataset_zip, "name", None)
    if path and os.path.exists(path):
-         with open(path, "rb") as f:
-             return f.read()
-     raise ValueError("Unsupported file object received from Gradio")

def parse_zip(zip_bytes: bytes) -> Dict[str, Tuple[str, List[str]]]:
    zf = zipfile.ZipFile(io.BytesIO(zip_bytes))
-     names = zf.namelist()
    samples = {}
-     for n in names:
        p = Path(n)
        if p.suffix.lower() == ".txt":
-             sample_id = p.stem
-             txt = zf.read(n).decode("utf-8", "replace")
-             samples.setdefault(sample_id, ["", []])[0] = txt
        elif p.suffix.lower() == ".json":
-             sample_id = p.stem
            try:
                js = json.loads(zf.read(n).decode("utf-8", "replace"))
            except Exception:
                js = []
-             samples.setdefault(sample_id, ["", []])[1] = _coerce_labels_list(js)
    return samples

- # =======================
- # Core Inference (shared)
- # =======================
DEFAULT_SYSTEM = (
    "You are a task extraction assistant. "
    "Always output valid JSON with a field \"labels\" (list of strings). "
@@ -386,6 +361,7 @@ DEFAULT_CONTEXT = (
    "- update_kyc_*: KYC updates (activity, purpose, origin of assets, total assets)"
)

def prepare_input_text(raw_txt: str, soft_cap: int, preprocess: bool, pre_window: int,
                       add_cues: bool, strip_smalltalk: bool, tokenizer) -> Tuple[str, int, int]:
    before = len(tokenizer(raw_txt, return_tensors=None, add_special_tokens=False).input_ids)

@@ -395,10 +371,10 @@ def prepare_input_text(raw_txt: str, soft_cap: int, preprocess: bool, pre_window
    lines = [ln.strip() for ln in t_norm.splitlines() if ln.strip()]
    cue_lines = find_cue_lines(lines)
    if cue_lines:
-         lines_kept = prune_by_window(lines, cue_lines, window=pre_window, strip_smalltalk=strip_smalltalk)
    else:
-         lines_kept = [ln for ln in lines if not (strip_smalltalk and SMALLTALK_RX.search(ln))]
-     t_kept = "\n".join(lines_kept)
    cues = extract_cues(t_kept)
    header = build_cues_header(cues) if add_cues else ""
    proc_text = (header + "\n\n" + t_kept).strip() if header else t_kept
@@ -419,9 +395,7 @@ def explain_params_markdown() -> str:
        "- **Load in 4-bit (GPU only)**: memory-saving quantization; has no effect on CPU Spaces."
    )

- # =======================
- # Single Transcript Mode
- # =======================
def single_mode(
    preset_model: str, custom_model: str,
    system_text: str, context_text: str,
@@ -432,14 +406,14 @@
):
    repo_id = custom_model.strip() or preset_model.strip()
    if not repo_id:
-         return "Please choose a model.", "", "", "", None, None, None

    txt = (transcript_text or "").strip()
    if transcript_file and hasattr(transcript_file, "name") and os.path.exists(transcript_file.name):
        with open(transcript_file.name, "r", encoding="utf-8", errors="replace") as f:
            txt = f.read()
    if not txt:
-         return "Please paste a transcript or upload a .txt file.", "", "", "", None, None, None

    exp = []
    if expected_labels_json and hasattr(expected_labels_json, "name") and os.path.exists(expected_labels_json.name):
@@ -449,27 +423,27 @@ def single_mode(
        except Exception:
            exp = []

-     # tokenizer for preprocessing (with token)
    try:
-         dummy_tok = AutoTokenizer.from_pretrained(
-             repo_id, use_fast=True, trust_remote_code=trust_remote_code, token=HF_TOKEN
-         )
    except Exception as e:
-         msg = ("Failed to load tokenizer for `{}`. If the model is gated, accept its license and set HF_TOKEN in "
-                "Space → Settings → Secrets.\n\nError: {}").format(repo_id, e)
-         return msg, "", "", "", None, None, None

    proc_text, tok_before, tok_after = prepare_input_text(
        txt, soft_cap, preprocess, pre_window, add_cues, strip_smalltalk, dummy_tok
    )
-     user = (context_text or DEFAULT_CONTEXT).strip() + "\n\nTRANSCRIPT\n" + proc_text.strip()
    system = (system_text or DEFAULT_SYSTEM).strip()

    try:
-         latency_ms, raw_text, _ = gpu_generate(repo_id, system, user, load_4bit, dtype, trust_remote_code)
    except Exception as e:
-         msg = ("Failed to run `{}`. If gated, accept license and set HF_TOKEN.\n\nError: {}").format(repo_id, e)
-         return msg, "", "", "", None, None, None

    out = safe_json_load(raw_text)
    pred_labels = enforce_rules(out.get("labels", []), proc_text)
@@ -499,12 +473,8 @@ def single_mode(
            "model_calls": 1
        },
        "evaluation": None if not exp else {
-             "exact_match": exact,
-             "precision": prec,
-             "recall": rec,
-             "f1": f1,
-             "hamming": ham,
-             "ubs_score": ubs
        }
    }
    zout.writestr("FINAL.json", json.dumps(final_json, ensure_ascii=False, indent=2))
@@ -526,45 +496,45 @@ def single_mode(
        "ubs_score": round(ubs,6) if ubs is not None else None
    }])

-     csv_bytes = row.to_csv(index=False).encode("utf-8")
-     csv_buf = io.BytesIO(csv_bytes); csv_buf.name = "results_single.csv"
-     status = "Done. (ZeroGPU-ready: model calls run inside @spaces.GPU)."
-     return status, kpi1, kpi2, kpi3, row, csv_buf, zbuf

- # =======================
- # Batch Mode (ZIP)
- # =======================
def run_batch_ui(models_list, custom_models_str, instructions_text, context_text, dataset_zip,
                 soft_cap, preprocess, pre_window, add_cues, strip_smalltalk,
                 repeats, max_total_runs, load_4bit, dtype, trust_remote_code):

    models = [m for m in (models_list or [])]
-     custom = [m.strip() for m in (custom_models_str or "").split(",") if m.strip()]
-     models.extend(custom)
-     models = [m for m in models if m]
    if not models:
-         return pd.DataFrame(), None, None, "Please pick at least one model."

    if not dataset_zip:
-         return pd.DataFrame(), None, None, "Please upload a ZIP with *.txt (+ optional matching *.json)."

    try:
        zip_bytes = _read_zip_bytes(dataset_zip)
        samples = parse_zip(zip_bytes)
    except Exception as e:
-         return pd.DataFrame(), None, None, f"Failed to read ZIP: {e}"

-     rows = []
-     total_runs = 0
    all_artifacts = io.BytesIO()
    zout = zipfile.ZipFile(all_artifacts, "w", zipfile.ZIP_DEFLATED)

    for repo_id in models:
        try:
-             dummy_tok = AutoTokenizer.from_pretrained(
-                 repo_id, use_fast=True, trust_remote_code=trust_remote_code, token=HF_TOKEN
-             )
        except Exception as e:
            rows.append({
                "timestamp": pd.Timestamp.now().isoformat(timespec="seconds"),
                "sample_id": None,
@@ -593,14 +563,10 @@ def run_batch_ui(models_list, custom_models_str, instructions_text, context_text
            continue

        for sample_id, (transcript_text, exp_labels) in samples.items():
-             if not transcript_text.strip():
-                 continue
-             latencies = []
-             last_pred = None
            for r in range(1, repeats+1):
-                 if total_runs >= max_total_runs:
-                     break
-
                proc_text, before_tok, after_tok = prepare_input_text(
                    transcript_text, soft_cap, preprocess, pre_window, add_cues, strip_smalltalk, dummy_tok
                )
@@ -608,7 +574,10 @@ def run_batch_ui(models_list, custom_models_str, instructions_text, context_text
                user_text = (context_text or DEFAULT_CONTEXT).strip() + "\n\nTRANSCRIPT\n" + proc_text.strip()

                try:
-                     latency_ms, raw_text, _ = gpu_generate(repo_id, system_text, user_text, load_4bit, dtype, trust_remote_code)
                except Exception as e:
                    base = f"{repo_id.replace('/','_')}/{sample_id}/error_r{r}"
                    zout.writestr(base + "/ERROR.txt", f"Failed to run model via @spaces.GPU. If gated, accept license and set HF_TOKEN.\n\n{e}")
@@ -706,14 +675,35 @@ def run_batch_ui(models_list, custom_models_str, instructions_text, context_text
    zout.close()
    df = pd.DataFrame(rows)
    if df.empty:
-         return pd.DataFrame(), None, None, "No runs executed (empty dataset / exceeded cap / gated models)."

-     csv_bytes = df.to_csv(index=False).encode("utf-8")
-     return df, ("results.csv", csv_bytes), ("artifacts.zip", all_artifacts.getvalue()), "Done."

- # =======================
- # UI (same dark theme)
- # =======================
DARK_RED_CSS = """
:root, .gradio-container {
    --color-background: #0b0b0d;
@@ -741,44 +731,36 @@ button, .gr-button {
}
"""

- PRESET_MODELS = [
-     "mistralai/Mistral-7B-Instruct-v0.2",
-     "Qwen/Qwen2.5-7B-Instruct",
-     "HuggingFaceH4/zephyr-7b-beta",
-     "tiiuae/falcon-7b-instruct"
- ]
-
- DEFAULT_SYSTEM = (
-     "You are a task extraction assistant. "
-     "Always output valid JSON with a field \"labels\" (list of strings). "
-     "Use only from this set: " + json.dumps(ALLOWED_LABELS) + ". "
-     "Return JSON only."
- )
- DEFAULT_CONTEXT = (
-     "- plan_contact: conversation without a concrete meeting (no date/time)\n"
-     "- schedule_meeting: explicit date/time/modality confirmation\n"
-     "- update_contact_info_non_postal: changes to email/phone\n"
-     "- update_contact_info_postal_address: changes to mailing address\n"
-     "- update_kyc_*: KYC updates (activity, purpose, origin of assets, total assets)"
- )
-
with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo:
    gr.Markdown("## 🟥 From Talk to Task — Batch & Single Task Extraction")
-     gr.Markdown(
-         "This tool extracts **task labels** from client–advisor transcripts using Hugging Face models. \n"
        "1) Pick a model (or paste a custom repo id). \n"
        "2) Provide **Instructions** and **Context**, then supply a transcript (single) or a ZIP (batch). \n"
        "3) Adjust parameters (soft token cap, preprocessing). \n"
-         "4) Run and review **latency**, **precision/recall/F1**, **UBS score**, and download artifacts.\n"
-         "_ZeroGPU-ready: model calls run inside an @spaces.GPU function when available._"
    )

    with gr.Tabs():
        # Single
        with gr.TabItem("Single Transcript (default)"):
            with gr.Row():
                with gr.Column():
-                     preset_model = gr.Dropdown(choices=PRESET_MODELS, value=PRESET_MODELS[0], label="Model (preset)")
                    custom_model = gr.Textbox(label="Custom model repo id (overrides preset)",
                                              placeholder="e.g. meta-llama/Meta-Llama-3-8B-Instruct")
                    instructions = gr.Textbox(label="Instructions (System)", lines=8, value=DEFAULT_SYSTEM)
@@ -795,6 +777,7 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
                    pre_window_s = gr.Slider(0, 6, value=3, step=1, label="Window ± lines around cues")
                    add_cues_s = gr.Checkbox(value=True, label="Add cues header")
                    strip_smalltalk_s = gr.Checkbox(value=False, label="Strip smalltalk")
                with gr.Column():
                    load_4bit_s = gr.Checkbox(value=False, label="Load in 4-bit (GPU only)")
                    dtype_s = gr.Dropdown(choices=["bfloat16","float16","float32"], value="bfloat16", label="Compute dtype")
@@ -808,11 +791,8 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
            single_status = gr.Markdown("")

            def _run_single(*args):
-                 status, m1, m2, m3, df, csv_buf, zip_buf = single_mode(*args)
-                 if isinstance(df, pd.DataFrame) and not df.empty:
-                     return m1, m2, m3, df, csv_buf, zip_buf, status
-                 else:
-                     return m1 or "", m2 or "", m3 or "", pd.DataFrame(), None, None, status

            run_single_btn.click(
                _run_single,

@@ -820,7 +800,7 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
                 transcript_text, transcript_file, expected_labels_json,
                 soft_cap_s, preprocess_s, pre_window_s, add_cues_s, strip_smalltalk_s,
                 load_4bit_s, dtype_s, trust_remote_code_s],
-                 outputs=[kpi1, kpi2, kpi3, single_table, single_csv, single_zip, single_status]
            )

        # Batch
@@ -828,7 +808,8 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
            with gr.Row():
                with gr.Column():
                    models_list = gr.CheckboxGroup(
-                         choices=PRESET_MODELS, value=[PRESET_MODELS[0]], label="Models (select one or more presets)"
                    )
                    custom_models = gr.Textbox(label="Custom model repo ids (comma-separated)",
                                               placeholder="e.g. meta-llama/Meta-Llama-3-8B-Instruct, Qwen/Qwen2.5-7B-Instruct")
@@ -839,6 +820,7 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
                    label="Upload ZIP of transcripts (*.txt) + expected (*.json)",
                    file_types=[".zip"], file_count="single", type="filepath"
                )

            with gr.Row():
                with gr.Column():

@@ -847,6 +829,7 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
                    pre_window = gr.Slider(0, 6, value=3, step=1, label="Window ± lines around cues")
                    add_cues = gr.Checkbox(value=True, label="Add cues header")
                    strip_smalltalk = gr.Checkbox(value=False, label="Strip smalltalk")
                with gr.Column():
                    repeats = gr.Slider(1, 6, value=3, step=1, label="Repeats per config")
                    max_total_runs = gr.Slider(1, 200, value=40, step=1, label="Max total runs")
@@ -862,7 +845,7 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
            status = gr.Markdown("")

            def _run_batch(*args):
-                 df, csv_pair, zip_pair, msg = run_batch_ui(*args)
                m1 = m2 = m3 = ""
                if isinstance(df, pd.DataFrame) and not df.empty:
                    summaries = df[df["is_summary"] == True]

@@ -874,19 +857,17 @@ with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo
                    m3 = f"**Median latency (ms)**\n\n{int(med) if pd.notna(med) else '—'}"
                csv_buf = zip_buf = None
                if isinstance(csv_pair, tuple):
-                     name, data = csv_pair
-                     csv_buf = io.BytesIO(data); csv_buf.name = name
                if isinstance(zip_pair, tuple):
-                     name, data = zip_pair
-                     zip_buf = io.BytesIO(data); zip_buf.name = name
-                 return m1, m2, m3, df, csv_buf, zip_buf, msg

            run_btn.click(
                _run_batch,
                inputs=[models_list, custom_models, instructions_b, context_b, dataset_zip,
                        soft_cap, preprocess, pre_window, add_cues, strip_smalltalk,
                        repeats, max_total_runs, load_4bit, dtype, trust_remote_code],
-                 outputs=[kpi_b1, kpi_b2, kpi_b3, table, csv_dl, zip_dl, status]
            )

demo.launch()
 
import os, io, re, sys, time, json, zipfile, statistics
from pathlib import Path
+ from typing import List, Dict, Tuple, Union

import gradio as gr
import pandas as pd
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

+ # ========= ZeroGPU support =========
try:
+     import spaces  # available on HF Spaces
except Exception:
    class _DummySpaces:
        def GPU(self, *args, **kwargs):

            return deco
    spaces = _DummySpaces()

+ # ========= Auth token =========
HF_TOKEN = (
    os.getenv("HF_TOKEN")
    or os.getenv("HUGGINGFACE_HUB_TOKEN")
    or os.getenv("HUGGINGFACEHUB_API_TOKEN")
)

+ # Console warning at startup (helps when logs are open)
+ if not HF_TOKEN:
+     print(
+         "[WARN] HF_TOKEN is not set. Gated models will fail. "
+         "Set it in Space → Settings → Variables and secrets.",
+         file=sys.stderr
+     )
+
+ # ========= Labels & metrics =========
ALLOWED_LABELS = [
    "plan_contact",
    "schedule_meeting",

    "update_kyc_purpose_of_businessrelation",
    "update_kyc_total_assets",
]
+ LABEL_TO_IDX = {l: i for i, l in enumerate(ALLOWED_LABELS)}
FN_PENALTY = 2.0
FP_PENALTY = 1.0

        return json.loads(s)
    except Exception:
        pass
+     m = re.search(r"\{.*\}", s, re.S)
    if m:
        try:
            return json.loads(m.group(0))

        for it in x:
            if isinstance(it, str): out.append(it)
            elif isinstance(it, dict):
+                 for k in ("label", "value", "task", "category", "name"):
                    v = it.get(k)
                    if isinstance(v, str):
                        out.append(v); break
                else:
                    if isinstance(it.get("labels"), list):
                        out += [s for s in it["labels"] if isinstance(s, str)]
+         # dedupe keep order
+         seen = set(); norm = []
        for s in out:
            if s not in seen:
                norm.append(s); seen.add(s)
        return norm
    if isinstance(x, dict):
+         for k in ("expected_labels", "labels", "targets", "y_true"):
            if k in x: return _coerce_labels_list(x[k])
        if "one_hot" in x and isinstance(x["one_hot"], dict):
+             return [k for k, v in x["one_hot"].items() if v]
    return []

def classic_metrics(pred_labels, exp_labels):
+     pred = set([str(x) for x in (pred_labels or []) if isinstance(x, (str,int,float,bool))])
+     gold = set([str(x) for x in (exp_labels or []) if isinstance(x, (str,int,float,bool))])
    if not pred and not gold:
        return True, 1.0, 1.0, 1.0, 1.0
    inter = pred & gold; union = pred | gold

    score = 1.0 if max_err == 0 else (1.0 - (weighted / max_err))
    return float(max(0.0, min(1.0, score)))

+ # ========= Lightweight preprocessing =========
EMAIL_RX = re.compile(r'\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}\b', re.I)
TIME_RX = re.compile(r'\b(\d{1,2}:\d{2}\b|\b\d{1,2}\s?(am|pm)\b|\bafternoon\b|\bmorning\b|\bevening\b)', re.I)
DATE_RX = re.compile(r'\b(jan|feb|mar|apr|may|jun|jul|aug|sep|sept|oct|nov|dec)\b|\b\d{1,2}[/-]\d{1,2}([/-]\d{2,4})?\b|\b20\d{2}\b', re.I)

    ids = tokenizer(text, return_tensors=None, add_special_tokens=False).input_ids
    est = len(ids)
    threshold = int(soft_cap_tokens * apply_only_if_ratio)
+     if est <= threshold: return text
    parts = text.splitlines()
+     if len(parts) <= min_lines_keep: return text

    keep_flags=[]
    for ln in parts:

  candidate2_tokens = len(tokenizer(candidate2, return_tensors=None, add_special_tokens=False).input_ids)
231
  candidate = candidate if cand_tokens <= candidate2_tokens else candidate2
232
 
233
+ if len(candidate.splitlines()) < min_lines_keep: return text
 
234
  return candidate
235
 
236
  def enforce_rules(labels, transcript_text):
237
  labels = set(labels or [])
238
  if (TIME_RX.search(transcript_text) or DATE_RX.search(transcript_text)) and MEET_RX.search(transcript_text):
239
+ labels.add("schedule_meeting"); labels.discard("plan_contact")
 
240
  if EMAIL_RX.search(transcript_text) and re.search(r'\b(update|new|set|change|confirm(ed)?|for all communication)\b', transcript_text, re.I):
241
  labels.add("update_contact_info_non_postal")
242
  kyc_rx = re.compile(r'\b(kyc|aml|compliance|employer|occupation|purpose of (relationship|account)|source of (wealth|funds)|net worth|total assets)\b', re.I)
 
244
  labels.discard("update_kyc_activity")
245
  return sorted(labels)
246
 
247
+ # ========= HF model wrapper =========
 
 
248
  class HFModel:
249
  def __init__(self, repo_id: str, load_4bit: bool, dtype: str, trust_remote_code: bool):
250
  self.repo_id = repo_id
 
256
  self.model = None
257
  if load_4bit:
258
  try:
259
+ q = BitsAndBytesConfig(
260
+ load_in_4bit=True, bnb_4bit_use_double_quant=True,
261
+ bnb_4bit_compute_dtype=torch_dtype, bnb_4bit_quant_type="nf4"
 
 
262
  )
263
  self.model = AutoModelForCausalLM.from_pretrained(
264
  repo_id, device_map="auto", trust_remote_code=trust_remote_code,
265
+ quantization_config=q, torch_dtype=torch_dtype, token=HF_TOKEN
266
  )
267
  except Exception as e:
268
  print(f"[WARN] 4-bit load failed for {repo_id}: {e}\nFalling back to normal load...", file=sys.stderr)
 
269
  if self.model is None:
270
  self.model = AutoModelForCausalLM.from_pretrained(
271
  repo_id, device_map="auto", trust_remote_code=trust_remote_code,
 
275
  self.max_context = getattr(self.model.config, "max_position_embeddings", None) \
276
  or getattr(self.model.config, "max_sequence_length", None) or 8192
277
 
 
 
 
278
  def apply_chat_template(self, system_text: str, user_text: str) -> str:
279
  if getattr(self.tokenizer, "chat_template", None):
280
  messages = [{"role":"system","content":system_text},
 
        inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
        t0 = time.perf_counter()
        out = self.model.generate(
+             **inputs, max_new_tokens=max_new_tokens,
+             do_sample=False, temperature=None, top_p=None,
+             eos_token_id=self.tokenizer.eos_token_id
        )
        latency_ms = int((time.perf_counter() - t0) * 1000)
        text = self.tokenizer.decode(out[0], skip_special_tokens=True)
+         if text.startswith(prompt): text = text[len(prompt):]
        return latency_ms, text, prompt

MODEL_CACHE: Dict[str, HFModel] = {}

def get_model(repo_id: str, load_4bit: bool, dtype: str, trust_remote_code: bool):
    if repo_id not in MODEL_CACHE:
        MODEL_CACHE[repo_id] = HFModel(repo_id, load_4bit=load_4bit, dtype=dtype, trust_remote_code=trust_remote_code)
    return MODEL_CACHE[repo_id]

+ # ========= ZeroGPU functions =========
+ @spaces.GPU(duration=180, secrets=["HF_TOKEN"])  # pass token into ZeroGPU job
def gpu_generate(repo_id: str, system_text: str, user_text: str,
                 load_4bit: bool, dtype: str, trust_remote_code: bool):
+     token_seen = bool(os.getenv("HF_TOKEN"))
    hf = get_model(repo_id, load_4bit=load_4bit, dtype=dtype, trust_remote_code=trust_remote_code)
+     lat, txt, prmpt = hf.generate_json(system_text.strip(), user_text.strip(), max_new_tokens=256)
+     return lat, txt, prmpt, token_seen
+
+ @spaces.GPU(duration=15, secrets=["HF_TOKEN"])
+ def gpu_check_token():
+     return bool(os.getenv("HF_TOKEN"))

+ # ========= ZIP helpers =========
def _read_zip_bytes(dataset_zip: Union[bytes, str, dict, None]) -> bytes:
+     if dataset_zip is None: raise ValueError("No ZIP provided")
+     if isinstance(dataset_zip, bytes): return dataset_zip
    if isinstance(dataset_zip, str):
+         with open(dataset_zip, "rb") as f: return f.read()
    if isinstance(dataset_zip, dict) and "path" in dataset_zip:
+         with open(dataset_zip["path"], "rb") as f: return f.read()
    path = getattr(dataset_zip, "name", None)
    if path and os.path.exists(path):
+         with open(path, "rb") as f: return f.read()
+     raise ValueError("Unsupported file object from Gradio")

def parse_zip(zip_bytes: bytes) -> Dict[str, Tuple[str, List[str]]]:
    zf = zipfile.ZipFile(io.BytesIO(zip_bytes))
    samples = {}
+     for n in zf.namelist():
        p = Path(n)
        if p.suffix.lower() == ".txt":
+             samples.setdefault(p.stem, ["", []])[0] = zf.read(n).decode("utf-8", "replace")
        elif p.suffix.lower() == ".json":
            try:
                js = json.loads(zf.read(n).decode("utf-8", "replace"))
            except Exception:
                js = []
+             samples.setdefault(p.stem, ["", []])[1] = _coerce_labels_list(js)
    return samples

+ # ========= Prompts =========
 
 
350
  DEFAULT_SYSTEM = (
351
  "You are a task extraction assistant. "
352
  "Always output valid JSON with a field \"labels\" (list of strings). "
 
361
  "- update_kyc_*: KYC updates (activity, purpose, origin of assets, total assets)"
362
  )
363
 
364
+ # ========= Preprocess + build input =========
365
  def prepare_input_text(raw_txt: str, soft_cap: int, preprocess: bool, pre_window: int,
366
  add_cues: bool, strip_smalltalk: bool, tokenizer) -> Tuple[str, int, int]:
367
  before = len(tokenizer(raw_txt, return_tensors=None, add_special_tokens=False).input_ids)
 
371
  lines = [ln.strip() for ln in t_norm.splitlines() if ln.strip()]
372
  cue_lines = find_cue_lines(lines)
373
  if cue_lines:
374
+ kept = prune_by_window(lines, cue_lines, window=pre_window, strip_smalltalk=strip_smalltalk)
375
  else:
376
+ kept = [ln for ln in lines if not (strip_smalltalk and SMALLTALK_RX.search(ln))]
377
+ t_kept = "\n".join(kept)
378
  cues = extract_cues(t_kept)
379
  header = build_cues_header(cues) if add_cues else ""
380
  proc_text = (header + "\n\n" + t_kept).strip() if header else t_kept
 
395
  "- **Load in 4-bit (GPU only)**: memory-saving quantization; has no effect on CPU Spaces."
396
  )
397
 
398
+ # ========= Single mode =========
 
 
399
  def single_mode(
400
  preset_model: str, custom_model: str,
401
  system_text: str, context_text: str,
 
406
  ):
407
  repo_id = custom_model.strip() or preset_model.strip()
408
  if not repo_id:
409
+ return "Please choose a model.", "", "", "", None, None, None, ""
410
 
411
  txt = (transcript_text or "").strip()
412
  if transcript_file and hasattr(transcript_file, "name") and os.path.exists(transcript_file.name):
413
  with open(transcript_file.name, "r", encoding="utf-8", errors="replace") as f:
414
  txt = f.read()
415
  if not txt:
416
+ return "Please paste a transcript or upload a .txt file.", "", "", "", None, None, None, ""
417
 
418
  exp = []
419
  if expected_labels_json and hasattr(expected_labels_json, "name") and os.path.exists(expected_labels_json.name):
 
423
  except Exception:
424
  exp = []
425
 
426
+ # tokenizer for preprocessing
427
  try:
428
+ dummy_tok = AutoTokenizer.from_pretrained(repo_id, use_fast=True, trust_remote_code=trust_remote_code, token=HF_TOKEN)
 
 
429
  except Exception as e:
430
+ msg = (f"Failed to load tokenizer for `{repo_id}`. "
431
+ "If gated, accept license and set HF_TOKEN in Space β†’ Settings β†’ Secrets.\n\nError: " + str(e))
432
+ return msg, "", "", "", None, None, None, banner_text()
433
 
434
  proc_text, tok_before, tok_after = prepare_input_text(
435
  txt, soft_cap, preprocess, pre_window, add_cues, strip_smalltalk, dummy_tok
436
  )
 
437
  system = (system_text or DEFAULT_SYSTEM).strip()
438
+ user = (context_text or DEFAULT_CONTEXT).strip() + "\n\nTRANSCRIPT\n" + proc_text.strip()
439
 
440
  try:
441
+ latency_ms, raw_text, _prompt, gpu_token_seen = gpu_generate(
442
+ repo_id, system, user, load_4bit, dtype, trust_remote_code
443
+ )
444
  except Exception as e:
445
+ msg = (f"Failed to run `{repo_id}`. If gated, accept license and set HF_TOKEN.\n\nError: {e}")
446
+ return msg, "", "", "", None, None, None, banner_text()
447
 
448
  out = safe_json_load(raw_text)
449
  pred_labels = enforce_rules(out.get("labels", []), proc_text)
 
            "model_calls": 1
        },
        "evaluation": None if not exp else {
+             "exact_match": exact, "precision": prec, "recall": rec,
+             "f1": f1, "hamming": ham, "ubs_score": ubs
        }
    }
    zout.writestr("FINAL.json", json.dumps(final_json, ensure_ascii=False, indent=2))

        "ubs_score": round(ubs,6) if ubs is not None else None
    }])

+     csv_buf = io.BytesIO(row.to_csv(index=False).encode("utf-8")); csv_buf.name = "results_single.csv"
+
+     return (
+         "Done.",
+         kpi1, kpi2, kpi3,
+         row, csv_buf, zbuf,
+         banner_text(gpu_token_seen)
+     )

+ # ========= Batch mode =========
def run_batch_ui(models_list, custom_models_str, instructions_text, context_text, dataset_zip,
                 soft_cap, preprocess, pre_window, add_cues, strip_smalltalk,
                 repeats, max_total_runs, load_4bit, dtype, trust_remote_code):

    models = [m for m in (models_list or [])]
+     models += [m.strip() for m in (custom_models_str or "").split(",") if m.strip()]
    if not models:
+         return pd.DataFrame(), None, None, "Please pick at least one model.", banner_text()

    if not dataset_zip:
+         return pd.DataFrame(), None, None, "Please upload a ZIP with *.txt (+ optional matching *.json).", banner_text()

    try:
        zip_bytes = _read_zip_bytes(dataset_zip)
        samples = parse_zip(zip_bytes)
    except Exception as e:
+         return pd.DataFrame(), None, None, f"Failed to read ZIP: {e}", banner_text()

+     rows = []; total_runs = 0
    all_artifacts = io.BytesIO()
    zout = zipfile.ZipFile(all_artifacts, "w", zipfile.ZIP_DEFLATED)
+     last_gpu_token_seen = None

    for repo_id in models:
+         # tokenizer for preprocessing (auth check)
        try:
+             dummy_tok = AutoTokenizer.from_pretrained(repo_id, use_fast=True, trust_remote_code=trust_remote_code, token=HF_TOKEN)
        except Exception as e:
+             # gated or missing token; record a summary row and continue
            rows.append({
                "timestamp": pd.Timestamp.now().isoformat(timespec="seconds"),
                "sample_id": None,

            continue

        for sample_id, (transcript_text, exp_labels) in samples.items():
+             if not transcript_text.strip(): continue
+             latencies = []; last_pred = None
            for r in range(1, repeats+1):
+                 if total_runs >= max_total_runs: break
                proc_text, before_tok, after_tok = prepare_input_text(
                    transcript_text, soft_cap, preprocess, pre_window, add_cues, strip_smalltalk, dummy_tok
                )

                user_text = (context_text or DEFAULT_CONTEXT).strip() + "\n\nTRANSCRIPT\n" + proc_text.strip()

                try:
+                     latency_ms, raw_text, _prompt, token_seen = gpu_generate(
+                         repo_id, system_text, user_text, load_4bit, dtype, trust_remote_code
+                     )
+                     last_gpu_token_seen = token_seen
                except Exception as e:
                    base = f"{repo_id.replace('/','_')}/{sample_id}/error_r{r}"
                    zout.writestr(base + "/ERROR.txt", f"Failed to run model via @spaces.GPU. If gated, accept license and set HF_TOKEN.\n\n{e}")

    zout.close()
    df = pd.DataFrame(rows)
    if df.empty:
+         return pd.DataFrame(), None, None, "No runs executed (empty dataset / exceeded cap / gated models).", banner_text(last_gpu_token_seen)
+
+     csv_pair = ("results.csv", df.to_csv(index=False).encode("utf-8"))
+     zip_pair = ("artifacts.zip", all_artifacts.getvalue())
+     return df, csv_pair, zip_pair, "Done.", banner_text(last_gpu_token_seen)

+ # ========= UI helpers =========
+ OPEN_MODEL_PRESETS = [
+     "mistralai/Mistral-7B-Instruct-v0.2",
+     "Qwen/Qwen2.5-7B-Instruct",
+     "HuggingFaceH4/zephyr-7b-beta",
+     "tiiuae/falcon-7b-instruct",
+ ]

+ def banner_text(gpu_token_seen: bool | None = None) -> str:
+     app_seen = bool(HF_TOKEN)
+     lines = []
+     if not app_seen:
+         lines.append("🟡 **HF_TOKEN not detected in App** — gated models will fail unless you set it in **Settings → Variables and secrets**.")
+     else:
+         lines.append("🟢 **HF_TOKEN detected in App**.")
+     if gpu_token_seen is None:
+         lines.append("ℹ️ ZeroGPU token status: click **Run** or **Check ZeroGPU token** to verify.")
+     else:
+         lines.append("🟢 **HF_TOKEN detected inside ZeroGPU job.**" if gpu_token_seen else "🔴 **HF_TOKEN missing inside ZeroGPU job** (add `secrets=[\"HF_TOKEN\"]` to @spaces.GPU).")
+     lines.append("✅ Tip: use **Open models** (no license gating): " + ", ".join(OPEN_MODEL_PRESETS))
+     return "\n\n".join(lines)
+
+ # ========= UI (dark red) =========
DARK_RED_CSS = """
:root, .gradio-container {
    --color-background: #0b0b0d;

}
"""

with gr.Blocks(title="From Talk to Task — HF Space", css=DARK_RED_CSS) as demo:
    gr.Markdown("## 🟥 From Talk to Task — Batch & Single Task Extraction")
+     help_md = (
+         "This tool extracts **task labels** from transcripts using Hugging Face models. \n"
        "1) Pick a model (or paste a custom repo id). \n"
        "2) Provide **Instructions** and **Context**, then supply a transcript (single) or a ZIP (batch). \n"
        "3) Adjust parameters (soft token cap, preprocessing). \n"
+         "4) Run and review **latency**, **precision/recall/F1**, **UBS score**, and download artifacts."
    )
+     gr.Markdown(help_md)
+
+     # Status banner (token presence info)
+     banner = gr.Markdown(banner_text())
+
+     check_btn = gr.Button("Check ZeroGPU token")
+     def _check_token():
+         try:
+             present = gpu_check_token()
+         except Exception:
+             present = None
+         return banner_text(present)
+     check_btn.click(_check_token, outputs=banner)

    with gr.Tabs():
        # Single
        with gr.TabItem("Single Transcript (default)"):
            with gr.Row():
                with gr.Column():
+                     preset_model = gr.Dropdown(choices=OPEN_MODEL_PRESETS, value=OPEN_MODEL_PRESETS[0],
+                                                label="Model (Open presets — no gating)")
                    custom_model = gr.Textbox(label="Custom model repo id (overrides preset)",
                                              placeholder="e.g. meta-llama/Meta-Llama-3-8B-Instruct")
                    instructions = gr.Textbox(label="Instructions (System)", lines=8, value=DEFAULT_SYSTEM)

                    pre_window_s = gr.Slider(0, 6, value=3, step=1, label="Window ± lines around cues")
                    add_cues_s = gr.Checkbox(value=True, label="Add cues header")
                    strip_smalltalk_s = gr.Checkbox(value=False, label="Strip smalltalk")
+                     gr.Markdown(explain_params_markdown())
                with gr.Column():
                    load_4bit_s = gr.Checkbox(value=False, label="Load in 4-bit (GPU only)")
                    dtype_s = gr.Dropdown(choices=["bfloat16","float16","float32"], value="bfloat16", label="Compute dtype")

            single_status = gr.Markdown("")

            def _run_single(*args):
+                 status, m1, m2, m3, df, csv_buf, zip_buf, btxt = single_mode(*args)
+                 return m1 or "", m2 or "", m3 or "", (df if isinstance(df, pd.DataFrame) else pd.DataFrame()), csv_buf, zip_buf, (status or ""), (btxt or banner_text())

            run_single_btn.click(
                _run_single,

                 transcript_text, transcript_file, expected_labels_json,
                 soft_cap_s, preprocess_s, pre_window_s, add_cues_s, strip_smalltalk_s,
                 load_4bit_s, dtype_s, trust_remote_code_s],
+                 outputs=[kpi1, kpi2, kpi3, single_table, single_csv, single_zip, single_status, banner]
            )

        # Batch

            with gr.Row():
                with gr.Column():
                    models_list = gr.CheckboxGroup(
+                         choices=OPEN_MODEL_PRESETS, value=[OPEN_MODEL_PRESETS[0]],
+                         label="Models (Open presets — select one or more)"
                    )
                    custom_models = gr.Textbox(label="Custom model repo ids (comma-separated)",
                                               placeholder="e.g. meta-llama/Meta-Llama-3-8B-Instruct, Qwen/Qwen2.5-7B-Instruct")

                        label="Upload ZIP of transcripts (*.txt) + expected (*.json)",
                        file_types=[".zip"], file_count="single", type="filepath"
                    )
+                 gr.Markdown("Zip must contain pairs like `ID.txt` and optional `ID.json` with expected labels (same base filename).")

  with gr.Row():
826
  with gr.Column():
 
829
  pre_window = gr.Slider(0, 6, value=3, step=1, label="Window Β± lines around cues")
830
  add_cues = gr.Checkbox(value=True, label="Add cues header")
831
  strip_smalltalk = gr.Checkbox(value=False, label="Strip smalltalk")
832
+ gr.Markdown(explain_params_markdown())
833
  with gr.Column():
834
  repeats = gr.Slider(1, 6, value=3, step=1, label="Repeats per config")
835
  max_total_runs = gr.Slider(1, 200, value=40, step=1, label="Max total runs")
 
            status = gr.Markdown("")

            def _run_batch(*args):
+                 df, csv_pair, zip_pair, msg, btxt = run_batch_ui(*args)
                m1 = m2 = m3 = ""
                if isinstance(df, pd.DataFrame) and not df.empty:
                    summaries = df[df["is_summary"] == True]

                    m3 = f"**Median latency (ms)**\n\n{int(med) if pd.notna(med) else '—'}"
                csv_buf = zip_buf = None
                if isinstance(csv_pair, tuple):
+                     name, data = csv_pair; csv_buf = io.BytesIO(data); csv_buf.name = name
                if isinstance(zip_pair, tuple):
+                     name, data = zip_pair; zip_buf = io.BytesIO(data); zip_buf.name = name
+                 return m1, m2, m3, (df if isinstance(df, pd.DataFrame) else pd.DataFrame()), csv_buf, zip_buf, (msg or ""), (btxt or banner_text())

            run_btn.click(
                _run_batch,
                inputs=[models_list, custom_models, instructions_b, context_b, dataset_zip,
                        soft_cap, preprocess, pre_window, add_cues, strip_smalltalk,
                        repeats, max_total_runs, load_4bit, dtype, trust_remote_code],
+                 outputs=[kpi_b1, kpi_b2, kpi_b3, table, csv_dl, zip_dl, status, banner]
            )

demo.launch()
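
For reference, the batch tab's `parse_zip` / `_coerce_labels_list` pair (see the diff above) accepts a ZIP of `<id>.txt` transcripts, each with an optional `<id>.json` of expected labels, where the JSON may be a plain list of strings or a dict carrying `labels`, `expected_labels`, `targets`, `y_true`, or a `one_hot` map. A minimal sketch of building such a dataset ZIP with only the standard library; the sample id, transcript text, and label below are illustrative, not taken from the commit:

# Sketch: build a dataset.zip that the Batch tab should accept.
# File names and contents here are hypothetical examples.
import io, json, zipfile

transcript = "Client: Can we meet on March 5 at 10:30?\nAdvisor: Confirmed."
expected = {"labels": ["schedule_meeting"]}  # one of the shapes _coerce_labels_list handles

buf = io.BytesIO()
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
    zf.writestr("sample_001.txt", transcript)             # transcript: <id>.txt
    zf.writestr("sample_001.json", json.dumps(expected))  # expected labels: <id>.json

with open("dataset.zip", "wb") as f:                      # upload this in the Batch tab
    f.write(buf.getvalue())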