RishiRP committed on
Commit 05188c4 · verified · 1 Parent(s): b80450d

Update app.py

Files changed (1):
  1. app.py +93 -302

app.py CHANGED
@@ -1,11 +1,8 @@
 
- Allowed Labels (strict, case-insensitive match; output must use canonical label text exactly):
  {allowed_labels_list}
 
- Instructions:
- 1) Extract every concrete task the advisor or client must take.
- 2) For each, choose ONE label from Allowed Labels (or leave empty if none match).
- 3) Output STRICT JSON only, no prose:
  {{
    "labels": ["LabelA","LabelB", ...],
    "tasks": [
@@ -16,405 +13,199 @@ Instructions:
  """
 
  # =========================
- # Utilities
  # =========================
- def _now_ms() -> int:
-     return int(time.time() * 1000)
 
  def read_file_to_text(file: gr.File) -> str:
      if not file or not file.name:
          return ""
      name = file.name.lower()
      data = file.read()
-     # Restrict to light parsers (txt/md/json) for speed/reliability
      if name.endswith(".json"):
          try:
              obj = json.loads(data.decode("utf-8", errors="ignore"))
-             # Accept either {"transcript": "..."} or list/str
              if isinstance(obj, dict) and "transcript" in obj:
                  return str(obj["transcript"])
              return json.dumps(obj, ensure_ascii=False)
          except Exception:
              return data.decode("utf-8", errors="ignore")
      else:
-         # txt / md or anything texty
-         try:
-             return data.decode("utf-8", errors="ignore")
-         except Exception:
-             try:
-                 return data.decode("latin-1", errors="ignore")
-             except Exception:
-                 return ""
 
  def normalize_labels(labels: List[str]) -> List[str]:
      return list(dict.fromkeys([l.strip() for l in labels if isinstance(l, str) and l.strip()]))
 
  def canonicalize_map(allowed: List[str]) -> Dict[str, str]:
-     """
-     Build a case-insensitive map: lowercase -> canonical label
-     """
-     m = {}
-     for lab in allowed:
-         m[lab.lower()] = lab
-     return m
 
  def robust_json_extract(text: str) -> Dict[str, Any]:
-     """
-     Try to parse strict JSON from model output.
-     If the model added extra tokens, strip to first {...} block.
-     """
      if not text:
          return {"labels": [], "tasks": []}
-
-     # Find first JSON object
-     start = text.find("{")
-     end = text.rfind("}")
-     if start != -1 and end != -1 and end > start:
-         candidate = text[start : end + 1]
-     else:
-         candidate = text
-
-     # Remove trailing junk commas and try json.loads
      try:
          return json.loads(candidate)
      except Exception:
-         # Fallback: try to repair common issues
          candidate = re.sub(r",\s*}", "}", candidate)
          candidate = re.sub(r",\s*]", "]", candidate)
-         try:
-             return json.loads(candidate)
-         except Exception:
-             return {"labels": [], "tasks": []}
 
  def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, Any]:
-     """
-     Keep only tasks whose label ∈ allowed; map case-insensitively to canonical.
-     """
      out = {"labels": [], "tasks": []}
-     if not isinstance(pred, dict):
-         return out
-     raw_labels = pred.get("labels", []) or []
-     raw_tasks = pred.get("tasks", []) or []
-
      allowed_map = canonicalize_map(allowed)
-
-     # Filter labels
-     filt_labels: List[str] = []
-     for l in raw_labels:
-         if not isinstance(l, str):
-             continue
-         k = l.strip().lower()
-         if k in allowed_map:
-             filt_labels.append(allowed_map[k])
      filt_labels = normalize_labels(filt_labels)
-
-     # Filter tasks
      filt_tasks = []
-     for t in raw_tasks:
-         if not isinstance(t, dict):
-             continue
-         lbl = t.get("label", "")
-         k = str(lbl).strip().lower()
          if k in allowed_map:
-             new_t = dict(t)
-             new_t["label"] = allowed_map[k]
              filt_tasks.append(new_t)
-
-     # Ensure labels reflect tasks (union)
-     from_tasks = [tt["label"] for tt in filt_tasks if isinstance(tt.get("label"), str)]
      merged = normalize_labels(list(set(filt_labels) | set(from_tasks)))
-
-     out["labels"] = merged
-     out["tasks"] = filt_tasks
      return out
 
- def truncate_tokens(tokenizer, text: str, max_input_tokens: int) -> str:
-     if max_input_tokens <= 0:
-         return text
-     toks = tokenizer(text, add_special_tokens=False, return_attention_mask=False, return_tensors=None)["input_ids"]
-     if len(toks) <= max_input_tokens:
-         return text
-     # Keep the tail (most recent part of the convo often carries actionable tasks)
-     keep_ids = toks[-max_input_tokens:]
-     return tokenizer.decode(keep_ids, skip_special_tokens=True)
 
  # =========================
- # Model Loading
  # =========================
  class ModelWrapper:
-     def __init__(self, repo_id: str, hf_token: Optional[str], load_in_4bit: bool):
-         self.repo_id = repo_id
-         self.hf_token = hf_token
-         self.load_in_4bit = load_in_4bit
-         self.tokenizer = None
-         self.model = None
 
      def load(self):
          qcfg = None
          if self.load_in_4bit and DEVICE == "cuda":
              qcfg = BitsAndBytesConfig(
-                 load_in_4bit=True,
-                 bnb_4bit_quant_type="nf4",
                  bnb_4bit_compute_dtype=torch.float16,
                  bnb_4bit_use_double_quant=True,
              )
-
-         tok = AutoTokenizer.from_pretrained(
-             self.repo_id,
-             token=self.hf_token,
-             cache_dir=str(SPACE_CACHE),
-             trust_remote_code=True,
-             use_fast=True,
          )
-         # Some models lack pad token—safe default
-         if tok.pad_token is None and tok.eos_token is not None:
-             tok.pad_token = tok.eos_token
-
-         model = AutoModelForCausalLM.from_pretrained(
-             self.repo_id,
-             token=self.hf_token,
-             cache_dir=str(SPACE_CACHE),
              trust_remote_code=True,
              torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
              device_map="auto" if DEVICE == "cuda" else None,
-             low_cpu_mem_usage=True,
-             quantization_config=qcfg,
-             attn_implementation="sdpa",  # T4-safe and faster than 'eager'
          )
-         self.tokenizer = tok
-         self.model = model
 
      @torch.inference_mode()
-     def generate(self, system_prompt: str, user_prompt: str) -> str:
-         # Chat template if available; otherwise a simple format
          if hasattr(self.tokenizer, "apply_chat_template"):
-             messages = [
-                 {"role": "system", "content": system_prompt},
-                 {"role": "user", "content": user_prompt},
-             ]
-             input_ids = self.tokenizer.apply_chat_template(
-                 messages,
-                 add_generation_prompt=True,
-                 return_tensors="pt",
-             ).to(self.model.device)
          else:
-             text = f"<s>[SYSTEM]\n{system_prompt}\n[/SYSTEM]\n[USER]\n{user_prompt}\n[/USER]\n"
-             input_ids = self.tokenizer(text, return_tensors="pt").to(self.model.device)
 
-         with torch.cuda.amp.autocast(enabled=(DEVICE == "cuda")):
-             out_ids = self.model.generate(
-                 **input_ids,
-                 generation_config=GEN_CONFIG,
-                 eos_token_id=self.tokenizer.eos_token_id,
-                 pad_token_id=self.tokenizer.pad_token_id,
-             )
-         out = self.tokenizer.decode(out_ids[0], skip_special_tokens=True)
-         # Heuristic: strip the prompting part if the model echoes input
-         if "}" in out:
-             tail = out[out.rfind("}") + 1 :]
-             body = out[: out.rfind("}") + 1]
-             # Prefer the last JSON object if multiple
-             if "{" in tail and "}" in tail:
-                 # do nothing—rare; handled by robust_json_extract
-                 pass
-             return body
-         return out
-
- # Keep one live model per repo for snappy re-runs
  _MODEL_CACHE: Dict[str, ModelWrapper] = {}
-
- def get_model(repo_id: str, hf_token: Optional[str], load_in_4bit: bool) -> ModelWrapper:
      key = f"{repo_id}::{'4bit' if (load_in_4bit and DEVICE=='cuda') else 'full'}"
      if key not in _MODEL_CACHE:
-         mw = ModelWrapper(repo_id, hf_token, load_in_4bit)
-         mw.load()
-         _MODEL_CACHE[key] = mw
      return _MODEL_CACHE[key]
 
  # =========================
- # Inference Pipeline
  # =========================
- def run_extraction(
-     transcript_text: str,
-     transcript_file: gr.File,
-     allowed_labels_text: str,
-     model_repo: str,
-     use_4bit: bool,
-     max_input_tokens: int,
-     hf_token: str,
- ) -> Tuple[str, str, str, str]:
-
      t0 = _now_ms()
 
-     # 1) Get transcript: prefer file (drag-drop), else textarea
-     raw_text = ""
-     if transcript_file:
-         raw_text = read_file_to_text(transcript_file)
-     if not raw_text:
-         raw_text = transcript_text or ""
-     raw_text = raw_text.strip()
-
-     if not raw_text:
-         return "", "", "No transcript provided.", json.dumps({"labels": [], "tasks": []}, ensure_ascii=False, indent=2)
-
-     # 2) Allowed labels: combine UI text with default (so we NEVER end up empty)
-     user_allowed = [ln.strip() for ln in (allowed_labels_text or "").splitlines() if ln.strip()]
-     allowed = normalize_labels(user_allowed or DEFAULT_ALLOWED_LABELS)
 
-     # 3) Load model
-     hf_tok = hf_token.strip() or None
      try:
-         model = get_model(model_repo, hf_tok, load_in_4bit=use_4bit)
      except Exception as e:
-         msg = (
-             f"Model load failed for '{model_repo}'. If gated/private, set HF_TOKEN in Space secrets.\n"
-             f"Error: {e}"
-         )
-         return "", "", msg, json.dumps({"labels": [], "tasks": []}, ensure_ascii=False, indent=2)
-
-     # 4) Truncate input to speed up
-     trunc_text = truncate_tokens(model.tokenizer, raw_text, max_input_tokens=max_input_tokens)
 
-     # 5) Build prompts
-     allowed_list_str = "\n".join(f"- {lab}" for lab in allowed)
-     user_prompt = USER_PROMPT_TEMPLATE.format(
-         transcript=trunc_text,
-         allowed_labels_list=allowed_list_str,
-     )
 
-     # 6) Generate
      t1 = _now_ms()
      try:
-         model_out = model.generate(SYSTEM_PROMPT, user_prompt)
      except Exception as e:
-         return "", "", f"Generation error: {e}", json.dumps({"labels": [], "tasks": []}, ensure_ascii=False, indent=2)
      t2 = _now_ms()
 
-     # 7) Parse & filter strictly to allowed
-     parsed = robust_json_extract(model_out)
      filtered = restrict_to_allowed(parsed, allowed)
 
-     # 8) Compose UI outputs
-     # Diagnostics
-     diag = [
          f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
-         f"Model: {model_repo}",
-         f"Tokens (input, approx): {max_input_tokens}",
-         f"Latency: load+prep {(t1 - t0)} ms, generate {(t2 - t1)} ms, total {(t2 - t0)} ms",
-         f"Allowed Labels Used (n={len(allowed)}): {', '.join(allowed)}",
-     ]
-     diag_str = "\n".join(diag)
-
-     # Summary plain text
-     labs = filtered.get("labels", [])
-     tasks = filtered.get("tasks", [])
-     summ_lines = []
-     if labs:
-         summ_lines.append("Detected labels:\n - " + "\n - ".join(labs))
-     else:
-         summ_lines.append("Detected labels: (none)")
-
-     if tasks:
-         summ_lines.append("\nTasks:")
-         for t in tasks:
-             lab = t.get("label", "")
-             expl = t.get("explanation", "")
-             ev = t.get("evidence", "")
-             summ_lines.append(f"• [{lab}] {expl} | evidence: {ev[:140]}{'…' if len(ev)>140 else ''}")
      else:
-         summ_lines.append("\nTasks: (none)")
-
-     summary = "\n".join(summ_lines)
-
-     # JSON pretty
-     json_str = json.dumps(filtered, ensure_ascii=False, indent=2)
-
-     # Raw model text (to help debug label empty issues)
-     raw_out = model_out.strip()
-
-     return summary, json_str, diag_str, raw_out
 
  # =========================
  # UI
  # =========================
  MODEL_CHOICES = [
-     "swiss-ai/Apertus-8B-Instruct-2509",   # default
-     "meta-llama/Meta-Llama-3-8B-Instruct", # may be gated; handled in code
-     "mistralai/Mistral-7B-Instruct-v0.3",  # widely available, strong baseline
  ]
 
- with gr.Blocks(theme=gr.themes.Soft(), fill_height=True) as demo:
      gr.Markdown("# Talk2Task — Task Extraction Demo")
-     gr.Markdown(
-         "Drop a transcript file **or** paste text, choose a model, and get strict JSON back. "
-         "For best speed, keep inputs concise or lower the input token limit."
-     )
 
      with gr.Row():
          with gr.Column(scale=3):
-             transcript_file = gr.File(
-                 label="Drag & drop transcript (.txt / .md / .json)",
-                 file_types=[".txt", ".md", ".json"],
-                 type="filepath",
-             )
-             transcript_text = gr.Textbox(
-                 label="Or paste transcript here",
-                 lines=14,
-                 placeholder="Paste conversation transcript…",
-             )
-             allowed_labels_text = gr.Textbox(
-                 label="Allowed Labels (one per line) — leave empty to use defaults",
-                 value="",
-                 lines=8,
-             )
          with gr.Column(scale=2):
-             model_repo = gr.Dropdown(
-                 label="Model Repository",
-                 choices=MODEL_CHOICES,
-                 value=MODEL_CHOICES[0],
-             )
-             use_4bit = gr.Checkbox(
-                 label="Use 4-bit quantization (recommended on GPU/T4)",
-                 value=True,
-             )
-             max_input_tokens = gr.Slider(
-                 label="Max input tokens (truncate from end for speed)",
-                 minimum=1024,
-                 maximum=8192,
-                 step=512,
-                 value=4096,
-             )
-             hf_token = gr.Textbox(
-                 label="HF_TOKEN (only needed for gated/private models)",
-                 type="password",
-                 value=os.environ.get("HF_TOKEN", ""),
-             )
-             run_btn = gr.Button("Run Extraction", variant="primary")
 
      with gr.Row():
-         with gr.Column():
-             summary_out = gr.Textbox(label="Summary", lines=10)
-         with gr.Column():
-             json_out = gr.Code(label="Strict JSON Output", language="json")
      with gr.Row():
-         with gr.Column():
-             diag_out = gr.Textbox(label="Diagnostics & Timing", lines=8)
-         with gr.Column():
-             raw_out = gr.Textbox(label="Raw Model Output (debug)", lines=8)
 
-     run_btn.click(
-         fn=run_extraction,
-         inputs=[
-             transcript_text,
-             transcript_file,
-             allowed_labels_text,
-             model_repo,
-             use_4bit,
-             max_input_tokens,
-             hf_token,
-         ],
-         outputs=[summary_out, json_out, diag_out, raw_out],
-     )
 
  if __name__ == "__main__":
      demo.launch()
 
 
+ Allowed Labels:
  {allowed_labels_list}
 
+ Output STRICT JSON only, no prose:
  {{
    "labels": ["LabelA","LabelB", ...],
    "tasks": [
  """
 
  # =========================
+ # Utils
  # =========================
+ def _now_ms(): return int(time.time() * 1000)
 
  def read_file_to_text(file: gr.File) -> str:
      if not file or not file.name:
          return ""
      name = file.name.lower()
      data = file.read()
      if name.endswith(".json"):
          try:
              obj = json.loads(data.decode("utf-8", errors="ignore"))
              if isinstance(obj, dict) and "transcript" in obj:
                  return str(obj["transcript"])
              return json.dumps(obj, ensure_ascii=False)
          except Exception:
              return data.decode("utf-8", errors="ignore")
      else:
+         return data.decode("utf-8", errors="ignore")
 
  def normalize_labels(labels: List[str]) -> List[str]:
      return list(dict.fromkeys([l.strip() for l in labels if isinstance(l, str) and l.strip()]))
 
  def canonicalize_map(allowed: List[str]) -> Dict[str, str]:
+     return {lab.lower(): lab for lab in allowed}
 
  def robust_json_extract(text: str) -> Dict[str, Any]:
      if not text:
          return {"labels": [], "tasks": []}
+     start, end = text.find("{"), text.rfind("}")
+     candidate = text[start:end+1] if (start != -1 and end != -1) else text
      try:
          return json.loads(candidate)
      except Exception:
          candidate = re.sub(r",\s*}", "}", candidate)
          candidate = re.sub(r",\s*]", "]", candidate)
+         try: return json.loads(candidate)
+         except Exception: return {"labels": [], "tasks": []}
 
  def restrict_to_allowed(pred: Dict[str, Any], allowed: List[str]) -> Dict[str, Any]:
      out = {"labels": [], "tasks": []}
      allowed_map = canonicalize_map(allowed)
+     filt_labels = []
+     for l in pred.get("labels", []):
+         k = str(l).strip().lower()
+         if k in allowed_map: filt_labels.append(allowed_map[k])
      filt_labels = normalize_labels(filt_labels)
      filt_tasks = []
+     for t in pred.get("tasks", []):
+         if not isinstance(t, dict): continue
+         k = str(t.get("label", "")).strip().lower()
          if k in allowed_map:
+             new_t = dict(t); new_t["label"] = allowed_map[k]
              filt_tasks.append(new_t)
+     from_tasks = [tt["label"] for tt in filt_tasks]
      merged = normalize_labels(list(set(filt_labels) | set(from_tasks)))
+     out["labels"], out["tasks"] = merged, filt_tasks
      return out
 
+ def truncate_tokens(tokenizer, text: str, max_tokens: int) -> str:
+     toks = tokenizer(text, add_special_tokens=False)["input_ids"]
+     if len(toks) <= max_tokens: return text
+     return tokenizer.decode(toks[-max_tokens:], skip_special_tokens=True)
 
  # =========================
+ # Model
  # =========================
  class ModelWrapper:
+     def __init__(self, repo_id, hf_token, load_in_4bit):
+         self.repo_id, self.hf_token, self.load_in_4bit = repo_id, hf_token, load_in_4bit
+         self.tokenizer, self.model = None, None
 
      def load(self):
          qcfg = None
          if self.load_in_4bit and DEVICE == "cuda":
              qcfg = BitsAndBytesConfig(
+                 load_in_4bit=True, bnb_4bit_quant_type="nf4",
                  bnb_4bit_compute_dtype=torch.float16,
                  bnb_4bit_use_double_quant=True,
              )
+         self.tokenizer = AutoTokenizer.from_pretrained(
+             self.repo_id, token=self.hf_token, cache_dir=str(SPACE_CACHE),
+             trust_remote_code=True, use_fast=True,
          )
+         if self.tokenizer.pad_token is None and self.tokenizer.eos_token:
+             self.tokenizer.pad_token = self.tokenizer.eos_token
+         self.model = AutoModelForCausalLM.from_pretrained(
+             self.repo_id, token=self.hf_token, cache_dir=str(SPACE_CACHE),
              trust_remote_code=True,
              torch_dtype=torch.float16 if DEVICE == "cuda" else torch.float32,
              device_map="auto" if DEVICE == "cuda" else None,
+             low_cpu_mem_usage=True, quantization_config=qcfg,
+             attn_implementation="sdpa",
          )
 
      @torch.inference_mode()
+     def generate(self, system_prompt, user_prompt):
          if hasattr(self.tokenizer, "apply_chat_template"):
+             msgs = [{"role":"system","content":system_prompt},{"role":"user","content":user_prompt}]
+             inputs = self.tokenizer.apply_chat_template(msgs, add_generation_prompt=True, return_tensors="pt")
+             inputs = inputs.to(self.model.device)
          else:
+             text = f"<s>[SYSTEM]{system_prompt}[/SYSTEM][USER]{user_prompt}[/USER]"
+             inputs = self.tokenizer(text, return_tensors="pt").to(self.model.device)
+         with torch.cuda.amp.autocast(enabled=(DEVICE=="cuda")):
+             out_ids = self.model.generate(**inputs, generation_config=GEN_CONFIG,
+                 eos_token_id=self.tokenizer.eos_token_id, pad_token_id=self.tokenizer.pad_token_id)
+         return self.tokenizer.decode(out_ids[0], skip_special_tokens=True)
 
  _MODEL_CACHE: Dict[str, ModelWrapper] = {}
+ def get_model(repo_id, hf_token, load_in_4bit):
      key = f"{repo_id}::{'4bit' if (load_in_4bit and DEVICE=='cuda') else 'full'}"
      if key not in _MODEL_CACHE:
+         m = ModelWrapper(repo_id, hf_token, load_in_4bit); m.load()
+         _MODEL_CACHE[key] = m
      return _MODEL_CACHE[key]
 
  # =========================
+ # Pipeline
  # =========================
+ def run_extraction(text, file, labels_text, repo, use_4bit, max_tokens, hf_token):
      t0 = _now_ms()
+     raw = read_file_to_text(file) if file else (text or "")
+     raw = raw.strip()
+     if not raw:
+         return "", "", "No transcript.", json.dumps({"labels":[], "tasks":[]}, indent=2)
 
+     user_labels = [ln.strip() for ln in (labels_text or "").splitlines() if ln.strip()]
+     allowed = normalize_labels(user_labels or DEFAULT_ALLOWED_LABELS)
 
      try:
+         model = get_model(repo, hf_token.strip() or None, use_4bit)
      except Exception as e:
+         return "", "", f"Model load failed: {e}", json.dumps({"labels":[], "tasks":[]}, indent=2)
 
+     trunc = truncate_tokens(model.tokenizer, raw, max_tokens)
+     user_prompt = USER_PROMPT_TEMPLATE.format(transcript=trunc, allowed_labels_list="\n".join(f"- {l}" for l in allowed))
 
      t1 = _now_ms()
      try:
+         out = model.generate(SYSTEM_PROMPT, user_prompt)
      except Exception as e:
+         return "", "", f"Gen error: {e}", json.dumps({"labels":[], "tasks":[]}, indent=2)
      t2 = _now_ms()
 
+     parsed = robust_json_extract(out)
      filtered = restrict_to_allowed(parsed, allowed)
 
+     diag = "\n".join([
          f"Device: {DEVICE} (4-bit: {'Yes' if (use_4bit and DEVICE=='cuda') else 'No'})",
+         f"Model: {repo}",
+         f"Latency: prep {t1-t0} ms, gen {t2-t1} ms, total {t2-t0} ms",
+         f"Allowed labels: {', '.join(allowed)}"
+     ])
+     summary = "Detected labels:\n" + "\n".join(f"- {l}" for l in filtered["labels"]) if filtered["labels"] else "Detected labels: (none)"
+     if filtered["tasks"]:
+         summary += "\n\nTasks:\n" + "\n".join(f"• [{t['label']}] {t.get('explanation','')} | ev: {t.get('evidence','')[:100]}" for t in filtered["tasks"])
      else:
+         summary += "\n\nTasks: (none)"
+     return summary, json.dumps(filtered, indent=2), diag, out.strip()
 
  # =========================
  # UI
  # =========================
  MODEL_CHOICES = [
+     "swiss-ai/Apertus-8B-Instruct-2509",
+     "meta-llama/Meta-Llama-3-8B-Instruct",
+     "mistralai/Mistral-7B-Instruct-v0.3",
  ]
 
+ with gr.Blocks(theme=gr.themes.Soft()) as demo:
      gr.Markdown("# Talk2Task — Task Extraction Demo")
 
      with gr.Row():
          with gr.Column(scale=3):
+             file = gr.File(label="Drag & drop transcript (.txt/.md/.json)", file_types=[".txt",".md",".json"], type="filepath")
+             text = gr.Textbox(label="Or paste transcript", lines=12)
+             labels_text = gr.Textbox(label="Allowed Labels (one per line)", lines=8)
          with gr.Column(scale=2):
+             repo = gr.Dropdown(label="Model", choices=MODEL_CHOICES, value=MODEL_CHOICES[0])
+             use_4bit = gr.Checkbox(label="Use 4-bit (GPU only)", value=True)
+             max_tokens = gr.Slider(label="Max input tokens", minimum=1024, maximum=8192, step=512, value=4096)
+             hf_token = gr.Textbox(label="HF_TOKEN (only for gated models)", type="password", value=os.environ.get("HF_TOKEN",""))
+             btn = gr.Button("Run Extraction", variant="primary")
 
      with gr.Row():
+         summary = gr.Textbox(label="Summary", lines=12)
+         json_out = gr.Code(label="JSON Output", language="json")
      with gr.Row():
+         diag = gr.Textbox(label="Diagnostics", lines=6)
+         raw = gr.Textbox(label="Raw Model Output", lines=6)
 
+     btn.click(fn=run_extraction, inputs=[text,file,labels_text,repo,use_4bit,max_tokens,hf_token], outputs=[summary,json_out,diag,raw])
 
  if __name__ == "__main__":
      demo.launch()
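
For reference, a minimal sketch of the strict JSON contract the prompt asks for and of how restrict_to_allowed() filters and canonicalizes it. The labels, tasks, and the `from app import ...` line are illustrative assumptions (invented values, and an import path that presumes app.py sits in the working directory), not output from a real transcript.

# Illustrative sketch only: all values below are invented.
# Assumes app.py is importable so restrict_to_allowed (defined above) can be reused.
from app import restrict_to_allowed

allowed = ["Schedule Meeting", "Send Documents"]  # hypothetical allowed labels

# The shape the model is instructed to return: "labels" plus "tasks",
# each task carrying label / explanation / evidence.
model_json = {
    "labels": ["schedule meeting", "Review Portfolio"],
    "tasks": [
        {
            "label": "SCHEDULE MEETING",
            "explanation": "Advisor to set up a follow-up call next week.",
            "evidence": "let's talk again next Tuesday",
        },
        {
            "label": "Review Portfolio",  # not in the allowed list, so it is dropped
            "explanation": "Client asked for a portfolio review.",
            "evidence": "could you look over my positions",
        },
    ],
}

filtered = restrict_to_allowed(model_json, allowed)
print(filtered["labels"])      # ['Schedule Meeting'], canonical casing restored
print(len(filtered["tasks"]))  # 1, only tasks whose label is in the allowed list survive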