Update app.py
app.py CHANGED

@@ -20,7 +20,7 @@ if not HF_TOKEN:
     print(" export HF_TOKEN='your_hf_token_here'")

 # =====================================================
-# Load private
+# Load private model
 # =====================================================
 def load_private_model(model_name, token):
     dtype_value = torch.float16 if torch.cuda.is_available() else torch.float32

@@ -44,13 +44,13 @@ tokenizer, model = load_private_model(GEN_MODEL, token=HF_TOKEN)
 # =====================================================
 # Dynamic token allocation
 # =====================================================
-def calculate_max_tokens(query, min_tokens=
+def calculate_max_tokens(query, min_tokens=1000, max_tokens=8192, factor=8):
     query_tokens = len(tokenizer(query)["input_ids"])
     dynamic_tokens = query_tokens * factor
     return min(max(dynamic_tokens, min_tokens), max_tokens)

 # =====================================================
-# Generate structured
+# Generate long, complete, structured answers
 # =====================================================
 def generate_answer(query, history):
     if not query.strip():

@@ -59,22 +59,35 @@ def generate_answer(query, history):
     # Correct common typos
     corrected_query = query.replace("COPP", "COPD")

-    # Step 1: Rephrase
+    # Step 1: Rephrase for precise retrieval
     rephrase_prompt = (
         "You are a medical assistant. Rephrase this query for precise retrieval:\n\n"
         f"Query: {corrected_query}\n\nRephrased query:"
     )
     inputs = tokenizer(rephrase_prompt, return_tensors="pt").to(model.device)
-    rephrased_ids = model.generate(**inputs, max_new_tokens=
-    rephrased_query = tokenizer.decode(
+    rephrased_ids = model.generate(**inputs, max_new_tokens=128, do_sample=False)
+    rephrased_query = tokenizer.decode(
+        rephrased_ids[0], skip_special_tokens=True
+    ).split("Rephrased query:")[-1].strip()

     # Step 2: Generate detailed structured answer
     max_tokens = calculate_max_tokens(rephrased_query)
     prompt = (
-        "You are a retrieval-augmented medical assistant. Provide a detailed, structured answer
-        "
-        "
-
+        "You are a retrieval-augmented medical assistant. Provide a **long, detailed, structured** medical answer "
+        "as if writing a concise clinical guideline. Use markdown headings and bullet points. "
+        "Each section should include multiple complete sentences and clear explanations.\n\n"
+        "Follow this structure:\n"
+        "### Definition / Description\n"
+        "### Epidemiology / Causes\n"
+        "### Symptoms & Signs\n"
+        "### Diagnosis / Investigations\n"
+        "### Complications\n"
+        "### Treatment & Management\n"
+        "### Prognosis / Prevention\n"
+        "### Key Notes / References\n\n"
+        "At the end, include a **🩺 Quick Summary** with 3–5 key takeaways written in plain English "
+        "that a non-medical reader could understand.\n\n"
+        f"User query: {rephrased_query}\n\nAnswer:"
     )

     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

@@ -82,35 +95,34 @@ def generate_answer(query, history):
         **inputs,
         max_new_tokens=max_tokens,
         do_sample=True,
-        temperature=0.
-
+        temperature=0.8,
+        top_p=0.9,
+        repetition_penalty=1.2,
         pad_token_id=tokenizer.eos_token_id,
     )

     output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
     answer = output.split("Answer:")[-1].strip()

-    #
-
-
-    if token_str in answer:
-        answer = answer.split(token_str)[0] + token_str
-        break
+    # Clean up potential triple breaks
+    while "\n\n\n" in answer:
+        answer = answer.replace("\n\n\n", "\n\n")

     history = history + [(query, answer)]
     return history, history

 # =====================================================
-# Gradio
+# Gradio interface
 # =====================================================
 with gr.Blocks(title="Qwen3-Harrison-RAG Chatbot") as demo:
     gr.Markdown("""
-    #
-
+    # 🧠 Qwen3-Harrison-RAG Medical Chatbot
+    This model provides **guideline-style medical answers** with structured sections and a **Quick Summary**.
+    *For educational and informational purposes only — not a substitute for professional medical advice.*
     """)
-    chatbot = gr.Chatbot(height=
+    chatbot = gr.Chatbot(height=480, show_label=False)
     with gr.Row():
-        msg = gr.Textbox(placeholder="
+        msg = gr.Textbox(placeholder="Ask a detailed medical question...", scale=4)
         clear = gr.Button("Clear", scale=1)
     msg.submit(generate_answer, [msg, chatbot], [chatbot, chatbot])
     clear.click(lambda: None, None, chatbot, queue=False)

@@ -119,4 +131,4 @@ with gr.Blocks(title="Qwen3-Harrison-RAG Chatbot") as demo:
 # Launch
 # =====================================================
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)))
+    demo.launch(server_name="0.0.0.0", server_port=int(os.environ.get("PORT", 7860)), debug=True)
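For reference, the token-budget rule this commit finalizes can be checked in isolation. The sketch below reimplements the clamping math from calculate_max_tokens with the token count passed in directly instead of being computed by the app's tokenizer; that shortcut is an assumption made only for illustration.

def calculate_max_tokens(query_tokens, min_tokens=1000, max_tokens=8192, factor=8):
    # Scale the output budget with the query length, then clamp it to
    # the [min_tokens, max_tokens] range now set in app.py.
    dynamic_tokens = query_tokens * factor
    return min(max(dynamic_tokens, min_tokens), max_tokens)

print(calculate_max_tokens(12))    # 12 * 8 = 96, raised to the 1000-token floor
print(calculate_max_tokens(1500))  # 1500 * 8 = 12000, capped at 8192

In the committed version the count comes from len(tokenizer(query)["input_ids"]) on the rephrased query, so even a short question receives at least a 1000-token budget for the structured answer.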
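The answer post-processing added here (keeping only the text after the final "Answer:" marker, then collapsing runs of blank lines) can likewise be exercised on a standalone string; the raw output below is invented purely to demonstrate the behavior:

raw_output = (
    "You are a retrieval-augmented medical assistant. ...\n\n"
    "Answer: ### Definition / Description\n"
    "COPD is a chronic, progressive airflow limitation.\n\n\n\n"
    "### Key Notes / References\nSee the source text.\n"
)
answer = raw_output.split("Answer:")[-1].strip()
while "\n\n\n" in answer:
    answer = answer.replace("\n\n\n", "\n\n")
print(answer)  # headings preserved, triple or longer line breaks reduced to double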