Spaces:
Sleeping
Sleeping
| # Streamlit App: Counselor Assistant (XGBoost + Selectable LLMs from Hugging Face) | |
| import streamlit as st | |
| import os | |
| import pandas as pd | |
| import json | |
| import time | |
| import csv | |
| from datetime import datetime | |
| from sklearn.feature_extraction.text import TfidfVectorizer | |
| from sklearn.preprocessing import LabelEncoder | |
| from sklearn.model_selection import train_test_split | |
| from xgboost import XGBClassifier | |
| from transformers import pipeline | |
# --- Page Setup ---
st.set_page_config(page_title="Counselor Assistant", layout="centered")

# --- Styling ---
# Raw CSS injected through st.markdown; rules target .main, h1 and the
# .user / .assistant classes.
_PAGE_CSS = """
<style>
.main { background-color: #f9f9f9; padding: 1rem 2rem; border-radius: 12px; }
h1 { color: #2c3e50; text-align: center; font-size: 2.4rem; }
.user { color: #1f77b4; font-weight: bold; }
.assistant { color: #2ca02c; font-weight: bold; }
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# --- App Header ---
# Introductory copy rendered directly under the page title.
_INTRO_MARKDOWN = """
Welcome, counselor π
This tool offers **AI-powered suggestions** to support you when responding to your patients.
### What it does:
- π§© Predicts what type of support is best: *Advice*, *Validation*, *Information*, or *Question*
- π¬ Generates an LLM-powered suggestion for you
- πΎ Lets you save your session for reflection
This is here to support β not replace β your clinical instincts π
"""
st.title("π§ Mental Health Counselor Assistant")
st.markdown(_INTRO_MARKDOWN)
# --- Load and label dataset ---
# Read the conversations CSV, then keep only the two text columns and only
# the rows where both of them are present.
_TEXT_COLUMNS = ['Context', 'Response']
df = pd.read_csv("dataset/Kaggle_Mental_Health_Conversations_train.csv")
df = df[_TEXT_COLUMNS].dropna().copy()
# Auto-labeling: keyword heuristics for tagging each response with a type.
# Dict order is the match priority: the first label with a keyword hit wins.
keywords_to_labels = {
    'advice': ['try', 'should', 'suggest', 'recommend'],
    'validation': ['understand', 'feel', 'valid', 'normal'],
    'information': ['cause', 'often', 'disorder', 'symptom'],
    'question': ['how', 'what', 'why', 'have you']
}


def auto_label_response(response):
    """Return the heuristic response-type label for a counselor response.

    The text is lower-cased and checked against ``keywords_to_labels`` in
    dict order; the first label with a matching keyword is returned.

    A keyword only matches at the start of a word. Inflected forms still
    match ("try" -> "trying", "feel" -> "feeling"), while hits buried inside
    unrelated words do not ("try" in "country", "cause" in "because",
    "normal" in "abnormal"), which a plain substring test would accept.

    Args:
        response: Response text to classify.

    Returns:
        One of the keys of ``keywords_to_labels``; ``'information'`` when no
        keyword matches.
    """
    # Function-scope import: the file's import section is outside this block.
    import re

    text = response.lower()
    for label, keywords in keywords_to_labels.items():
        # r"\b" anchors the keyword to a word start; re.escape keeps the
        # keyword (including the multi-word "have you") literal.
        if any(re.search(r"\b" + re.escape(keyword), text) for keyword in keywords):
            return label
    return 'information'
# Label every response and build the text the classifier is trained on.
df['response_type'] = df['Response'].apply(auto_label_response)
df['combined_text'] = df['Context'] + " " + df['Response']


@st.cache_resource
def _fit_response_type_model():
    """Fit the label encoder, TF-IDF vectorizer and XGBoost classifier once.

    Streamlit re-executes this script on every widget interaction; caching
    the fitted objects avoids repeating the TF-IDF fit and the model training
    on each rerun. The cached objects are shared, so callers must treat them
    as read-only. The cache is not keyed on the dataset: clear it (or restart
    the app) after changing the CSV.

    Returns:
        A tuple ``(encoder, targets, tfidf, features, split, model)`` where
        ``split`` is the ``[X_train, X_test, y_train, y_test]`` list returned
        by ``train_test_split``.
    """
    # Encode labels
    encoder = LabelEncoder()
    targets = encoder.fit_transform(df['response_type'])
    # TF-IDF vectorizer on combined text
    tfidf = TfidfVectorizer(max_features=2000, ngram_range=(1, 2))
    features = tfidf.fit_transform(df['combined_text'])
    # Train-test split (stratified so each label keeps its share in both parts)
    split = train_test_split(
        features, targets, test_size=0.2, stratify=targets, random_state=42
    )
    # XGBoost Classifier
    model = XGBClassifier(
        objective='multi:softmax',
        num_class=len(encoder.classes_),
        eval_metric='mlogloss',
        use_label_encoder=False,
        max_depth=6,
        learning_rate=0.1,
        n_estimators=100
    )
    model.fit(split[0], split[2])
    return encoder, targets, tfidf, features, split, model


# Re-expose the fitted objects under the module-level names used by the rest
# of the script.
(le, y, vectorizer, X,
 (X_train, X_test, y_train, y_test),
 xgb_model) = _fit_response_type_model()
# --- Select Model Option ---
# Hugging Face model id -> label shown in the dropdown.
model_options = {
    "google/flan-t5-base": "β Flan-T5 (Fast, Clean)",
    "declare-lab/flan-alpaca-gpt4-xl": "π¬ Flan Alpaca GPT4 (Human-sounding)",
    "google/flan-ul2": "π§ Flan-UL2 (Deeper reasoning)"
}


def _model_label(model_id):
    """Return the dropdown label for a model id."""
    return model_options[model_id]


# The widget returns the selected model id; only the displayed text goes
# through _model_label.
model_choice = st.selectbox(
    "π§ Choose a Response Model",
    list(model_options.keys()),
    format_func=_model_label,
)
@st.cache_resource(max_entries=1)
def load_llm(model_name):
    """Return a text2text-generation pipeline for ``model_name``.

    The pipeline is cached so that Streamlit reruns (triggered by every
    widget interaction) reuse the already-built pipeline instead of
    constructing it again. ``max_entries=1`` keeps only the most recently
    requested model, so switching models in the dropdown does not accumulate
    several pipelines in memory.

    Args:
        model_name: Model id passed to ``transformers.pipeline``.

    Returns:
        The pipeline object created by ``transformers.pipeline``.
    """
    return pipeline("text2text-generation", model=model_name)


llm = load_llm(model_choice)
| # --- Utility Functions --- | |
def predict_response_type(user_input):
    """Predict the response type for a patient statement.

    Args:
        user_input: Text to classify.

    Returns:
        A ``(label, proba)`` pair: the predicted class decoded back to its
        string label with ``le``, and the largest value in the model's
        ``predict_proba`` output for this input.
    """
    features = vectorizer.transform([user_input])
    encoded_prediction = xgb_model.predict(features)
    top_probability = xgb_model.predict_proba(features).max()
    decoded_labels = le.inverse_transform(encoded_prediction)
    return decoded_labels[0], top_probability
def build_prompt(user_input, response_type):
    """Build the LLM prompt for a patient statement and a response type.

    Every prompt quotes the patient's words and then appends an instruction
    chosen by ``response_type``. Unknown response types use the
    ``"information"`` instruction.

    Args:
        user_input: What the patient said.
        response_type: ``"advice"``, ``"validation"``, ``"information"`` or
            ``"question"``.

    Returns:
        The prompt string.
    """
    instructions = {
        "advice": "What advice should a mental health counselor give to support them?",
        "validation": "How can a counselor validate and empathize with their emotions?",
        "information": "Explain what might be happening from a mental health perspective.",
        "question": "What thoughtful follow-up questions should a counselor ask?",
    }
    instruction = instructions.get(response_type, instructions["information"])
    return f'A patient said: "{user_input}". {instruction}'
def generate_llm_response(user_input, response_type):
    """Generate a suggested counselor response with the selected LLM.

    Builds the prompt with ``build_prompt``, runs ``llm`` on it while a
    spinner is shown, reports the elapsed time through ``st.info`` and
    returns the generated text.

    Args:
        user_input: What the patient said.
        response_type: Response type used to pick the prompt.

    Returns:
        The ``generated_text`` of the first pipeline output, stripped of
        surrounding whitespace.
    """
    llm_prompt = build_prompt(user_input, response_type)
    started_at = time.time()
    with st.spinner("Thinking through a helpful response for your patient..."):
        outputs = llm(llm_prompt, max_length=150, do_sample=True, temperature=0.7)
    elapsed_seconds = time.time() - started_at
    st.info(f"Response generated in {elapsed_seconds:.1f} seconds")
    suggestion = outputs[0]["generated_text"]
    return suggestion.strip()
def trim_memory(history, max_turns=6):
    """Return the most recent part of ``history``.

    One turn is counted as two entries, so at most ``max_turns * 2`` trailing
    entries are kept.

    Args:
        history: Sequence of history entries, oldest first.
        max_turns: Number of turns to keep. Zero or a negative value keeps
            nothing.

    Returns:
        A new sequence of the same type holding the retained entries.
    """
    # Guard needed because history[-0:] is the *whole* sequence, not an empty
    # one, and a negative limit would slice from the front instead.
    if max_turns <= 0:
        return history[:0]
    return history[-max_turns * 2:]
def save_conversation(history):
    """Write the conversation history to a timestamped CSV file.

    The file is named ``chat_log_<YYYY-MM-DD_HH-MM-SS>.csv`` and is created
    in the current working directory. Each entry becomes one row with its
    role, content, label and confidence (as a rounded percentage); missing
    keys are written as an empty string, or ``0`` for the confidence. A
    success message with the file name is shown afterwards.

    Args:
        history: Iterable of dict entries as stored in the session history.
    """
    now = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    filename = f"chat_log_{now}.csv"
    # Explicit UTF-8: patient text and model output may contain non-ASCII
    # characters, which the platform default encoding may be unable to encode.
    with open(filename, "w", newline='', encoding="utf-8") as f:
        writer = csv.writer(f)
        writer.writerow(["Role", "Content", "Intent", "Confidence"])
        writer.writerows(
            [
                entry.get("role", ""),
                entry.get("content", ""),
                entry.get("label", ""),
                round(float(entry.get("confidence", 0)) * 100),
            ]
            for entry in history
        )
    st.success(f"Saved to {filename}")
# --- Session State Setup ---
# Seed the per-session keys only when they are absent, so values stored by
# earlier runs of the script are kept.
for _state_key, _initial_value in (("history", []), ("user_input", "")):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _initial_value
# --- Display Sample Prompts ---
# Example inputs, shown inside a collapsible panel.
_SAMPLE_INPUTS_MARKDOWN = """
- My patient is constantly feeling overwhelmed at work.
- A student says they panic every time they have to speak in class.
- Someone told me they think theyβll never feel okay again.
"""
_samples_panel = st.expander("π‘ Sample inputs you can try")
_samples_panel.markdown(_SAMPLE_INPUTS_MARKDOWN)
# --- Text Area + Word Counter ---
MAX_WORDS = 1000
# Reserve the counter's slot above the text area now and fill it only after
# the widget has returned. Counting before the widget call would use the text
# stored by the previous run, so the display would lag one interaction behind.
_word_counter_slot = st.empty()
st.session_state.user_input = st.text_area(
    "π¬ What did your patient say?",
    value=st.session_state.user_input,
    placeholder="e.g. I just feel like I'm never going to get better.",
    height=100
)
word_count = len(st.session_state.user_input.split())
_word_counter_slot.markdown(f"**π Input Length:** {word_count} / {MAX_WORDS} words")
# --- Button Layout ---
# Three columns in a 2:1:1 width ratio, one action button in each. Every
# flag is True only on the run triggered by clicking that button.
col1, col2, col3 = st.columns([2, 1, 1])
send = col1.button("π‘ Suggest Response")
save = col2.button("π Save This")
reset = col3.button("π Reset")
# --- Button Logic ---
# Strip first so whitespace-only input counts as empty and is never sent to
# the classifier or the LLM.
user_input = st.session_state.user_input.strip()
if send and user_input:
    # Classify the statement, generate a suggestion for that response type,
    # then record both sides of the exchange.
    predicted_type, confidence = predict_response_type(user_input)
    reply = generate_llm_response(user_input, predicted_type)
    st.session_state.history.append({"role": "user", "content": user_input})
    st.session_state.history.append({
        "role": "assistant",
        "content": reply,
        "label": predicted_type,
        "confidence": confidence
    })
    # Keep only the most recent turns in the stored history.
    st.session_state.history = trim_memory(st.session_state.history)
if save:
    save_conversation(st.session_state.history)
if reset:
    st.session_state.history = []
    st.session_state.user_input = ""
    st.success("Conversation has been cleared.")
# --- Chat History Display ---
# Entries are rendered oldest first. Patient entries are a single line;
# assistant entries also get a caption with the predicted intent and its
# confidence as a percentage.
st.markdown("---")
for turn in st.session_state.history:
    if turn["role"] != "user":
        st.markdown(f"π©ββοΈπ¨ββοΈ **Suggested Counselor Response:** {turn['content']}")
        st.caption(f"_Intent: {turn['label']} (Confidence: {turn['confidence']:.0%})_")
        # NOTE(review): the source's indentation was lost; the separator is
        # assumed to follow each assistant entry — confirm against the original.
        st.markdown("---")
        continue
    st.markdown(f"π§ββοΈ **Patient:** {turn['content']}")