Spaces:

Blaiseboy
/

BioGPT-chatbot

Sleeping

App Files Files Community

Blaiseboy committed on Aug 5

Commit

8e3dd93

verified ·

1 Parent(s): d830966

Upload 3 files

Browse files

Files changed (3) hide show

app.py +203 -0
medical_chatbot.py +730 -0
requirements.txt +36 -0

app.py ADDED Viewed

	@@ -0,0 +1,203 @@

+import gradio as gr
+import os
+import torch
+from medical_chatbot import ColabBioGPTChatbot
+def initialize_chatbot():
+    """Build the chatbot and load the pediatric knowledge file.
+
+    Returns:
+        tuple: ``(chatbot, status, success)``. ``chatbot`` is the
+        ``ColabBioGPTChatbot`` instance, or ``None`` if anything raised.
+        ``status`` is a human-readable message. ``success`` is ``True`` only
+        when the knowledge file was found and loaded.
+    """
+    medical_file = "Pediatric_cleaned.txt"
+    try:
+        print("🚀 Initializing Medical Chatbot...")
+        # The GPU, and 8-bit loading with it, is requested only when CUDA is present.
+        cuda_ok = torch.cuda.is_available()
+        bot = ColabBioGPTChatbot(use_gpu=cuda_ok, use_8bit=cuda_ok)
+        # Guard clause: a missing data file still returns the bot, flagged as not loaded.
+        if not os.path.exists(medical_file):
+            status = f"❌ Medical file '{medical_file}' not found. Please ensure the file is in the same directory."
+            return bot, status, False
+        bot.load_medical_data(medical_file)
+        status = f"✅ Medical file '{medical_file}' loaded successfully! Ready to chat!"
+        return bot, status, True
+    except Exception as e:
+        # Any failure (model download, data loading, ...) is reported, not raised.
+        error_msg = f"❌ Failed to initialize chatbot: {str(e)}"
+        print(error_msg)
+        return None, error_msg, False
+# Startup diagnostics: report whether the knowledge file is present and how large it is.
+medical_file = "Pediatric_cleaned.txt"
+print(f"Debug: Looking for file: {medical_file}")
+print(f"Debug: File exists: {os.path.exists(medical_file)}")
+if os.path.exists(medical_file):
+    # Decode as UTF-8, the encoding load_medical_data() uses for the same file.
+    # errors='replace' keeps this debug-only read from aborting startup on a stray byte.
+    with open(medical_file, 'r', encoding='utf-8', errors='replace') as f:
+        content = f.read()
+    print(f"Debug: File size: {len(content)} characters")
+# Initialize chatbot at startup
+print("🏥 Starting Pediatric Medical Assistant...")
+chatbot, startup_status, medical_file_loaded = initialize_chatbot()
+def generate_response(user_input, history):
+    """Answer one chat turn and return the updated history.
+
+    Args:
+        user_input: Text typed by the user; ``None`` is treated as blank.
+        history: List of ``(user, bot)`` pairs shown so far; ``None`` is
+            treated as an empty history.
+
+    Returns:
+        tuple: ``(history, "")``. The empty string clears the input textbox.
+    """
+    # Work on a fresh list so a missing history cannot break the concatenations below.
+    history = list(history or [])
+    # Blank submissions are ignored before anything else, so they never add error rows.
+    if not user_input or not user_input.strip():
+        return history, ""
+    if not chatbot:
+        return history + [("System Error", "❌ Chatbot failed to initialize. Please refresh the page and try again.")], ""
+    if not medical_file_loaded:
+        return history + [(user_input, "⚠️ Medical data failed to load. The chatbot may not have access to the full medical knowledge base.")], ""
+    try:
+        bot_response = chatbot.chat(user_input)
+    except Exception as e:
+        # Report the failure inside the conversation instead of crashing the UI callback.
+        bot_response = f"⚠️ Sorry, I encountered an error: {str(e)}. Please try rephrasing your question."
+    return history + [(user_input, bot_response)], ""
+# Debug section: summarise what the startup initialisation produced.
+# initialize_chatbot() has already run once above; calling it again here would
+# load the models and embed the knowledge file a second time.
+print(f"Debug: Medical file loaded = {medical_file_loaded}")
+if chatbot and hasattr(chatbot, 'knowledge_chunks'):
+    print(f"Debug: Number of knowledge chunks = {len(chatbot.knowledge_chunks)}")
+    if chatbot.knowledge_chunks:
+        print(f"Debug: First chunk preview = {chatbot.knowledge_chunks[0]['text'][:100]}...")
+else:
+    print("Debug: No knowledge_chunks attribute found")
+# Custom stylesheet handed to gr.Blocks(css=...) when the interface is built.
+# NOTE(review): the .chatbot / .message / .user-message / .bot-message selectors
+# presumably target Gradio's chat markup — confirm they match the class names
+# emitted by the installed Gradio version.
+custom_css = """
+.gradio-container {
+    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
+}
+.chatbot {
+    height: 500px !important;
+}
+.message {
+    padding: 10px;
+    margin: 5px;
+    border-radius: 10px;
+}
+.user-message {
+    background-color: #e3f2fd;
+    margin-left: 20%;
+}
+.bot-message {
+    background-color: #f5f5f5;
+    margin-right: 20%;
+}
+"""
+# Create Gradio interface: header, status line, chat column, help column, footer.
+with gr.Blocks(css=custom_css, title="Pediatric Medical Assistant") as demo:
+    gr.Markdown(
+        """
+        # 🩺 Pediatric Medical Assistant
+        Welcome to your AI-powered pediatric medical assistant! This chatbot uses advanced medical AI (BioGPT)
+        to provide evidence-based information about children's health and medical conditions.
+        **⚠️ Important Disclaimer:** This tool provides educational information only.
+        Always consult qualified healthcare professionals for medical diagnosis, treatment, and personalized advice.
+        """
+    )
+    # Display startup status
+    gr.Markdown(f"**System Status:** {startup_status}")
+    # Chat interface
+    with gr.Row():
+        with gr.Column(scale=4):
+            # avatar_images is omitted on purpose: Gradio expects image file paths
+            # or URLs there, and the emoji strings used before are neither.
+            chatbot_ui = gr.Chatbot(
+                label="💬 Chat with Medical AI",
+                height=500,
+                show_label=True,
+            )
+            with gr.Row():
+                user_input = gr.Textbox(
+                    placeholder="Ask a pediatric health question... (e.g., 'What causes fever in children?')",
+                    lines=2,
+                    max_lines=5,
+                    show_label=False,
+                    scale=4
+                )
+                submit_btn = gr.Button("Send 📤", variant="primary", scale=1)
+        with gr.Column(scale=1):
+            gr.Markdown(
+                """
+                ### 💡 Example Questions:
+                - "What causes fever in children?"
+                - "How to treat a child's cough?"
+                - "When should I call the doctor?"
+                - "What are signs of dehydration?"
+                - "How to prevent common infections?"
+                ### 🔧 System Info:
+                - **Model:** BioGPT (Medical AI)
+                - **Specialization:** Pediatric Medicine
+                - **Search:** Vector + Keyword
+                """
+            )
+    # Event handlers
+    def submit_message(user_msg, history):
+        """Forward a submitted message to generate_response()."""
+        return generate_response(user_msg, history)
+    # Connect events: pressing Enter in the textbox and clicking the button
+    # share the same handler, inputs and outputs.
+    for register_event in (user_input.submit, submit_btn.click):
+        register_event(
+            fn=submit_message,
+            inputs=[user_input, chatbot_ui],
+            outputs=[chatbot_ui, user_input],
+            show_progress=True
+        )
+    # Footer
+    gr.Markdown(
+        """
+        ---
+        **🏥 Medical AI Assistant** | Powered by BioGPT | For Educational Purposes Only
+        **Remember:** Always consult healthcare professionals for medical emergencies and personalized medical advice.
+        """
+    )
+# Launch configuration for Hugging Face Spaces
+if __name__ == "__main__":
+    # Options gathered in one mapping so the deployment settings read as a unit.
+    launch_options = {
+        "server_name": "0.0.0.0",  # Required for HF Spaces
+        "server_port": 7860,       # Default port for HF Spaces
+        "show_error": True,        # Show errors for debugging
+    }
+    demo.launch(**launch_options)

medical_chatbot.py ADDED Viewed

	@@ -0,0 +1,730 @@

+import os
+import re
+import torch
+import warnings
+import numpy as np
+import faiss
+from transformers import (
+    AutoTokenizer,
+    AutoModelForCausalLM,
+    BitsAndBytesConfig
+)
+from sentence_transformers import SentenceTransformer
+from typing import List, Dict, Optional
+import time
+from datetime import datetime
+# Suppress warnings for cleaner output
+warnings.filterwarnings('ignore')
+class ColabBioGPTChatbot:
+    def __init__(self, use_gpu=True, use_8bit=True):
+        """Create the chatbot and eagerly load its language and embedding models.
+
+        Args:
+            use_gpu: Request CUDA; it is only used when ``torch`` reports it available.
+            use_8bit: Request 8-bit loading of the language model.
+        """
+        print("🏥 Initializing Medical Chatbot...")
+        # Hardware preferences
+        self.use_gpu, self.use_8bit = use_gpu, use_8bit
+        if torch.cuda.is_available() and use_gpu:
+            self.device = "cuda"
+        else:
+            self.device = "cpu"
+        print(f"🖥️ Using device: {self.device}")
+        # Language model state, filled in by setup_biogpt()
+        self.tokenizer = None
+        self.model = None
+        # Retrieval state, filled in by load_sentence_transformer() and load_medical_data()
+        self.embedding_model = None
+        self.faiss_index = None
+        self.faiss_ready = False
+        self.use_embeddings = True
+        self.knowledge_chunks = []
+        self.conversation_history = []
+        # Initialize components
+        self.setup_biogpt()
+        self.load_sentence_transformer()
+    def setup_biogpt(self):
+        """Load ``microsoft/BioGPT-Large``; on any failure hand over to the base-model fallback.
+
+        8-bit quantization is configured only when ``self.use_8bit`` is set and
+        the device is CUDA. The tokenizer's pad token falls back to its EOS
+        token when none is defined.
+        """
+        print("🧠 Loading BioGPT model...")
+        try:
+            # Try BioGPT-Large first
+            model_name = "microsoft/BioGPT-Large"
+            print(f"Attempting to load {model_name}...")
+            on_cuda = self.device == "cuda"
+            quantization_config = None
+            if self.use_8bit and on_cuda:
+                quantization_config = BitsAndBytesConfig(
+                    load_in_8bit=True,
+                    llm_int8_threshold=6.0,
+                    llm_int8_has_fp16_weight=False,
+                )
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            # Half precision and automatic device placement apply on CUDA only.
+            load_kwargs = dict(
+                quantization_config=quantization_config,
+                torch_dtype=torch.float16 if on_cuda else torch.float32,
+                device_map="auto" if on_cuda else None,
+                trust_remote_code=True,
+                low_cpu_mem_usage=True,
+            )
+            self.model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
+            # An unquantized model on CUDA is additionally moved to the device explicitly.
+            if on_cuda and quantization_config is None:
+                self.model = self.model.to(self.device)
+            print("✅ BioGPT-Large loaded successfully!")
+        except Exception as e:
+            print(f"❌ BioGPT-Large loading failed: {e}")
+            print("🔁 Falling back to base BioGPT...")
+            self.setup_fallback_biogpt()
+    def setup_fallback_biogpt(self):
+        """Load the base ``microsoft/BioGPT`` model in float32.
+
+        If this load fails too, ``self.model`` and ``self.tokenizer`` are both
+        reset to ``None`` and the error is printed rather than raised.
+        """
+        try:
+            model_name = "microsoft/BioGPT"
+            print(f"Loading fallback model: {model_name}")
+            self.tokenizer = AutoTokenizer.from_pretrained(model_name)
+            # Reuse EOS as the padding token when the tokenizer defines none.
+            if self.tokenizer.pad_token is None:
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            load_kwargs = dict(
+                torch_dtype=torch.float32,
+                trust_remote_code=True,
+                low_cpu_mem_usage=True,
+            )
+            self.model = AutoModelForCausalLM.from_pretrained(model_name, **load_kwargs)
+            if self.device == "cuda":
+                self.model = self.model.to(self.device)
+            print("✅ Base BioGPT model loaded successfully!")
+        except Exception as e:
+            print(f"❌ Failed to load fallback BioGPT: {e}")
+            self.model = self.tokenizer = None
+    def load_sentence_transformer(self):
+        """Load the sentence-embedding model and create an empty FAISS L2 index.
+
+        On success ``self.embedding_model`` and ``self.faiss_index`` are set
+        and ``self.faiss_ready`` becomes ``True``. On any failure the error is
+        printed and both ``self.use_embeddings`` and ``self.faiss_ready`` are
+        set to ``False``.
+        """
+        try:
+            print("🔮 Loading sentence transformer...")
+            self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')
+            # Size the index from the loaded model, so changing the model name
+            # cannot leave a mismatched dimension behind. 384 (the dimension
+            # previously hard-coded for all-MiniLM-L6-v2) is kept as the
+            # fallback when the model reports None.
+            embedding_dim = self.embedding_model.get_sentence_embedding_dimension() or 384
+            # Initialize FAISS index (will be populated when data is loaded)
+            self.faiss_index = faiss.IndexFlatL2(embedding_dim)
+            self.faiss_ready = True
+            print("✅ Sentence transformer and FAISS index ready!")
+        except Exception as e:
+            print(f"❌ Failed to load sentence transformer: {e}")
+            self.use_embeddings = False
+            self.faiss_ready = False
+    def load_medical_data(self, file_path):
+        """Read a UTF-8 text file, chunk it and, when possible, index the chunks.
+
+        Args:
+            file_path: Path of the medical text file.
+
+        Raises:
+            ValueError: If the file is missing or cannot be read or decoded.
+                The underlying error is chained as ``__cause__``.
+        """
+        print(f"📖 Loading medical data from {file_path}...")
+        try:
+            if not os.path.exists(file_path):
+                raise FileNotFoundError(f"File {file_path} not found")
+            with open(file_path, 'r', encoding='utf-8') as f:
+                text = f.read()
+            print(f"📄 File loaded: {len(text):,} characters")
+        except (OSError, UnicodeDecodeError) as e:
+            print(f"❌ Error loading file: {e}")
+            raise ValueError(f"Failed to load medical data: {e}") from e
+        # Create chunks
+        print("📝 Creating medical chunks...")
+        chunks = self.create_medical_chunks(text)
+        print(f"📋 Created {len(chunks)} medical chunks")
+        self.knowledge_chunks = chunks
+        # Empty the vector index before it is refilled. Search results are
+        # mapped back through their row number, so rows left over from an
+        # earlier load would point at the wrong entries of knowledge_chunks.
+        if self.faiss_index is not None:
+            self.faiss_index.reset()
+        # Generate embeddings if available
+        if self.use_embeddings and self.embedding_model and self.faiss_ready:
+            try:
+                self.generate_embeddings_with_progress(chunks)
+                print("✅ Medical data loaded with embeddings!")
+            except Exception as e:
+                # Best effort: retrieval falls back to keyword search.
+                print(f"⚠️ Embedding generation failed: {e}")
+                print("✅ Medical data loaded (keyword search mode)")
+        else:
+            print("✅ Medical data loaded (keyword search mode)")
+    def create_medical_chunks(self, text: str, chunk_size: int = 400) -> List[Dict]:
+        """Split text into chunks of at most ``chunk_size`` words where possible.
+
+        Paragraphs (separated by blank lines) of 50 characters or fewer are
+        dropped. A paragraph that fits in ``chunk_size`` words becomes one
+        chunk. A longer one is split at sentence boundaries and its sentences
+        are packed greedily. A single sentence longer than ``chunk_size``
+        still becomes its own chunk.
+
+        Args:
+            text: Full document text.
+            chunk_size: Maximum number of whitespace-separated words per chunk.
+
+        Returns:
+            A list of dicts with keys ``'id'`` (sequential from 0), ``'text'``
+            and ``'medical_focus'``.
+        """
+        chunks: List[Dict] = []
+
+        def add_chunk(chunk_text: str) -> None:
+            # ids are sequential, so the next id is the current list length
+            chunks.append({
+                'id': len(chunks),
+                'text': chunk_text,
+                'medical_focus': self.identify_medical_focus(chunk_text)
+            })
+
+        # Split by paragraphs first
+        paragraphs = [p.strip() for p in text.split('\n\n') if len(p.strip()) > 50]
+        for paragraph in paragraphs:
+            if len(paragraph.split()) <= chunk_size:
+                add_chunk(paragraph)
+                continue
+            # Split only where terminal punctuation is followed by whitespace.
+            # This keeps decimals such as "38.5" intact and preserves each
+            # sentence's own terminator.
+            pending: List[str] = []
+            pending_words = 0
+            for sentence in re.split(r'(?<=[.!?])\s+', paragraph):
+                sentence = sentence.strip()
+                if not sentence:
+                    continue
+                sentence_words = len(sentence.split())
+                if pending and pending_words + sentence_words > chunk_size:
+                    add_chunk(' '.join(pending))
+                    pending, pending_words = [], 0
+                pending.append(sentence)
+                pending_words += sentence_words
+            if pending:
+                add_chunk(' '.join(pending))
+        return chunks
+    def identify_medical_focus(self, text: str) -> str:
+        """Return the first category with a keyword occurring in ``text``.
+
+        Matching is a case-insensitive substring test. Categories are tried
+        in the order listed below, and ``'general_medical'`` is returned when
+        nothing matches.
+        """
+        lowered = text.lower()
+        keyword_table = (
+            ('pediatric_symptoms', ('fever', 'cough', 'rash', 'vomiting', 'diarrhea')),
+            ('treatments', ('treatment', 'therapy', 'medication', 'antibiotics')),
+            ('diagnosis', ('diagnosis', 'diagnostic', 'symptoms', 'signs')),
+            ('emergency', ('emergency', 'urgent', 'serious', 'hospital')),
+            ('prevention', ('prevention', 'vaccine', 'immunization', 'avoid')),
+        )
+        return next(
+            (label for label, terms in keyword_table if any(term in lowered for term in terms)),
+            'general_medical',
+        )
+    def generate_embeddings_with_progress(self, chunks: List[Dict]):
+        """Embed every chunk's text in batches and add the vectors to the FAISS index.
+
+        Args:
+            chunks: Dicts carrying a ``'text'`` key, as produced by
+                ``create_medical_chunks``.
+
+        Raises:
+            Exception: Whatever the embedding model or the index raises is
+                printed and re-raised unchanged.
+        """
+        print("🔮 Generating embeddings...")
+        try:
+            texts = [chunk['text'] for chunk in chunks]
+            # Nothing to embed: an empty batch list cannot be stacked into the
+            # 2-D array the index expects, so leave the index untouched.
+            if not texts:
+                print("   ⚠️ No chunks to embed; FAISS index left unchanged")
+                return
+            # Generate embeddings in batches
+            batch_size = 32
+            batches = []
+            for start in range(0, len(texts), batch_size):
+                batch_embeddings = self.embedding_model.encode(
+                    texts[start:start + batch_size], show_progress_bar=False
+                )
+                batches.append(np.asarray(batch_embeddings, dtype='float32'))
+                progress = min(start + batch_size, len(texts))
+                print(f"   Progress: {progress}/{len(texts)} chunks processed", end='\r')
+            print(f"\n   ✅ Generated embeddings for {len(texts)} chunks")
+            # Add to FAISS index as a single (n_chunks, dim) float32 matrix
+            embeddings_array = np.ascontiguousarray(np.vstack(batches), dtype='float32')
+            self.faiss_index.add(embeddings_array)
+            print("✅ Embeddings added to FAISS index!")
+        except Exception as e:
+            print(f"❌ Embedding generation failed: {e}")
+            raise
+    def retrieve_medical_context(self, query: str, n_results: int = 3) -> List[str]:
+        """Return up to ``n_results`` chunk texts relevant to ``query``.
+
+        Vector search is used when embeddings are enabled and the index holds
+        vectors. Keyword search is used when vector search is unavailable,
+        fails, or yields nothing.
+        """
+        vector_search_ready = (
+            self.use_embeddings
+            and self.embedding_model
+            and self.faiss_ready
+            and self.faiss_index.ntotal > 0
+        )
+        if vector_search_ready:
+            try:
+                # Embed the query and look up its nearest neighbours
+                query_vector = np.array(self.embedding_model.encode([query])).astype('float32')
+                top_k = min(n_results, self.faiss_index.ntotal)
+                _, indices = self.faiss_index.search(query_vector, top_k)
+                # Skip the -1 placeholders and rows without a matching chunk
+                known = len(self.knowledge_chunks)
+                hits = [
+                    self.knowledge_chunks[row]['text']
+                    for row in indices[0]
+                    if row != -1 and row < known
+                ]
+                if hits:
+                    return hits
+            except Exception as e:
+                print(f"⚠️ Embedding search failed: {e}")
+        # Fallback to keyword search
+        return self.keyword_search_medical(query, n_results)
+    def keyword_search_medical(self, query: str, n_results: int) -> List[str]:
+        """Rank chunks by the share of query words they contain.
+
+        Words are compared case-insensitively with punctuation removed. A
+        chunk must share at least one word with the query to be returned.
+        Among matching chunks, those whose ``medical_focus`` is symptoms,
+        treatments or diagnosis get a 0.3 bonus. Equal scores keep their
+        document order.
+
+        Args:
+            query: User question.
+            n_results: Maximum number of chunk texts to return.
+
+        Returns:
+            The best-matching chunk texts, highest score first. The list is
+            empty when nothing overlaps.
+        """
+        if not self.knowledge_chunks:
+            return []
+        # \w+ tokenisation lets "children?" in a question match "children" in a chunk
+        query_words = set(re.findall(r'\w+', query.lower()))
+        if not query_words:
+            return []
+        boosted_focus = {'pediatric_symptoms', 'treatments', 'diagnosis'}
+        chunk_scores = []
+        for chunk_info in self.knowledge_chunks:
+            chunk_text = chunk_info['text']
+            word_overlap = len(query_words & set(re.findall(r'\w+', chunk_text.lower())))
+            # The boost refines the order of relevant chunks. It must not make
+            # an unrelated chunk look relevant.
+            if not word_overlap:
+                continue
+            score = word_overlap / len(query_words)
+            if chunk_info.get('medical_focus') in boosted_focus:
+                score += 0.3
+            chunk_scores.append((score, chunk_text))
+        # Return top matches
+        chunk_scores.sort(key=lambda scored: scored[0], reverse=True)
+        return [chunk for _, chunk in chunk_scores[:n_results]]
+    def generate_biogpt_response(self, context: str, query: str) -> str:
+        """Build the answer text for ``query`` from the retrieved ``context``.
+
+        No text is generated by the language model here; the call is handed
+        straight to ``create_context_based_response``.
+        """
+        # BioGPT output was judged too poor, so the retrieved context is used directly.
+        answer = self.create_context_based_response(context, query)
+        return answer
+    def create_context_based_response(self, context: str, query: str) -> str:
+        """Pick the context sentences that best match the query.
+
+        The context is split at sentence boundaries, meaning terminal
+        punctuation followed by whitespace, so decimals such as "36.5" stay
+        intact. Among the first 20 sentences, up to four with the highest
+        word overlap with the query are joined. If none overlaps, the first
+        three sentences are used. The result is capped at 500 characters.
+
+        Args:
+            context: Retrieved chunk texts joined into one string.
+            query: User question.
+
+        Returns:
+            The assembled answer, or a fixed notice when ``context`` is empty.
+        """
+        if not context:
+            return "I don't have specific information about this topic in my medical database."
+        sentences = []
+        for candidate in re.split(r'(?<=[.!?])\s+', context):
+            candidate = candidate.strip()
+            # Ignore fragments of 15 characters or fewer (terminator excluded)
+            if len(candidate.rstrip('.!?').strip()) <= 15:
+                continue
+            if candidate[-1] not in '.!?':
+                candidate += '.'
+            sentences.append(candidate)
+        # Compare on bare words so "children." still matches "children"
+        query_words = set(re.findall(r'\w+', query.lower()))
+        scored_sentences = []
+        for sentence in sentences[:20]:
+            score = len(query_words & set(re.findall(r'\w+', sentence.lower())))
+            if score > 0:
+                scored_sentences.append((score, sentence))
+        # Highest overlap first; ties keep their order of appearance
+        scored_sentences.sort(key=lambda scored: scored[0], reverse=True)
+        if scored_sentences:
+            response = ' '.join(sent for _, sent in scored_sentences[:4])
+        else:
+            # Fallback to first few sentences
+            response = ' '.join(sentences[:3])
+        # Clean up the response
+        response = re.sub(r'\s+', ' ', response).strip()
+        return response[:500] + '...' if len(response) > 500 else response
+    def clean_medical_response(self, response: str) -> str:
+        """Strip generation artifacts and keep at most two complete sentences.
+
+        Removes HTML-like tags, block symbols and the FREETEXT/INTRO markers
+        (with or without a leading slash), then collapses whitespace. The
+        first two sentences that are longer than 15 characters and free of
+        artifacts are returned with their original punctuation. If none
+        qualifies, the first 150 characters are returned instead.
+        """
+        # Remove training artifacts and unwanted symbols
+        response = re.sub(r'<[^>]*>', '', response)  # Remove HTML-like tags
+        response = re.sub(r'▃+', '', response)  # Remove block symbols
+        # The optional slash is part of the match, so "/FREETEXT" leaves no stray "/"
+        response = re.sub(r'/?(?:FREETEXT|INTRO)', '', response)
+        response = re.sub(r'\s+', ' ', response).strip()  # Clean up whitespace
+        artifacts = ('▃', '<', '>', 'freetext')
+        clean_sentences = []
+        # Split where terminal punctuation is followed by whitespace, so
+        # decimals such as "3.5" are not cut in half.
+        for sentence in re.split(r'(?<=[.!?])\s+', response):
+            sentence = sentence.strip()
+            body = sentence.rstrip('.!?').strip()
+            lowered = sentence.lower()
+            # Skip very short sentences and those with artifacts
+            if len(body) > 15 and not any(artifact in lowered for artifact in artifacts):
+                clean_sentences.append(sentence if sentence[-1] in '.!?' else sentence + '.')
+            if len(clean_sentences) >= 2:  # Limit to 2 good sentences
+                break
+        if clean_sentences:
+            return ' '.join(clean_sentences)
+        # Fallback to first 150 characters if no good sentences found
+        cleaned = response[:150].strip()
+        if cleaned and not cleaned.endswith('.'):
+            cleaned += '.'
+        return cleaned
+    def fallback_response(self, context: str, query: str) -> str:
+        """Return the first one or two usable sentences of ``context``.
+
+        A sentence is usable when, after splitting on '.', it is longer than
+        20 characters. With no usable sentence the first 300 characters of
+        the context followed by '...' are returned. ``query`` is not used.
+        """
+        usable = [part.strip() for part in context.split('.') if len(part.strip()) > 20]
+        if not usable:
+            return context[:300] + '...'
+        # Re-attach the period that split('.') removed
+        return ' '.join(part + '.' for part in usable[:2])
+    def handle_conversational_interactions(self, query: str) -> Optional[str]:
+        """Return a canned reply for a standalone greeting, thanks or goodbye.
+
+        The whole message must be one of the listed phrases, optionally
+        followed by punctuation ("Hello!", "Thanks."). Anything longer, such
+        as "hello, my child has a fever", is a real question and yields
+        ``None``.
+
+        Args:
+            query: Raw user message.
+
+        Returns:
+            The reply text, or ``None`` when the message is not small talk.
+        """
+        greeting_reply = "👋 Hello! I'm your pediatric medical AI assistant. How can I help you with medical questions today?"
+        thanks_reply = "🙏 You're welcome! I'm glad I could help. Remember to consult healthcare professionals for medical decisions. What else can I help you with?"
+        goodbye_reply = "👋 Goodbye! Take care and remember to consult healthcare professionals for any medical concerns. Stay healthy!"
+        # Phrase alternatives per reply. Besides the original phrases, this
+        # includes the variants that otherwise survive in this file only as
+        # orphaned pattern fragments ("hi there", "thanks a lot", "see ya", ...).
+        canned_replies = (
+            (r'hello there|hi there|hello|hi|hey|good morning|good afternoon|good evening', greeting_reply),
+            (r'thank you so much|thanks a lot|thank you|thanks|thx', thanks_reply),
+            (r'see you later|see ya|goodbye|bye', goodbye_reply),
+        )
+        query_lower = query.lower().strip()
+        for alternatives, reply in canned_replies:
+            # fullmatch keeps the "must be standalone" rule; the tail only
+            # tolerates trailing punctuation and spaces.
+            if re.fullmatch(rf'(?:{alternatives})[\s.!?,]*', query_lower):
+                return reply
+        return None
+    def chat(self, query: str) -> str:
+        """Answer one user message.
+
+        This single definition replaces six byte-identical copies that were
+        interleaved with orphaned regex and pattern-list fragments. Four of
+        those copies ended in ``return final_response,`` and so returned a
+        1-tuple instead of the annotated ``str``. The fragments sat after a
+        ``return`` and could never run.
+
+        NOTE(review): another ``def chat`` follows this one further down the
+        class. Python keeps the last definition in a class body, so confirm
+        that copy is fixed in the same way.
+
+        Args:
+            query: Raw user message.
+
+        Returns:
+            A greeting for blank input, a canned reply for small talk, a
+            notice when data, model or context is missing, otherwise the
+            formatted medical answer with the educational-use disclaimer.
+        """
+        if not query.strip():
+            return "Hello! I'm your pediatric medical AI assistant. How can I help you today?"
+        # Handle conversational interactions
+        conversational_response = self.handle_conversational_interactions(query)
+        if conversational_response:
+            return conversational_response
+        if not self.knowledge_chunks:
+            return "Please load medical data first to access the medical knowledge base."
+        if self.model is None or self.tokenizer is None:
+            return "Medical model not available. Please check the setup and try again."
+        # Retrieve context
+        context = self.retrieve_medical_context(query)
+        if not context:
+            return "I don't have specific information about this topic in my medical database. Please consult with a healthcare professional for personalized medical advice."
+        # Generate response
+        main_context = '\n\n'.join(context)
+        response = self.generate_biogpt_response(main_context, query)
+        # Format final response
+        final_response = f"🩺 **Medical Information:** {response}\n\n⚠️ **Important:** This information is for educational purposes only. Always consult with qualified healthcare professionals for medical diagnosis, treatment, and personalized advice."
+        return final_response
+    def chat(self, query: str) -> str:
+        """Main chat function"""
+        if not query.strip():
+            return "Hello! I'm your pediatric medical AI assistant. How can I help you today?"
+        # Handle conversational interactions
+        conversational_response = self.handle_conversational_interactions(query)
+        if conversational_response:
+            return conversational_response
+        if not self.knowledge_chunks:
+            return "Please load medical data first to access the medical knowledge base."
+        if not self.model or not self.tokenizer:
+            return "Medical model not available. Please check the setup and try again."
+        # Retrieve context
+        context = self.retrieve_medical_context(query)
+        if not context:
+            return "I don't have specific information about this topic in my medical database. Please consult with a healthcare professional for personalized medical advice."
+        # Generate response
+        main_context = '\n\n'.join(context)
+        response = self.generate_biogpt_response(main_context, query)
+        # Format final response
+        final_response = f"🩺 **Medical Information:** {response}\n\n⚠️ **Important:** This information is for educational purposes only. Always consult with qualified healthcare professionals for medical diagnosis, treatment, and personalized advice."
+        return final_response,
+        r'^\s*(have a good day|take care)\s*$'
+    def chat(self, query: str) -> str:
+        """Main chat function"""
+        if not query.strip():
+            return "Hello! I'm your pediatric medical AI assistant. How can I help you today?"
+        # Handle conversational interactions
+        conversational_response = self.handle_conversational_interactions(query)
+        if conversational_response:
+            return conversational_response
+        if not self.knowledge_chunks:
+            return "Please load medical data first to access the medical knowledge base."
+        if not self.model or not self.tokenizer:
+            return "Medical model not available. Please check the setup and try again."
+        # Retrieve context
+        context = self.retrieve_medical_context(query)
+        if not context:
+            return "I don't have specific information about this topic in my medical database. Please consult with a healthcare professional for personalized medical advice."
+        # Generate response
+        main_context = '\n\n'.join(context)
+        response = self.generate_biogpt_response(main_context, query)
+        # Format final response
+        final_response = f"🩺 **Medical Information:** {response}\n\n⚠️ **Important:** This information is for educational purposes only. Always consult with qualified healthcare professionals for medical diagnosis, treatment, and personalized advice."
+        return final_response
+        for pattern in goodbye_patterns:
+            if re.match(pattern, query_lower):
+                return "👋 Goodbye! Take care and remember to consult healthcare professionals for any medical concerns. Stay healthy!"
+        return None
+    def chat(self, query: str) -> str:
+        """Main chat function"""
+        if not query.strip():
+            return "Hello! I'm your pediatric medical AI assistant. How can I help you today?"
+        # Handle conversational interactions
+        conversational_response = self.handle_conversational_interactions(query)
+        if conversational_response:
+            return conversational_response
+        if not self.knowledge_chunks:
+            return "Please load medical data first to access the medical knowledge base."
+        if not self.model or not self.tokenizer:
+            return "Medical model not available. Please check the setup and try again."
+        # Retrieve context
+        context = self.retrieve_medical_context(query)
+        if not context:
+            return "I don't have specific information about this topic in my medical database. Please consult with a healthcare professional for personalized medical advice."
+        # Generate response
+        main_context = '\n\n'.join(context)
+        response = self.generate_biogpt_response(main_context, query)
+        # Format final response
+        final_response = f"🩺 **Medical Information:** {response}\n\n⚠️ **Important:** This information is for educational purposes only. Always consult with qualified healthcare professionals for medical diagnosis, treatment, and personalized advice."
+        return final_response

requirements.txt ADDED Viewed

	@@ -0,0 +1,36 @@

+# Core ML and NLP libraries
+torch>=2.0.0,<2.2.0
+transformers>=4.30.0,<4.40.0
+sentence-transformers>=2.2.0,<3.0.0
+accelerate>=0.20.0,<0.25.0
+# Quantization support (for GPU optimization)
+bitsandbytes>=0.41.0,<0.43.0
+# Vector search (CPU version for HF Spaces compatibility)
+faiss-cpu>=1.7.4,<1.8.0
+# Scientific computing
+numpy>=1.21.0,<1.26.0
+scipy>=1.9.0,<1.12.0
+# Gradio for web interface (stable version)
+gradio>=4.0.0,<5.0.0
+# Essential utilities
+tqdm>=4.64.0
+requests>=2.28.0
+packaging>=21.0
+# Tokenization support
+tokenizers>=0.13.0,<0.16.0
+# System monitoring
+psutil>=5.9.0
+# Additional stability packages
+safetensors>=0.3.0
+huggingface-hub>=0.15.0
+# Required for BioGPT tokenizer
+sacremoses>=0.0.53