brahmanarisetty committed
Commit 0c61b1f · verified · 1 Parent(s): d6da279

Update app.py

Files changed (1):
  1. app.py  +24 -85

app.py CHANGED
@@ -2,12 +2,11 @@
 """
 IT Support Chatbot Application
 - Converts the original Colab notebook into a deployable Gradio app.
-- Loads data from a local CSV file.
+- Connects to a prebuilt Qdrant index instead of rebuilding it on startup.
 - Uses environment variables for API keys.
 - Implements a RAG pipeline with LLaMA 3.1, Qdrant, and Hybrid Retrieval.
 """
 
-
 # --- CELL 1: Imports, Logging & Reproducibility ---
 import os
 import random
@@ -41,8 +40,7 @@ logging.basicConfig(
 )
 logger = logging.getLogger(__name__)
 
-# Apply nest_asyncio for environments like notebooks
-nest_asyncio.apply()
+nest_asyncio.apply()  # Apply nest_asyncio for environments like notebooks
 
 # Reproducibility
 SEED = 42
@@ -50,12 +48,10 @@ random.seed(SEED)
 np.random.seed(SEED)
 torch.manual_seed(SEED)
 
-# --- CELL 0: load secrets from env vars ---
-QDRANT_HOST = os.getenv("QDRANT_HOST")
+# --- CELL 0: Load secrets from environment variables ---
+QDRANT_HOST = os.getenv("QDRANT_HOST")
 QDRANT_API_KEY = os.getenv("QDRANT_API_KEY")
-HF_TOKEN = os.getenv("HF_TOKEN")
-
-# --- CELL 2: Environment & Qdrant Connection Setup ---
+HF_TOKEN = os.getenv("HF_TOKEN")
 
 if not all([QDRANT_HOST, QDRANT_API_KEY, HF_TOKEN]):
     raise EnvironmentError(
@@ -73,80 +69,48 @@ qdrant = qdrant_client.QdrantClient(
 )
 COLLECTION_NAME = "it_support_rag"
 
-
 # --- CELL 3: Load Dataset & Build Documents ---
-# Load data from a local CSV file.
-# Make sure this CSV file is in the same directory as app.py when deploying.
-CSV_PATH = "data.csv"  # Or whatever you name your CSV file
+CSV_PATH = "data.csv"
 if not os.path.exists(CSV_PATH):
     raise FileNotFoundError(
-        f"The data file was not found at {CSV_PATH}. "
-        "Please upload your data CSV and name it correctly."
+        f"The data file was not found at {CSV_PATH}. Please upload your data CSV and name it correctly."
     )
 
 df = pd.read_csv(CSV_PATH, encoding="ISO-8859-1")
-
 case_docs: List[Document] = []
 for _, row in df.iterrows():
     text = str(row.get("text_chunk", ""))
     meta = {
         "source_dataset": str(row.get("source_dataset", ""))[:50],
-        "category": str(row.get("category", ""))[:100],
-        "orig_query": str(row.get("original_query", ""))[:200],
-        "orig_solution": str(row.get("original_solution", ""))[:200]
+        "category": str(row.get("category", ""))[:100],
+        "orig_query": str(row.get("original_query", ""))[:200],
+        "orig_solution": str(row.get("original_solution", ""))[:200],
     }
     case_docs.append(Document(text=text, metadata=meta))
 logger.info(f"Loaded {len(case_docs)} documents from {CSV_PATH}.")
 
-
-# --- CELL 4: Create Vector Index ---
-# Embedding model
-device = "cuda" if torch.cuda.is_available() else "cpu"
-logger.info(f"Using device: {device}")
-embed_model = HuggingFaceEmbedding(
-    model_name="BAAI/bge-large-en-v1.5",
-    device=device
-)
-
-# Node parser for chunking
-node_parser = SentenceSplitter(
-    chunk_size=1024,
-    chunk_overlap=100,
-    paragraph_separator="\n\n"
-)
-
-# Qdrant-backed vector store
+# --- CELL 4: Load prebuilt Vector Index ---
 vector_store = QdrantVectorStore(
     client=qdrant,
     collection_name=COLLECTION_NAME,
     prefer_grpc=False
 )
-
-# Build the index (will upload to Qdrant if collection doesn't exist)
-# Note: This step can be slow the first time it's run.
-logger.info("Initializing VectorStoreIndex...")
-index = VectorStoreIndex.from_documents(
-    documents=case_docs,
-    storage_context=StorageContext.from_defaults(vector_store=vector_store),
-    embed_model=embed_model,
-    node_parser=node_parser,
-    show_progress=True
-)
-logger.info("VectorStoreIndex initialized successfully.")
-
+storage_context = StorageContext.from_defaults(vector_store=vector_store)
+index = VectorStoreIndex.load_from_storage(storage_context)
+logger.info("✅ Loaded existing VectorStoreIndex from Qdrant")
 
 # --- CELL 5: Define Hybrid Retriever & Reranker ---
-Settings.llm = None  # We will use our own LLM pipeline
+Settings.llm = None  # We will use our own LLM pipeline
 
 class HybridRetriever(BaseRetriever):
     def __init__(self, dense, bm25):
         super().__init__()
         self.dense = dense
         self.bm25 = bm25
+
     def _retrieve(self, query_bundle: QueryBundle) -> List[Document]:
         dense_hits = self.dense.retrieve(query_bundle)
         bm25_hits = self.bm25.retrieve(query_bundle)
-
         combined = dense_hits + bm25_hits
         unique = []
         seen = set()
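
Note on the new CELL 4: `VectorStoreIndex.load_from_storage(storage_context)` does not appear in the documented llama-index API (the core helper is `load_index_from_storage`, and it expects a persisted docstore rather than a bare vector store). For an index that lives entirely in Qdrant, the documented route is `VectorStoreIndex.from_vector_store`. A minimal sketch, assuming the collection was built with the same `BAAI/bge-large-en-v1.5` embedder that the removed build step used:

from llama_index.core import VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.vector_stores.qdrant import QdrantVectorStore

def attach_to_existing_index(client, collection_name):
    """Reattach to a prebuilt Qdrant collection without re-embedding or re-uploading."""
    vector_store = QdrantVectorStore(client=client, collection_name=collection_name)
    # Query-time embeddings must come from the same model used to build the index;
    # BAAI/bge-large-en-v1.5 is an assumption carried over from the removed CELL 4.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
    return VectorStoreIndex.from_vector_store(vector_store=vector_store, embed_model=embed_model)
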
@@ -159,7 +123,7 @@ class HybridRetriever(BaseRetriever):
 
 # Instantiate retrievers
 dense_retriever = index.as_retriever(similarity_top_k=10)
-bm25_nodes = node_parser.get_nodes_from_documents(case_docs)
+bm25_nodes = SentenceSplitter(chunk_size=1024, chunk_overlap=100).get_nodes_from_documents(case_docs)
 bm25_retriever = BM25Retriever.from_defaults(
     nodes=bm25_nodes,
     similarity_top_k=10,
@@ -169,7 +133,7 @@ hybrid_retriever = HybridRetriever(dense=dense_retriever, bm25=bm25_retriever)
 reranker = SentenceTransformerRerank(
     model="cross-encoder/ms-marco-MiniLM-L-2-v2",
     top_n=4,
-    device=device
+    device="cuda" if torch.cuda.is_available() else "cpu"
 )
 
 query_engine = index.as_query_engine(
@@ -178,7 +142,6 @@ query_engine = index.as_query_engine(
     llm=None
 )
 
-
 # --- CELL 6: Load & Quantize LLaMA Model ---
 quant_config = BitsAndBytesConfig(
     load_in_4bit=True,
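
Note on CELL 6: only the first `quant_config` argument falls inside this hunk's context. For reference, a typical 4-bit NF4 configuration in transformers looks like the sketch below; everything beyond `load_in_4bit=True` is an assumption, not this commit's actual settings.

import torch
from transformers import BitsAndBytesConfig

# Illustrative 4-bit setup; only load_in_4bit=True is visible in the diff above.
quant_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",              # assumed
    bnb_4bit_compute_dtype=torch.bfloat16,  # assumed
    bnb_4bit_use_double_quant=True,         # assumed
)
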
@@ -204,7 +167,6 @@ generator = pipeline(
     device_map="auto"
 )
 
-
 # --- CELL 7: Chat Logic and Prompting ---
 SYSTEM_PROMPT = (
     "You are a friendly and helpful Level 0 IT Support Assistant. "
@@ -230,26 +192,22 @@ GREETINGS = {"hello", "hi", "hey", "good morning", "good afternoon", "good eveni
 
 def format_history(history):
     return "".join(
-        f"{HDR['usr']}\n{u}{HDR['eot']}{HDR['ast']}\n{a}{HDR['eot']}"
-        for u, a in history
+        f"{HDR['usr']}\n{u}{HDR['eot']}{HDR['ast']}\n{a}{HDR['eot']}" for u, a in history
     )
 
 def build_prompt(query, context, history):
     if query.lower().strip() in GREETINGS:
         return None, "greeting"
-
     words = query.strip().split()
     if len(words) < 3:
         return (
             "Could you provide more detail about what you're experiencing? "
             "Any error messages or steps you've tried will help me assist you."
         ), "clarify"
-
     context_str = "\n---\n".join(node.text for node in context) if context else "No context provided."
     hist_str = format_history(history[-3:])
-
     prompt = (
-        f"<|begin_of_text|>"
+        "<|begin_of_text|>"
         f"{HDR['sys']}\n{SYSTEM_PROMPT}{HDR['eot']}"
         f"{hist_str}"
         f"{HDR['usr']}\nContext:\n{context_str}\n\nQuestion: {query}{HDR['eot']}"
@@ -260,22 +218,17 @@ def build_prompt(query, context, history):
 def chat(query, temperature=0.7, top_p=0.9):
     global chat_history
     prompt, mode = build_prompt(query, [], chat_history)
-
     if mode == "greeting":
         reply = "Hello there! How can I help with your IT support question today?"
         chat_history.append((query, reply))
         return reply
-
     if mode == "clarify":
         reply = prompt
         chat_history.append((query, reply))
         return reply
-
     response = query_engine.query(query)
     context_nodes = response.source_nodes
-
     prompt, _ = build_prompt(query, context_nodes, chat_history)
-
     gen_args = {
         "do_sample": True,
         "max_new_tokens": 350,
@@ -283,19 +236,15 @@ def chat(query, temperature=0.7, top_p=0.9):
         "top_p": top_p,
         "eos_token_id": tokenizer.eos_token_id
     }
-
     output = generator(prompt, **gen_args)
     text = output[0]["generated_text"]
     answer = text.split(HDR["ast"])[-1].strip()
-
     chat_history.append((query, answer))
     return answer, context_nodes
 
-
 # --- CELL 8: Gradio Interface ---
 with gr.Blocks(theme=gr.themes.Soft(), title="💬 Level 0 IT Support Chatbot") as demo:
     gr.Markdown("### 🤖 Level 0 IT Support Chatbot (RAG + Qdrant + LLaMA3)")
-
     with gr.Row():
         with gr.Column(scale=3):
             chatbot = gr.Chatbot(label="Chat", height=500, bubble_full_width=False)
@@ -310,35 +259,25 @@ with gr.Blocks(theme=gr.themes.Soft(), title="💬 Level 0 IT Support Chatbot")
             top_p_slider = gr.Slider(0.0, 1.0, value=0.9, step=0.01, label="Top-p")
     with gr.Accordion("Show Retrieved Context", open=False):
         context_display = gr.Textbox(label="Retrieved Context", interactive=False, lines=10)
-
     def respond(message, history, k, temp, top_p):
         global chat_history
-        # Update retriever k value
         dense_retriever.similarity_top_k = k
         bm25_retriever.similarity_top_k = k
-
-        # Get response and context
         reply, context_nodes = chat(message, temperature=temp, top_p=top_p)
-
-        # Format context for display
-        ctx_text = "\n\n---\n\n".join([f"**Source {i+1} (Score: {node.score:.4f})**\n{node.text}" for i, node in enumerate(context_nodes)])
-
+        ctx_text = "\n\n---\n\n".join([
+            f"**Source {i+1} (Score: {node.score:.4f})**\n{node.text}"
+            for i, node in enumerate(context_nodes)
+        ])
         history.append([message, reply])
         return "", history, ctx_text
-
     def clear_chat():
         global chat_history
         chat_history = []
         return [], None
-
-    # Event Listeners
     inp.submit(respond, [inp, chatbot, k_slider, temp_slider, top_p_slider], [inp, chatbot, context_display])
     send_btn.click(respond, [inp, chatbot, k_slider, temp_slider, top_p_slider], [inp, chatbot, context_display])
    clear_btn.click(clear_chat, None, [chatbot, context_display], queue=False)
 
-# --- Main execution block ---
 if __name__ == "__main__":
-    # The launch() command will start a web server that serves the interface.
-    # It will block the script from exiting.
     logger.info("Launching Gradio interface...")
     demo.launch(server_name="0.0.0.0", server_port=7860)
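
Note on the `chat()`/`respond()` contract: `chat()` returns a bare string on the greeting and clarify paths but a `(answer, context_nodes)` tuple otherwise, so the `reply, context_nodes = chat(...)` unpacking in `respond()` raises on greetings and short inputs. A hedged sketch of a wrapper that normalizes the return shape (`chat_safe` is a hypothetical helper, not part of the commit):

def chat_safe(query, temperature=0.7, top_p=0.9):
    """Give chat() a uniform (reply, context_nodes) shape for respond() to unpack."""
    result = chat(query, temperature=temperature, top_p=top_p)
    if isinstance(result, tuple):
        return result
    return result, []  # greeting/clarify paths return a bare string and no context

`respond()` would then call `chat_safe` instead of `chat`; with an empty context list, the `ctx_text` join simply yields an empty string.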
 