Update app.py

app.py CHANGED
@@ -419,8 +419,8 @@ gemini_model = None
 current_model = None  # Track which model is currently loaded
 
 
-def chunk_document(text, chunk_size=
-    """Split document into overlapping chunks for RAG"""
+def chunk_document(text, chunk_size=1024, overlap=100):
+    """Split document into overlapping chunks for RAG - optimized for API quota"""
     words = text.split()
     chunks = []
 
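The body of `chunk_document` is cut off in this view, so the commit's actual loop is not visible. A minimal sketch of a word-based overlapping chunker matching the new signature (with `chunk_size` and `overlap` counted in words) could look like the following; only the signature and docstring are confirmed by the diff, the loop body is an assumption:

```python
# Hypothetical reconstruction -- the real body is truncated in the diff above.
def chunk_document(text, chunk_size=1024, overlap=100):
    """Split document into overlapping chunks for RAG - optimized for API quota"""
    words = text.split()
    chunks = []
    step = chunk_size - overlap  # consecutive chunks share `overlap` words
    for start in range(0, len(words), step):
        chunk = " ".join(words[start:start + chunk_size])
        if chunk:
            chunks.append(chunk)
    return chunks
```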
@@ -451,10 +451,10 @@ def create_embeddings(chunks):
         print(f"Error creating embeddings: {e}")
         return None
 
-def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
+def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
-        return chunks[:2]
+        return chunks[:3]  # Fallback to first 3 chunks
 
     try:
         question_embedding = embedding_model.encode([question], show_progress_bar=False)
@@ -467,7 +467,7 @@ def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
         return relevant_chunks
     except Exception as e:
         print(f"Error retrieving chunks: {e}")
-        return chunks[:2]
+        return chunks[:3]  # Fallback
 
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
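The middle of `retrieve_relevant_chunks` (between the `encode` call and `return relevant_chunks`) is not shown in either hunk. Assuming `embeddings` is a NumPy array produced by `create_embeddings`, the elided ranking step is presumably a cosine-similarity top-k selection along these lines; `rank_chunks` is a hypothetical stand-in, not the commit's code:

```python
import numpy as np

# Hypothetical sketch of the elided ranking step: cosine similarity between the
# question embedding and each chunk embedding, keeping the top_k best chunks.
def rank_chunks(question_embedding, embeddings, chunks, top_k=3):
    q = question_embedding[0]  # encode([question]) returns a (1, dim) array
    sims = embeddings @ q / (
        np.linalg.norm(embeddings, axis=1) * np.linalg.norm(q) + 1e-10
    )
    top_idx = np.argsort(sims)[::-1][:top_k]
    return [chunks[i] for i in top_idx]
```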
@@ -708,16 +708,18 @@ with gr.Blocks(
         if model is None:
             return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
 
-        # Use RAG to get relevant chunks from markdown (
+        # Use RAG to get relevant chunks from markdown (balanced for performance vs quota)
         if document_chunks and len(document_chunks) > 0:
-            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=
+            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
             context = "\n\n".join(relevant_chunks)
-            #
-            if len(context) >
-
+            # Smart truncation: aim for ~4000 chars (good context while staying under quota)
+            if len(context) > 4000:
+                # Try to cut at sentence boundaries
+                sentences = context[:4000].split('.')
+                context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:4000] + '...'
         else:
             # Fallback to truncated document if RAG fails
-            context = processed_markdown[:
+            context = processed_markdown[:4000] + "..." if len(processed_markdown) > 4000 else processed_markdown
 
         # Create prompt for Gemini
         prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
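The sentence-boundary truncation added in this hunk cuts at the last complete sentence before the limit instead of ending mid-word; if the slice contains no period at all, it falls back to a hard cut plus an ellipsis. A quick demonstration of its behavior, with the commit's 4000-char limit shortened to 40 for readability:

```python
limit = 40  # the commit uses 4000
context = "First sentence. Second sentence. Third sentence that runs long."
if len(context) > limit:
    # Same logic as the diff above: drop the trailing (possibly partial) sentence
    sentences = context[:limit].split('.')
    context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:limit] + '...'
print(context)  # -> First sentence. Second sentence...
```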