Update app.py

app.py CHANGED
@@ -419,8 +419,8 @@ gemini_model = None
 current_model = None  # Track which model is currently loaded
 
 
-def chunk_document(text, chunk_size=
-    """Split document into overlapping chunks for RAG"""
+def chunk_document(text, chunk_size=1024, overlap=100):
+    """Split document into overlapping chunks for RAG - optimized for API quota"""
     words = text.split()
     chunks = []
 
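The body of `chunk_document` is cut off in this view, so the commit's actual loop is not visible. A minimal sketch of a word-based overlapping chunker matching the new signature (with `chunk_size` and `overlap` counted in words) could look like the following; only the signature and docstring are confirmed by the diff, the loop body is an assumption:

```python
# Hypothetical reconstruction -- the real body is truncated in the diff above.
def chunk_document(text, chunk_size=1024, overlap=100):
    """Split document into overlapping chunks for RAG - optimized for API quota"""
    words = text.split()
    chunks = []
    step = chunk_size - overlap  # consecutive chunks share `overlap` words
    for start in range(0, len(words), step):
        chunk = " ".join(words[start:start + chunk_size])
        if chunk:
            chunks.append(chunk)
    return chunks
```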
@@ -451,10 +451,10 @@ def create_embeddings(chunks):
         print(f"Error creating embeddings: {e}")
         return None
 
-def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
+def retrieve_relevant_chunks(question, chunks, embeddings, top_k=3):
     """Retrieve most relevant chunks for a question"""
     if embedding_model is None or embeddings is None:
-        return chunks[:2]
+        return chunks[:3]  # Fallback to first 3 chunks
 
     try:
         question_embedding = embedding_model.encode([question], show_progress_bar=False)
@@ -467,7 +467,7 @@ def retrieve_relevant_chunks(question, chunks, embeddings, top_k=2):
         return relevant_chunks
     except Exception as e:
         print(f"Error retrieving chunks: {e}")
-        return chunks[:2]
+        return chunks[:3]  # Fallback
 
 def process_uploaded_pdf(pdf_file, progress=gr.Progress()):
     """Main processing function for uploaded PDF"""
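The middle of `retrieve_relevant_chunks` (between the `encode` call and `return relevant_chunks`) is not shown in either hunk. Assuming `embeddings` is a NumPy array produced by `create_embeddings`, the elided ranking step is presumably a cosine-similarity top-k selection along these lines; `rank_chunks` is a hypothetical stand-in, not the commit's code:

```python
import numpy as np

# Hypothetical sketch of the elided ranking step: cosine similarity between the
# question embedding and each chunk embedding, keeping the top_k best chunks.
def rank_chunks(question_embedding, embeddings, chunks, top_k=3):
    q = question_embedding[0]  # encode([question]) returns a (1, dim) array
    sims = embeddings @ q / (
        np.linalg.norm(embeddings, axis=1) * np.linalg.norm(q) + 1e-10
    )
    top_idx = np.argsort(sims)[::-1][:top_k]
    return [chunks[i] for i in top_idx]
```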
@@ -708,16 +708,18 @@ with gr.Blocks(
         if model is None:
             return history + [[message, "❌ Failed to initialize Gemini model. Please check your GEMINI_API_KEY."]]
 
-        # Use RAG to get relevant chunks from markdown (
+        # Use RAG to get relevant chunks from markdown (balanced for performance vs quota)
         if document_chunks and len(document_chunks) > 0:
-            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=
+            relevant_chunks = retrieve_relevant_chunks(message, document_chunks, document_embeddings, top_k=3)
             context = "\n\n".join(relevant_chunks)
-            #
-            if len(context) >
-
+            # Smart truncation: aim for ~4000 chars (good context while staying under quota)
+            if len(context) > 4000:
+                # Try to cut at sentence boundaries
+                sentences = context[:4000].split('.')
+                context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:4000] + '...'
         else:
             # Fallback to truncated document if RAG fails
-            context = processed_markdown[:
+            context = processed_markdown[:4000] + "..." if len(processed_markdown) > 4000 else processed_markdown
 
         # Create prompt for Gemini
         prompt = f"""You are a helpful assistant that answers questions about documents. Use the provided context to answer questions accurately and concisely.
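The sentence-boundary truncation added in this hunk cuts at the last complete sentence before the limit instead of ending mid-word; if the slice contains no period at all, it falls back to a hard cut plus an ellipsis. A quick demonstration of its behavior, with the commit's 4000-char limit shortened to 40 for readability:

```python
limit = 40  # the commit uses 4000
context = "First sentence. Second sentence. Third sentence that runs long."
if len(context) > limit:
    # Same logic as the diff above: drop the trailing (possibly partial) sentence
    sentences = context[:limit].split('.')
    context = '.'.join(sentences[:-1]) + '...' if len(sentences) > 1 else context[:limit] + '...'
print(context)  # -> First sentence. Second sentence...
```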