"""
DocMind - Gradio Chat Interface

Multi-agent research assistant for arXiv papers.
"""

import gradio as gr

from retriever import PaperRetriever
from agents import DocMindOrchestrator
from fetch_arxiv_data import ArxivFetcher
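
# Local-module surface assumed by this app (as used below): PaperRetriever
# provides load_index() / build_index(papers) / save_index(), ArxivFetcher
# provides load_papers(path), and DocMindOrchestrator provides
# process_query(query, top_k=..., max_papers_in_response=...).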


class DocMindApp:
    """Wires the paper index and the multi-agent orchestrator behind the Gradio UI."""

    def __init__(self):
        self.retriever = None
        self.orchestrator = None
        self.setup_system()

    def setup_system(self):
        """Initialize the retriever and load the index, building it if necessary."""
        print("Initializing DocMind...")

        self.retriever = PaperRetriever()

        if not self.retriever.load_index():
            print("No index found. Building new index...")
            fetcher = ArxivFetcher()
            papers = fetcher.load_papers("arxiv_papers.json")

            if papers:
                self.retriever.build_index(papers)
                self.retriever.save_index()
                print(f"Index built with {len(papers)} papers")
            else:
                print("⚠️ Warning: No papers found. Please run fetch_arxiv_data.py first")
                return

        self.orchestrator = DocMindOrchestrator(self.retriever)
        print("DocMind ready!")

    def chat(
        self,
        message: str,
        history: list,
        num_papers: int = 5,
        show_agent_logs: bool = True,
    ) -> str:
        """
        Process a chat message.

        Args:
            message: User query
            history: Chat history (not used in the current version)
            num_papers: Number of papers to include in the response
            show_agent_logs: Whether to show agent processing logs (not used in the current version)

        Returns:
            Response string
        """
        if not self.orchestrator:
            return "⚠️ System not initialized. Please run fetch_arxiv_data.py to download papers first."

        if not message.strip():
            return "Please enter a question about research papers."

        try:
            # Retrieve a wider candidate pool than the final answer needs; the
            # orchestrator narrows it down to `num_papers` papers in the response.
            response = self.orchestrator.process_query(
                message,
                top_k=num_papers * 2,
                max_papers_in_response=num_papers,
            )
            return response

        except Exception as e:
            return f"❌ Error processing query: {str(e)}\n\nPlease try rephrasing your question."
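

# A minimal sketch of driving the pipeline without the UI, e.g. for a quick
# smoke test from the command line. The function name `_smoke_test` and the
# sample question are illustrative, not part of the original app; it assumes
# an index (or arxiv_papers.json) is available locally.
def _smoke_test(question: str = "What's new in retrieval-augmented generation (RAG)?") -> None:
    app = DocMindApp()
    # history is unused by DocMindApp.chat, so an empty list is enough here.
    print(app.chat(question, history=[], num_papers=3))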


def create_interface():
    """Create the Gradio chat interface."""

    app = DocMindApp()

    css = """
    .gradio-container {
        font-family: 'Inter', 'Segoe UI', sans-serif;
        max-width: 1400px !important;
    }

    /* Header styling */
    h1 {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
        -webkit-background-clip: text;
        -webkit-text-fill-color: transparent;
        background-clip: text;
        font-weight: 700;
        font-size: 2.5em !important;
        margin-bottom: 0.5em;
    }

    /* Chat area improvements */
    .message-wrap {
        padding: 1.2em !important;
        margin: 0.8em 0 !important;
        border-radius: 12px !important;
        line-height: 1.6;
    }

    /* User message */
    .message-wrap.user {
        background: linear-gradient(135deg, #667eea15 0%, #764ba215 100%) !important;
        border-left: 3px solid #667eea;
    }

    /* Bot message */
    .message-wrap.bot {
        background: #f8f9fa !important;
        border-left: 3px solid #28a745;
    }

    /* Input area */
    .input-text textarea {
        border-radius: 12px !important;
        border: 2px solid #e0e0e0 !important;
        font-size: 1.05em !important;
    }

    .input-text textarea:focus {
        border-color: #667eea !important;
        box-shadow: 0 0 0 3px rgba(102, 126, 234, 0.1) !important;
    }

    /* Buttons */
    .btn-primary {
        background: linear-gradient(135deg, #667eea 0%, #764ba2 100%) !important;
        border: none !important;
        border-radius: 10px !important;
        padding: 0.8em 2em !important;
        font-weight: 600 !important;
        transition: transform 0.2s !important;
    }

    .btn-primary:hover {
        transform: translateY(-2px) !important;
        box-shadow: 0 4px 12px rgba(102, 126, 234, 0.4) !important;
    }

    /* Settings panel */
    .settings-panel {
        background: #f8f9fa;
        border-radius: 12px;
        padding: 1.5em;
    }

    /* Slider */
    input[type="range"] {
        accent-color: #667eea !important;
    }

    /* Example buttons */
    .examples button {
        border-radius: 8px !important;
        border: 2px solid #e0e0e0 !important;
        padding: 0.7em 1em !important;
        transition: all 0.2s !important;
    }

    .examples button:hover {
        border-color: #667eea !important;
        background: #667eea10 !important;
    }

    /* Code blocks in responses */
    code {
        background: #f4f4f4;
        padding: 0.2em 0.4em;
        border-radius: 4px;
        font-family: 'Courier New', monospace;
    }

    /* Remove footer */
    footer {
        display: none !important;
    }

    /* Improve markdown rendering */
    .markdown-body h2 {
        color: #667eea;
        border-bottom: 2px solid #667eea;
        padding-bottom: 0.3em;
        margin-top: 1.5em;
    }

    .markdown-body h3 {
        color: #764ba2;
        margin-top: 1.2em;
    }

    /* Better list styling */
    .markdown-body ul {
        line-height: 1.8;
    }

    .markdown-body li {
        margin: 0.5em 0;
    }
    """
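
    # Note: the selectors above (.message-wrap, .btn-primary, etc.) target
    # Gradio's built-in class names, which can change between Gradio releases;
    # they may need adjusting if the styling stops applying after an upgrade.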

    examples = [
        "What are the latest methods for improving diffusion models?",
        "Summarize recent work on RLHF vs DPO for language model alignment",
        "What are the main challenges in scaling transformer models?",
        "Tell me about recent advances in vision transformers",
        "What's new in retrieval-augmented generation (RAG)?",
    ]

    with gr.Blocks(css=css, title="DocMind - arXiv Research Assistant", theme=gr.themes.Soft()) as demo:
        gr.Markdown(
            """
            # 🧠 DocMind: Multi-Agent Research Assistant

            Ask questions about recent AI/ML research papers from arXiv. DocMind uses a 4-agent pipeline to retrieve, read, critique, and synthesize answers.

            **Agent Pipeline:** 🔍 Retriever → 📖 Reader → 🔎 Critic → ✨ Synthesizer
            """
        )

        with gr.Row():
            with gr.Column(scale=7):
                chatbot = gr.Chatbot(
                    label="Research Chat",
                    height=550,
                    type="messages",
                    avatar_images=(None, "🧠"),
                    bubble_full_width=False,
                )

                with gr.Row():
                    msg = gr.Textbox(
                        label="",
                        placeholder="Ask about recent research papers... (e.g., 'What are the latest methods for improving diffusion models?')",
                        lines=2,
                        scale=9,
                        show_label=False,
                    )
                    submit = gr.Button("Send", variant="primary", scale=1, size="lg")

                with gr.Accordion("💡 Example Questions", open=False):
                    gr.Examples(
                        examples=examples,
                        inputs=msg,
                        label="",
                    )

            with gr.Column(scale=3):
                with gr.Group():
                    gr.Markdown("### ⚙️ Settings")

                    num_papers = gr.Slider(
                        minimum=1,
                        maximum=10,
                        value=5,
                        step=1,
                        label="Papers to Include",
                        info="More papers = more comprehensive, but slower",
                    )

                    show_logs = gr.Checkbox(
                        label="Show Agent Logs",
                        value=False,
                        info="Display processing steps",
                    )

                    clear = gr.Button("🗑️ Clear Chat", variant="secondary", size="sm")

                gr.Markdown(
                    """
                    ---
                    ### 📚 About

                    **How it works:**
                    1. 🔍 **Retriever** finds relevant papers
                    2. 📖 **Reader** summarizes each paper
                    3. 🔎 **Critic** filters low-quality results
                    4. ✨ **Synthesizer** creates final answer

                    **Data Source:** arXiv papers (AI/ML/CS)

                    **Technology:**
                    - FAISS for semantic search
                    - Sentence Transformers for embeddings
                    - 100 recent papers indexed
                    """
                )

        def respond(message, history, num_papers_val, show_logs_val):
            if not message.strip():
                return history

            # The chatbot uses type="messages", so history is a list of
            # OpenAI-style {"role", "content"} dicts.
            history.append({"role": "user", "content": message})

            bot_response = app.chat(message, history, num_papers_val, show_logs_val)

            history.append({"role": "assistant", "content": bot_response})

            return history

        def clear_chat():
            return []

        # Wire the send button and the Enter key to the same handler, then
        # clear the input box once the response has been added to the chat.
        submit.click(
            respond,
            inputs=[msg, chatbot, num_papers, show_logs],
            outputs=[chatbot],
        ).then(
            lambda: "",
            outputs=[msg],
        )

        msg.submit(
            respond,
            inputs=[msg, chatbot, num_papers, show_logs],
            outputs=[chatbot],
        ).then(
            lambda: "",
            outputs=[msg],
        )

        clear.click(clear_chat, outputs=[chatbot])

        gr.Markdown(
            """
            <div style='text-align: center; margin-top: 2em; padding: 1em; color: #666;'>
            <small>Built with FAISS, Sentence Transformers, and Gradio • Powered by arXiv API</small>
            </div>
            """
        )

    return demo


if __name__ == "__main__":
    demo = create_interface()
    demo.launch(
        share=False,
        server_name="127.0.0.1",
        server_port=7860,
        show_error=True,
    )
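    # Standard gr.Blocks launch alternatives, not enabled by default here:
    # share=True requests a temporary public link, server_name="0.0.0.0"
    # exposes the app on the local network, and calling demo.queue() before
    # launch() lets the app handle concurrent requests.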