Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -2,607 +2,198 @@ import gradio as gr
|
|
| 2 |
import regex as re
|
| 3 |
import csv
|
| 4 |
import pandas as pd
|
| 5 |
-
from typing import
|
| 6 |
import logging
|
| 7 |
-
from
|
| 8 |
import os
|
| 9 |
-
|
| 10 |
-
from
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
parse_llm_json_response,
|
| 14 |
-
analyze_code
|
| 15 |
-
)
|
| 16 |
-
from hf_utils import download_space_repo, search_top_spaces
|
| 17 |
-
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 18 |
|
| 19 |
# Configure logging
|
| 20 |
-
logging.basicConfig(
|
| 21 |
-
level=logging.INFO,
|
| 22 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 23 |
-
)
|
| 24 |
logger = logging.getLogger(__name__)
|
| 25 |
|
| 26 |
# Constants
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
"Return only the keywords as a comma-separated list."
|
| 32 |
-
)
|
| 33 |
-
|
| 34 |
-
CHATBOT_INITIAL_MESSAGE = (
|
| 35 |
-
"Hello! Please tell me about your ideal Hugging Face repo. "
|
| 36 |
-
"What use case, preferred language, or features are you looking for?"
|
| 37 |
-
)
|
| 38 |
|
| 39 |
-
# State management
|
| 40 |
class AppState:
|
|
|
|
| 41 |
def __init__(self):
|
| 42 |
self.repo_ids: List[str] = []
|
| 43 |
self.current_repo_idx: int = 0
|
| 44 |
-
self.
|
| 45 |
-
self.analysis_results: Dict[str, Dict[str, Any]] = {}
|
| 46 |
-
self.chat_history: List[Tuple[str, str]] = []
|
| 47 |
-
|
| 48 |
-
def reset(self):
|
| 49 |
-
self.__init__()
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
"""Read CSV file and return as DataFrame with string dtype."""
|
| 54 |
try:
|
| 55 |
-
return pd.read_csv(
|
| 56 |
except Exception as e:
|
| 57 |
-
logger.error(f"Error reading CSV
|
| 58 |
return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
| 59 |
|
| 60 |
-
def write_repos_to_csv(repo_ids: List[str]
|
| 61 |
-
"""Write
|
| 62 |
try:
|
| 63 |
-
with open(
|
| 64 |
-
writer = csv.writer(
|
| 65 |
writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
| 66 |
for repo_id in repo_ids:
|
| 67 |
writer.writerow([repo_id, "", "", "", ""])
|
| 68 |
except Exception as e:
|
| 69 |
-
logger.error(f"Error writing to CSV
|
| 70 |
|
| 71 |
-
def
|
| 72 |
-
"""
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
return
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
state.current_repo_idx = 0
|
| 81 |
-
|
| 82 |
-
write_repos_to_csv(repo_ids)
|
| 83 |
-
return read_csv_as_text("repo_ids.csv")
|
| 84 |
|
| 85 |
-
def
|
| 86 |
-
"""Analyze a single repository
|
| 87 |
try:
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
else:
|
| 106 |
-
summary = f"JSON extraction: FAILED\nRaw: {llm_json.get('raw', '') if isinstance(llm_json, dict) else llm_json}"
|
| 107 |
-
|
| 108 |
-
return combined_content, summary, llm_json
|
| 109 |
-
|
| 110 |
except Exception as e:
|
| 111 |
logger.error(f"Error analyzing repo {repo_id}: {e}")
|
| 112 |
-
return
|
| 113 |
|
| 114 |
-
def
|
| 115 |
-
"""
|
| 116 |
try:
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
for idx, row in df.iterrows():
|
| 121 |
-
if row["repo id"] == repo_id:
|
| 122 |
-
if isinstance(analysis_results, dict) and "error" not in analysis_results:
|
| 123 |
-
df.at[idx, "strength"] = analysis_results.get("strength", "")
|
| 124 |
-
df.at[idx, "weaknesses"] = analysis_results.get("weaknesses", "")
|
| 125 |
-
df.at[idx, "speciality"] = analysis_results.get("speciality", "")
|
| 126 |
-
df.at[idx, "relevance rating"] = analysis_results.get("relevance rating", "")
|
| 127 |
-
updated = True
|
| 128 |
-
break
|
| 129 |
-
|
| 130 |
-
if not updated and isinstance(analysis_results, dict) and "error" not in analysis_results:
|
| 131 |
-
new_row = {
|
| 132 |
-
"repo id": repo_id,
|
| 133 |
-
"strength": analysis_results.get("strength", ""),
|
| 134 |
-
"weaknesses": analysis_results.get("weaknesses", ""),
|
| 135 |
-
"speciality": analysis_results.get("speciality", ""),
|
| 136 |
-
"relevance rating": analysis_results.get("relevance rating", "")
|
| 137 |
-
}
|
| 138 |
-
df = pd.concat([df, pd.DataFrame([new_row])], ignore_index=True)
|
| 139 |
-
|
| 140 |
-
df.to_csv(csv_filename, index=False)
|
| 141 |
-
return df
|
| 142 |
-
|
| 143 |
except Exception as e:
|
| 144 |
-
logger.error(f"Error
|
| 145 |
-
return
|
| 146 |
-
|
| 147 |
-
def show_combined_repo_and_llm(state: AppState) -> Tuple[str, str, pd.DataFrame]:
    """Analyze the repository at the state's cursor, then advance the cursor.

    Args:
        state: Holds ``repo_ids`` and the ``current_repo_idx`` cursor.

    Returns:
        ``(content, summary, table)``. When no IDs were submitted, or the
        cursor is past the last ID, ``content`` is an explanatory message and
        ``summary`` is empty.
    """
    pending = state.repo_ids
    if not pending:
        return "No repo ID available. Please submit repo IDs first.", "", pd.DataFrame()

    cursor = state.current_repo_idx
    if cursor >= len(pending):
        # Nothing left to analyze: just show what is already stored in the CSV.
        return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")

    current_id = pending[cursor]
    combined_content, summary, analysis_results = analyze_single_repo(current_id)
    table = update_csv_with_analysis(current_id, analysis_results)

    # Move on so the next call handles the following repository.
    state.current_repo_idx += 1
    return combined_content, summary, table
|
| 161 |
|
| 162 |
-
def keyword_search_and_update(keyword: str, state: AppState) -> pd.DataFrame:
    """Look up Spaces for each keyword and make them the active repo list.

    Args:
        keyword: Keywords separated by commas and/or newlines.
        state: Receives the de-duplicated IDs; its cursor is reset to 0.

    Returns:
        The contents of ``repo_ids.csv`` after it has been rewritten, or an
        empty frame with the standard columns when ``keyword`` is empty.
    """
    if not keyword:
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])

    terms = [piece.strip() for piece in re.split(r'[\n,]+', keyword) if piece.strip()]

    found = []
    for term in terms:
        found.extend(search_top_spaces(term, limit=5))

    # dict.fromkeys keeps the first occurrence of each ID, in order.
    unique_repo_ids = list(dict.fromkeys(found))

    state.repo_ids = unique_repo_ids
    state.current_repo_idx = 0

    write_repos_to_csv(unique_repo_ids)
    return read_csv_as_text("repo_ids.csv")
|
| 186 |
-
|
| 187 |
-
# UI Components
|
| 188 |
def create_ui() -> gr.Blocks:
|
| 189 |
-
"""Create
|
| 190 |
state = gr.State(AppState())
|
| 191 |
|
| 192 |
with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
|
| 193 |
gr.Markdown("# Hugging Face Repository Analyzer")
|
| 194 |
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
- Chat with an AI assistant to find the perfect repository
|
| 207 |
-
- Get detailed analysis of repositories
|
| 208 |
-
|
| 209 |
-
Click 'Start Analysis' to begin!
|
| 210 |
-
""")
|
| 211 |
-
with gr.Row():
|
| 212 |
-
start_btn = gr.Button("Start Analysis", variant="primary")
|
| 213 |
-
help_btn = gr.Button("View Help Guide", variant="secondary")
|
| 214 |
-
|
| 215 |
-
# Help Guide
|
| 216 |
-
with gr.Group(visible=False) as help_page:
|
| 217 |
-
gr.Markdown("""
|
| 218 |
-
# Help Guide
|
| 219 |
-
|
| 220 |
-
## Quick Start
|
| 221 |
-
1. Enter repository IDs or search by keywords
|
| 222 |
-
2. Start the analysis
|
| 223 |
-
3. Review the results
|
| 224 |
-
|
| 225 |
-
## Features
|
| 226 |
-
- **Repository Analysis**: Get detailed insights about repositories
|
| 227 |
-
- **Keyword Search**: Find repositories matching your criteria
|
| 228 |
-
- **AI Assistant**: Chat to find the perfect repository
|
| 229 |
-
- **Comparison**: Compare repositories side by side
|
| 230 |
-
|
| 231 |
-
## Keyboard Shortcuts
|
| 232 |
-
- `Ctrl + Enter`: Send message in chat
|
| 233 |
-
- `Ctrl + S`: Start new analysis
|
| 234 |
-
- `Ctrl + H`: Toggle help guide
|
| 235 |
-
""")
|
| 236 |
-
back_btn = gr.Button("Back to Start", variant="primary")
|
| 237 |
-
|
| 238 |
-
# Input Page
|
| 239 |
-
with gr.Group(visible=False) as input_page:
|
| 240 |
-
with gr.Row():
|
| 241 |
-
with gr.Column():
|
| 242 |
-
gr.Markdown("### Enter Repository IDs")
|
| 243 |
-
repo_id_input = gr.Textbox(
|
| 244 |
-
label="Enter repo IDs (comma or newline separated)",
|
| 245 |
-
lines=5,
|
| 246 |
-
placeholder="repo1, repo2\nrepo3"
|
| 247 |
-
)
|
| 248 |
-
submit_btn = gr.Button("Submit Repo IDs", variant="primary")
|
| 249 |
-
submit_status = gr.Textbox(label="Status", visible=False)
|
| 250 |
-
|
| 251 |
-
with gr.Column():
|
| 252 |
-
gr.Markdown("### Or Search by Keywords")
|
| 253 |
-
keyword_input = gr.Textbox(
|
| 254 |
-
label="Enter keywords to search",
|
| 255 |
-
lines=3,
|
| 256 |
-
placeholder="Enter keywords separated by commas"
|
| 257 |
-
)
|
| 258 |
-
search_btn = gr.Button("Search by Keywords", variant="primary")
|
| 259 |
-
search_status = gr.Textbox(label="Status", visible=False)
|
| 260 |
-
|
| 261 |
-
df_output = gr.Dataframe(
|
| 262 |
-
headers=["repo id", "strength", "weaknesses", "speciality", "relevance rating"],
|
| 263 |
-
datatype=["str", "str", "str", "str", "str"]
|
| 264 |
-
)
|
| 265 |
-
with gr.Row():
|
| 266 |
-
analyze_btn = gr.Button("Start Analysis", variant="primary")
|
| 267 |
-
analyze_status = gr.Textbox(label="Status", visible=False)
|
| 268 |
-
compare_btn = gr.Button("Compare Repositories", variant="secondary")
|
| 269 |
-
|
| 270 |
-
# Analysis Page
|
| 271 |
-
with gr.Group(visible=False) as analysis_page:
|
| 272 |
-
gr.Markdown("### Repository Analysis")
|
| 273 |
-
progress = gr.Slider(
|
| 274 |
-
minimum=0,
|
| 275 |
-
maximum=100,
|
| 276 |
-
value=0,
|
| 277 |
-
label="Analysis Progress",
|
| 278 |
-
interactive=False
|
| 279 |
-
)
|
| 280 |
-
with gr.Row():
|
| 281 |
-
with gr.Column():
|
| 282 |
-
content_output = gr.Textbox(label="Repository Content", lines=10)
|
| 283 |
-
with gr.Column():
|
| 284 |
-
summary_output = gr.Textbox(label="Analysis Summary", lines=10)
|
| 285 |
-
with gr.Row():
|
| 286 |
-
next_btn = gr.Button("Analyze Next Repository", variant="primary")
|
| 287 |
-
next_status = gr.Textbox(label="Status", visible=False)
|
| 288 |
-
finish_btn = gr.Button("Finish Analysis", variant="secondary")
|
| 289 |
-
export_btn = gr.Button("Export Results", variant="secondary")
|
| 290 |
-
export_status = gr.Textbox(label="Status", visible=False)
|
| 291 |
-
|
| 292 |
-
# Comparison Page
|
| 293 |
-
with gr.Group(visible=False) as comparison_page:
|
| 294 |
-
gr.Markdown("### Repository Comparison")
|
| 295 |
-
with gr.Row():
|
| 296 |
-
with gr.Column():
|
| 297 |
-
repo1_select = gr.Dropdown(
|
| 298 |
-
label="Select First Repository",
|
| 299 |
-
choices=[],
|
| 300 |
-
interactive=True
|
| 301 |
-
)
|
| 302 |
-
repo1_content = gr.Textbox(label="Repository 1 Content", lines=10)
|
| 303 |
-
repo1_summary = gr.Textbox(label="Repository 1 Summary", lines=10)
|
| 304 |
-
with gr.Column():
|
| 305 |
-
repo2_select = gr.Dropdown(
|
| 306 |
-
label="Select Second Repository",
|
| 307 |
-
choices=[],
|
| 308 |
-
interactive=True
|
| 309 |
-
)
|
| 310 |
-
repo2_content = gr.Textbox(label="Repository 2 Content", lines=10)
|
| 311 |
-
repo2_summary = gr.Textbox(label="Repository 2 Summary", lines=10)
|
| 312 |
-
compare_btn = gr.Button("Compare", variant="primary")
|
| 313 |
-
back_to_analysis_btn = gr.Button("Back to Analysis", variant="secondary")
|
| 314 |
-
|
| 315 |
-
# Chatbot Page
|
| 316 |
-
with gr.Group(visible=False) as chatbot_page:
|
| 317 |
-
gr.Markdown("### Chat with Assistant")
|
| 318 |
-
gr.Markdown("""
|
| 319 |
-
Tell me about your ideal repository. I'll help you find the perfect match!
|
| 320 |
-
What are you looking for? Consider:
|
| 321 |
-
- Your use case
|
| 322 |
-
- Preferred programming language
|
| 323 |
-
- Required features
|
| 324 |
-
- Any specific requirements
|
| 325 |
-
""")
|
| 326 |
-
chatbot = gr.Chatbot(
|
| 327 |
-
label="Chat with Assistant",
|
| 328 |
-
height=400,
|
| 329 |
-
type="messages"
|
| 330 |
-
)
|
| 331 |
-
msg = gr.Textbox(
|
| 332 |
-
label="Message",
|
| 333 |
-
placeholder="Type your message here...",
|
| 334 |
-
lines=2
|
| 335 |
-
)
|
| 336 |
-
with gr.Row():
|
| 337 |
-
send_btn = gr.Button("Send", variant="primary")
|
| 338 |
-
send_status = gr.Textbox(label="Status", visible=False)
|
| 339 |
-
end_chat_btn = gr.Button("End Chat", variant="secondary")
|
| 340 |
-
end_chat_status = gr.Textbox(label="Status", visible=False)
|
| 341 |
|
| 342 |
-
|
| 343 |
-
|
| 344 |
-
|
| 345 |
-
|
| 346 |
-
|
| 347 |
-
|
| 348 |
-
|
| 349 |
-
|
| 350 |
-
|
| 351 |
-
|
| 352 |
-
|
| 353 |
-
|
| 354 |
-
|
| 355 |
-
|
| 356 |
-
|
| 357 |
-
|
| 358 |
-
|
| 359 |
-
|
| 360 |
-
|
| 361 |
-
gr.Markdown("### Analysis History")
|
| 362 |
-
history_df = gr.Dataframe(
|
| 363 |
-
headers=["Date", "Repositories", "Keywords", "Results"],
|
| 364 |
-
datatype=["str", "str", "str", "str"]
|
| 365 |
-
)
|
| 366 |
-
back_to_results_btn = gr.Button("Back to Results", variant="primary")
|
| 367 |
-
|
| 368 |
-
# Navigation functions
|
| 369 |
-
def navigate_to(page: str) -> List[gr.update]:
|
| 370 |
-
"""Navigate to a specific page."""
|
| 371 |
-
updates = []
|
| 372 |
-
for p in ["start", "input", "analysis", "chatbot", "results", "help", "comparison", "history"]:
|
| 373 |
-
updates.append(gr.update(visible=(p == page)))
|
| 374 |
-
return updates
|
| 375 |
-
|
| 376 |
-
# Event handlers
|
| 377 |
-
start_btn.click(
|
| 378 |
-
fn=lambda: navigate_to("input"),
|
| 379 |
-
inputs=[],
|
| 380 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
| 381 |
-
)
|
| 382 |
-
|
| 383 |
-
help_btn.click(
|
| 384 |
-
fn=lambda: navigate_to("help"),
|
| 385 |
-
inputs=[],
|
| 386 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
| 387 |
-
)
|
| 388 |
-
|
| 389 |
-
back_btn.click(
|
| 390 |
-
fn=lambda: navigate_to("start"),
|
| 391 |
-
inputs=[],
|
| 392 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
| 393 |
-
)
|
| 394 |
-
|
| 395 |
-
# Modified event handlers with status updates
|
| 396 |
-
def process_repo_input_with_status(text: str, state: AppState) -> Tuple[pd.DataFrame, str]:
|
| 397 |
-
"""Process repo input with status update."""
|
| 398 |
-
df = process_repo_input(text, state)
|
| 399 |
-
return df, ""
|
| 400 |
-
|
| 401 |
-
def keyword_search_with_status(keyword: str, state: AppState) -> Tuple[pd.DataFrame, str]:
|
| 402 |
-
"""Search keywords with status update."""
|
| 403 |
try:
|
| 404 |
-
if
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 409 |
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
results = search_top_spaces(kw, limit=5)
|
| 413 |
-
repo_ids.extend(results)
|
| 414 |
-
except Exception as e:
|
| 415 |
-
logger.error(f"Error searching for keyword {kw}: {e}")
|
| 416 |
-
continue
|
| 417 |
|
| 418 |
-
|
| 419 |
-
|
| 420 |
-
unique_repo_ids = []
|
| 421 |
-
for rid in repo_ids:
|
| 422 |
-
if rid not in seen:
|
| 423 |
-
unique_repo_ids.append(rid)
|
| 424 |
-
seen.add(rid)
|
| 425 |
|
| 426 |
-
state
|
|
|
|
| 427 |
state.current_repo_idx = 0
|
|
|
|
|
|
|
|
|
|
|
|
|
| 428 |
|
| 429 |
-
|
| 430 |
-
df = read_csv_as_text("repo_ids.csv")
|
| 431 |
-
return df, ""
|
| 432 |
|
| 433 |
except Exception as e:
|
| 434 |
-
logger.error(f"Error
|
| 435 |
-
return pd.DataFrame(
|
| 436 |
-
|
| 437 |
-
def
|
| 438 |
-
"""
|
| 439 |
-
|
| 440 |
-
|
| 441 |
-
|
| 442 |
-
|
| 443 |
-
"""Send message with status update."""
|
| 444 |
-
if not user_message:
|
| 445 |
-
return history, "", ""
|
| 446 |
-
history.append({"role": "user", "content": user_message})
|
| 447 |
-
response = chat_with_user(user_message, history, CHATBOT_SYSTEM_PROMPT)
|
| 448 |
history.append({"role": "assistant", "content": response})
|
| 449 |
-
return history, ""
|
| 450 |
-
|
| 451 |
-
def end_chat_with_status(history: List[Dict[str, str]], state: AppState) -> Tuple[List[str], gr.update, str]:
|
| 452 |
-
"""End chat and extract keywords."""
|
| 453 |
-
if not history:
|
| 454 |
-
return [], gr.update(visible=True), ""
|
| 455 |
-
keywords = extract_keywords_from_conversation(history)
|
| 456 |
-
state.generated_keywords = keywords
|
| 457 |
-
return keywords, gr.update(visible=True), ""
|
| 458 |
-
|
| 459 |
-
def export_with_status(df: pd.DataFrame) -> Tuple[str, str]:
|
| 460 |
-
"""Export with status update."""
|
| 461 |
-
result = export_results(df)
|
| 462 |
-
return result, ""
|
| 463 |
-
|
| 464 |
-
# Update event handlers with status updates
|
| 465 |
-
submit_btn.click(
|
| 466 |
-
fn=lambda: "Processing...",
|
| 467 |
-
inputs=[],
|
| 468 |
-
outputs=[submit_status]
|
| 469 |
-
).then(
|
| 470 |
-
fn=process_repo_input_with_status,
|
| 471 |
-
inputs=[repo_id_input, state],
|
| 472 |
-
outputs=[df_output, submit_status]
|
| 473 |
-
)
|
| 474 |
|
| 475 |
-
def
|
| 476 |
-
"""
|
| 477 |
-
return "
|
| 478 |
-
|
| 479 |
-
def search_complete(keyword: str, state: AppState):
|
| 480 |
-
"""Complete search operation."""
|
| 481 |
-
return keyword_search_with_status(keyword, state)
|
| 482 |
-
|
| 483 |
-
search_btn.click(
|
| 484 |
-
fn=search_click,
|
| 485 |
-
inputs=[],
|
| 486 |
-
outputs=[search_status]
|
| 487 |
-
).then(
|
| 488 |
-
fn=search_complete,
|
| 489 |
-
inputs=[keyword_input, state],
|
| 490 |
-
outputs=[df_output, search_status]
|
| 491 |
-
)
|
| 492 |
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
fn=analyze_with_status,
|
| 499 |
-
inputs=[state],
|
| 500 |
-
outputs=[content_output, summary_output, df_output, next_status]
|
| 501 |
)
|
| 502 |
|
| 503 |
send_btn.click(
|
| 504 |
-
fn=
|
| 505 |
-
inputs=[],
|
| 506 |
-
outputs=[send_status]
|
| 507 |
-
).then(
|
| 508 |
-
fn=send_message_with_status,
|
| 509 |
inputs=[msg, chatbot, state],
|
| 510 |
-
outputs=[chatbot, msg
|
| 511 |
-
)
|
| 512 |
-
|
| 513 |
-
end_chat_btn.click(
|
| 514 |
-
fn=lambda: "Processing...",
|
| 515 |
-
inputs=[],
|
| 516 |
-
outputs=[end_chat_status]
|
| 517 |
-
).then(
|
| 518 |
-
fn=end_chat_with_status,
|
| 519 |
-
inputs=[chatbot, state],
|
| 520 |
-
outputs=[gr.Textbox(label="Extracted Keywords"), results_page, end_chat_status]
|
| 521 |
-
)
|
| 522 |
-
|
| 523 |
-
export_btn.click(
|
| 524 |
-
fn=lambda: "Exporting...",
|
| 525 |
-
inputs=[],
|
| 526 |
-
outputs=[export_status]
|
| 527 |
-
).then(
|
| 528 |
-
fn=export_with_status,
|
| 529 |
-
inputs=[results_df],
|
| 530 |
-
outputs=[gr.Textbox(label="Export Status"), export_status]
|
| 531 |
-
)
|
| 532 |
-
|
| 533 |
-
restart_btn.click(
|
| 534 |
-
fn=lambda: (state.reset(), navigate_to("start")),
|
| 535 |
-
inputs=[state],
|
| 536 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page]
|
| 537 |
-
)
|
| 538 |
-
|
| 539 |
-
def update_progress(current: int, total: int) -> float:
|
| 540 |
-
"""Update progress bar."""
|
| 541 |
-
return (current / total) * 100
|
| 542 |
-
|
| 543 |
-
def export_results(df: pd.DataFrame) -> str:
|
| 544 |
-
"""Export results to CSV."""
|
| 545 |
-
try:
|
| 546 |
-
filename = f"analysis_results_{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
|
| 547 |
-
df.to_csv(filename, index=False)
|
| 548 |
-
return f"Results exported to {filename}"
|
| 549 |
-
except Exception as e:
|
| 550 |
-
return f"Error exporting results: {e}"
|
| 551 |
-
|
| 552 |
-
def load_history() -> pd.DataFrame:
|
| 553 |
-
"""Load analysis history."""
|
| 554 |
-
try:
|
| 555 |
-
return pd.read_csv("analysis_history.csv")
|
| 556 |
-
except:
|
| 557 |
-
return pd.DataFrame(columns=["Date", "Repositories", "Keywords", "Results"])
|
| 558 |
-
|
| 559 |
-
def save_to_history(repos: List[str], keywords: List[str], results: pd.DataFrame) -> None:
|
| 560 |
-
"""Save current analysis to history."""
|
| 561 |
-
try:
|
| 562 |
-
history_df = load_history()
|
| 563 |
-
new_row = {
|
| 564 |
-
"Date": pd.Timestamp.now().strftime("%Y-%m-%d %H:%M:%S"),
|
| 565 |
-
"Repositories": ", ".join(repos),
|
| 566 |
-
"Keywords": ", ".join(keywords),
|
| 567 |
-
"Results": results.to_json()
|
| 568 |
-
}
|
| 569 |
-
history_df = pd.concat([history_df, pd.DataFrame([new_row])], ignore_index=True)
|
| 570 |
-
history_df.to_csv("analysis_history.csv", index=False)
|
| 571 |
-
except Exception as e:
|
| 572 |
-
logger.error(f"Error saving to history: {e}")
|
| 573 |
-
|
| 574 |
-
# Add new event handlers for new features
|
| 575 |
-
history_btn.click(
|
| 576 |
-
fn=lambda: (load_history(), navigate_to("history")),
|
| 577 |
-
inputs=[],
|
| 578 |
-
outputs=[history_df, start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
| 579 |
)
|
| 580 |
|
| 581 |
-
|
| 582 |
-
fn=
|
| 583 |
inputs=[],
|
| 584 |
-
outputs=[
|
| 585 |
-
)
|
| 586 |
-
|
| 587 |
-
compare_btn.click(
|
| 588 |
-
fn=lambda: (update_repo_choices(state), navigate_to("comparison")),
|
| 589 |
-
inputs=[state],
|
| 590 |
-
outputs=[repo1_select, repo2_select, start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
| 591 |
)
|
| 592 |
-
|
| 593 |
-
back_to_analysis_btn.click(
|
| 594 |
-
fn=lambda: navigate_to("analysis"),
|
| 595 |
-
inputs=[],
|
| 596 |
-
outputs=[start_page, input_page, analysis_page, chatbot_page, results_page, help_page, comparison_page, history_page]
|
| 597 |
-
)
|
| 598 |
-
|
| 599 |
return app
|
| 600 |
|
| 601 |
-
def update_repo_choices(state: AppState) -> Tuple[List[str], List[str]]:
    """Return the state's repo IDs twice, one entry per comparison selector.

    Both tuple elements are the very same list object held by ``state``.
    """
    return (state.repo_ids,) * 2
|
| 605 |
-
|
| 606 |
if __name__ == "__main__":
|
| 607 |
app = create_ui()
|
| 608 |
app.launch()
|
|
|
|
| 2 |
import regex as re
|
| 3 |
import csv
|
| 4 |
import pandas as pd
|
| 5 |
+
from typing import List, Dict, Tuple, Optional
|
| 6 |
import logging
|
| 7 |
+
from datetime import datetime
|
| 8 |
import os
|
| 9 |
+
from huggingface_hub import HfApi, SpaceCard
|
| 10 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM
|
| 11 |
+
import torch
|
| 12 |
+
import json
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
|
| 14 |
# Logging: INFO and above via the root configuration, plus a module logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Constants
# CSV that holds the working table of repositories.
CSV_FILE = "repo_ids.csv"
# System prompt handed to the chat helper; three lines joined by newlines.
CHATBOT_SYSTEM_PROMPT = (
    "You are a helpful AI assistant that analyzes Hugging Face repositories.\n"
    "Your task is to help users understand repositories, extract key information, and provide insights.\n"
    "Be concise, clear, and focus on the most important aspects of each repository."
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
|
|
|
|
| 24 |
class AppState:
    """Mutable container for the values the UI callbacks share."""

    def __init__(self):
        # Repository IDs currently loaded into the table.
        self.repo_ids: List[str] = []
        # Position within ``repo_ids``; starts at the first entry.
        self.current_repo_idx: int = 0
        # Chat transcript; each entry is a str -> str mapping.
        self.chat_history: List[Dict[str, str]] = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
+
def read_csv_as_text(filename: str) -> pd.DataFrame:
    """Read a CSV file into a DataFrame whose cells are all strings.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The file's contents with every column read as ``str`` and empty cells
        kept as ``""``. If reading fails for any reason, the error is logged
        and an empty frame with the standard five columns is returned.
    """
    try:
        # dtype=str stops pandas from turning ratings into numbers, and
        # keep_default_na=False keeps blank cells as "" instead of NaN, so the
        # result matches the all-"str" table the UI displays.
        return pd.read_csv(filename, dtype=str, keep_default_na=False)
    except Exception as e:
        logger.error(f"Error reading CSV: {e}")
        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
|
| 38 |
|
| 39 |
+
def write_repos_to_csv(repo_ids: List[str]) -> None:
    """Overwrite ``CSV_FILE`` with one row per repository ID.

    The file gets the standard five-column header; every column except
    ``repo id`` is written empty.

    Args:
        repo_ids: Repository IDs to store, in order.

    Failures are logged and swallowed, so callers never see an exception.
    """
    try:
        # Pin UTF-8 so the file does not depend on the platform's locale
        # encoding; pandas reads CSVs as UTF-8 by default.
        with open(CSV_FILE, 'w', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
            writer.writerows([repo_id, "", "", "", ""] for repo_id in repo_ids)
    except Exception as e:
        logger.error(f"Error writing to CSV: {e}")
|
| 49 |
|
| 50 |
+
def search_top_spaces(keyword: str, limit: int = 5) -> List[str]:
    """Return the IDs of Spaces that the Hub lists for ``keyword``.

    Args:
        keyword: Search string passed to ``HfApi.list_spaces``.
        limit: Maximum number of results requested.

    Returns:
        The ``id`` of each listed Space, or ``[]`` if anything goes wrong
        (the error is logged).
    """
    try:
        matches = HfApi().list_spaces(search=keyword, limit=limit)
        space_ids = []
        # Iterate inside the try so errors raised while consuming the
        # listing are handled the same way as errors from the call itself.
        for match in matches:
            space_ids.append(match.id)
        return space_ids
    except Exception as err:
        logger.error(f"Error searching spaces: {err}")
        return []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
|
| 60 |
+
def analyze_repo(repo_id: str) -> Tuple[str, str]:
    """Summarize a Space from the metadata in its README card.

    Args:
        repo_id: ID of the Space to analyze.

    Returns:
        ``(content, summary)``: a plain-text dump of the card's title,
        description and tags, and a shorter bullet summary. On failure the
        error is logged and ``("Error analyzing <id>", "Error: <reason>")``
        is returned instead.
    """
    try:
        card = SpaceCard.load(repo_id)
        # Card metadata lives on ``card.data``; the card object itself has no
        # title/description/tags attributes. getattr keeps this tolerant of
        # cards that omit a field.
        metadata = card.data

        title = getattr(metadata, "title", None) or ""
        # NOTE(review): ``short_description`` is an optional README field;
        # fall back to the card body when it is absent — confirm this is the
        # text you want shown as the description.
        description = (getattr(metadata, "short_description", None) or card.text or "").strip()
        tags = [str(tag) for tag in (getattr(metadata, "tags", None) or [])]

        content = (
            "\n"
            f"Repository: {repo_id}\n"
            f"Title: {title}\n"
            f"Description: {description}\n"
            f"Tags: {', '.join(tags)}\n"
        )

        summary = f"Analysis of {repo_id}:\n"
        summary += f"- Title: {title}\n"
        summary += f"- Main focus: {description[:200]}...\n"
        summary += f"- Key tags: {', '.join(tags[:5])}\n"

        return content, summary
    except Exception as e:
        logger.error(f"Error analyzing repo {repo_id}: {e}")
        return f"Error analyzing {repo_id}", f"Error: {str(e)}"
|
| 83 |
|
| 84 |
+
def chat_with_user(message: str, history: List[Dict[str, str]], system_prompt: str) -> str:
    """Produce the assistant's reply to ``message``.

    This is a placeholder: the reply is a fixed template echoing the message.
    ``history`` and ``system_prompt`` are accepted but not used.

    Returns:
        The templated reply, or a generic apology if building it fails.
    """
    try:
        # For now, return a simple response
        reply = f"I understand you're asking about: {message}. How can I help you analyze this repository?"
    except Exception as err:
        logger.error(f"Error in chat: {err}")
        return "I apologize, but I encountered an error. Please try again."
    else:
        return reply
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
def create_ui() -> gr.Blocks:
    """Create the single-page Gradio interface.

    The page has an input box (repo IDs or a keyword search), a results table,
    two text panes showing the analysis of the first repository, and a chat
    panel.

    Returns:
        The assembled ``gr.Blocks`` app, not yet launched.
    """
    table_columns = ["repo id", "strength", "weaknesses", "speciality", "relevance rating"]
    # Words that mark the input as a keyword search rather than a list of IDs.
    search_triggers = ('search', 'find', 'look for')

    with gr.Blocks(title="Hugging Face Repo Analyzer", theme=gr.themes.Soft()) as app:
        # Create the state inside the Blocks context so it belongs to this
        # app like every other component used by the event handlers.
        state = gr.State(AppState())

        gr.Markdown("# Hugging Face Repository Analyzer")

        with gr.Row():
            with gr.Column():
                # Input Section
                gr.Markdown("### Enter Repository Information")
                repo_input = gr.Textbox(
                    label="Enter repo IDs (comma or newline separated) or keywords to search",
                    lines=5,
                    placeholder="Enter repository IDs or keywords to search"
                )
                submit_btn = gr.Button("Submit", variant="primary")
                status = gr.Textbox(label="Status", visible=True)

                # Results Section
                df_output = gr.Dataframe(
                    headers=table_columns,
                    datatype=["str", "str", "str", "str", "str"]
                )

                # Analysis Section
                content_output = gr.Textbox(label="Repository Content", lines=10)
                summary_output = gr.Textbox(label="Analysis Summary", lines=5)

                # Chat Section
                # type="messages" matches the {"role", "content"} dicts that
                # send_message appends to the history.
                chatbot = gr.Chatbot(label="Chat with Assistant", height=400, type="messages")
                msg = gr.Textbox(label="Message", placeholder="Ask about the repository...")
                with gr.Row():
                    send_btn = gr.Button("Send", variant="primary")
                    clear_btn = gr.Button("Clear Chat", variant="secondary")

        def process_input(text: str, state: AppState) -> Tuple[pd.DataFrame, str, str, str]:
            """Resolve the input to repo IDs, store them, and analyze the first.

            Returns ``(table, status message, content, summary)``.
            """
            try:
                # A missing or blank input can never yield a repository.
                if not text or not text.strip():
                    return pd.DataFrame(), "No repositories found", "", ""

                entries = [part.strip() for part in re.split(r'[\n,]+', text) if part.strip()]

                # Check if input is keywords or repo IDs
                # NOTE(review): the trigger word itself stays in the search
                # query (e.g. "search llm" searches for "search llm").
                if any(trigger in text.lower() for trigger in search_triggers):
                    # Handle as keyword search
                    repo_ids = []
                    for kw in entries:
                        repo_ids.extend(search_top_spaces(kw, limit=5))
                else:
                    # Handle as repo IDs
                    repo_ids = entries

                # Remove duplicates, keeping first-seen order
                repo_ids = list(dict.fromkeys(repo_ids))

                if not repo_ids:
                    return pd.DataFrame(), "No repositories found", "", ""

                # Update state and CSV
                state.repo_ids = repo_ids
                state.current_repo_idx = 0
                write_repos_to_csv(repo_ids)

                # Get first repo analysis
                content, summary = analyze_repo(repo_ids[0])

                return read_csv_as_text(CSV_FILE), f"Found {len(repo_ids)} repositories", content, summary

            except Exception as e:
                logger.error(f"Error processing input: {e}")
                return pd.DataFrame(), f"Error: {str(e)}", "", ""

        def send_message(message: str, history: List[Dict[str, str]], state: AppState) -> Tuple[List[Dict[str, str]], str]:
            """Append the user's message and the assistant's reply to the chat.

            Returns the updated history and ``""`` to clear the message box.
            """
            if not message:
                return history, ""
            # Work on a copy so the list handed in by Gradio is not mutated;
            # tolerate a history of None.
            updated = list(history or [])
            updated.append({"role": "user", "content": message})
            response = chat_with_user(message, updated, CHATBOT_SYSTEM_PROMPT)
            updated.append({"role": "assistant", "content": response})
            return updated, ""

        def clear_chat() -> Tuple[List[Dict[str, str]], str]:
            """Clear chat history and the message box."""
            return [], ""

        # Event handlers
        submit_btn.click(
            fn=process_input,
            inputs=[repo_input, state],
            outputs=[df_output, status, content_output, summary_output]
        )

        send_btn.click(
            fn=send_message,
            inputs=[msg, chatbot, state],
            outputs=[chatbot, msg]
        )

        clear_btn.click(
            fn=clear_chat,
            inputs=[],
            outputs=[chatbot, msg]
        )

    return app
|
| 196 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
if __name__ == "__main__":
    # Run as a script: build the interface, then start serving it.
    app = create_ui()
    app.launch()
|