Spaces:

Agents-MCP-Hackathon
/

HF_RepoSense

Sleeping

App Files Community

naman1102 committed on Jun 8

Commit

e297e4a

1 Parent(s): ae53812

Update app.py

Browse files

Files changed (1) hide show

app.py +159 -56

app.py CHANGED Viewed

@@ -33,11 +33,9 @@ def write_repos_to_csv(repo_ids: List[str]) -> None:
     try:
         with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
             writer = csv.writer(csvfile)
-            writer.writerow(["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
             for repo_id in repo_ids:
-                # Create Hugging Face Spaces link
-                hf_link = f"https://huggingface.co/spaces/{repo_id}"
-                writer.writerow([repo_id, hf_link, "", "", "", ""])
         logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
     except Exception as e:
         logger.error(f"Error writing to CSV: {e}")
@@ -67,7 +65,6 @@ def read_csv_to_dataframe() -> pd.DataFrame:
         # Format text columns for better display
         if not df.empty:
             df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
-            # Keep link as is since it's a URL
             df['strength'] = df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
             df['weaknesses'] = df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
             df['speciality'] = df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
@@ -75,7 +72,7 @@ def read_csv_to_dataframe() -> pd.DataFrame:
         return df
     except FileNotFoundError:
-        return pd.DataFrame(columns=["repo id", "link", "strength", "weaknesses", "speciality", "relevance rating"])
     except Exception as e:
         logger.error(f"Error reading CSV: {e}")
         return pd.DataFrame()
@@ -121,16 +118,28 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
                     df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                     df.at[idx, "speciality"] = llm_json.get("speciality", "")
                     df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
-                # Ensure link is present (in case it was added later)
-                if "link" in df.columns and (pd.isna(df.at[idx, "link"]) or df.at[idx, "link"] == ""):
-                    df.at[idx, "link"] = f"https://huggingface.co/spaces/{repo_id}"
                 repo_found_in_df = True
                 break
         if not repo_found_in_df:
              logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")
-        df.to_csv(CSV_FILE, index=False)
         logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
         return combined_content, summary, df
@@ -284,15 +293,13 @@ def create_ui() -> gr.Blocks:
     .gr-dataframe th:nth-child(1),
     .gr-dataframe td:nth-child(1) { width: 15%; }
     .gr-dataframe th:nth-child(2),
-    .gr-dataframe td:nth-child(2) { width: 15%; }
     .gr-dataframe th:nth-child(3),
-    .gr-dataframe td:nth-child(3) { width: 20%; }
     .gr-dataframe th:nth-child(4),
     .gr-dataframe td:nth-child(4) { width: 20%; }
     .gr-dataframe th:nth-child(5),
     .gr-dataframe td:nth-child(5) { width: 15%; }
-    .gr-dataframe th:nth-child(6),
-    .gr-dataframe td:nth-child(6) { width: 15%; }
     /* Make repository names clickable */
     .gr-dataframe td:nth-child(1) {
@@ -308,18 +315,9 @@ def create_ui() -> gr.Blocks:
         transform: scale(1.02);
     }
-    /* Make links clickable and styled */
-    .gr-dataframe td:nth-child(2) {
-        cursor: pointer;
-        color: #667eea;
-        text-decoration: underline;
-        font-size: 0.9rem;
-        transition: all 0.3s ease;
-    }
-    .gr-dataframe td:nth-child(2):hover {
-        background-color: rgba(102, 126, 234, 0.1);
-        color: #764ba2;
     }
     .gr-dataframe tbody tr:hover {
@@ -435,11 +433,27 @@ def create_ui() -> gr.Blocks:
                 gr.Markdown("### 📊 Results Dashboard")
                 gr.Markdown("💡 **Tip:** Click on any repository name to explore it in detail!")
                 df_output = gr.Dataframe(
-                    headers=["Repository", "Link", "Strengths", "Weaknesses", "Speciality", "Relevance"],
                     wrap=True,
                     interactive=False  # Prevent editing but allow selection
                 )
             # --- Chatbot Tab ---
             with gr.TabItem("🤖 AI Assistant", id="chatbot_tab"):
                 gr.Markdown("### 💬 Intelligent Repository Discovery")
@@ -620,15 +634,14 @@ def create_ui() -> gr.Blocks:
             status = "Status: Keywords extracted. User requirements saved for analysis."
             return final_keywords_str, status, user_requirements
-        def handle_dataframe_select(evt: gr.SelectData, df_data) -> Tuple[str, Any]:
-            """Handle dataframe row selection and navigate to repo explorer."""
             print(f"DEBUG: Selection event triggered!")
             print(f"DEBUG: evt = {evt}")
             print(f"DEBUG: df_data type = {type(df_data)}")
-            print(f"DEBUG: df_data = {df_data}")
             if evt is None:
-                return "", gr.update()
             try:
                 # Get the selected row and column from the event
@@ -636,27 +649,23 @@ def create_ui() -> gr.Blocks:
                 col_idx = evt.index[1]
                 print(f"DEBUG: Selected row {row_idx}, column {col_idx}")
                 # Handle pandas DataFrame
                 if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
-                    # If link column (column 1) is clicked, open the URL
-                    if col_idx == 1 and "link" in df_data.columns:
-                        link_url = df_data.iloc[row_idx, 1]  # Second column contains link
-                        print(f"DEBUG: Link clicked: {link_url}")
-                        if link_url and str(link_url).strip() and str(link_url).startswith('http'):
-                            # Return JavaScript to open link in new tab
-                            js_code = f"window.open('{link_url}', '_blank');"
-                            return "", gr.update()
-                    # For other columns, get the repository ID from the first column (repo id)
                     repo_id = df_data.iloc[row_idx, 0]  # First column contains repo id
                     print(f"DEBUG: Extracted repo_id = '{repo_id}'")
                     # Only proceed if we actually have a repository ID
                     if repo_id and str(repo_id).strip() and str(repo_id).strip() != 'nan':
                         clean_repo_id = str(repo_id).strip()
-                        logger.info(f"Navigating to repo explorer for repository: {clean_repo_id}")
-                        return clean_repo_id, gr.update(selected="repo_explorer_tab")
                 else:
                     print(f"DEBUG: df_data is not a DataFrame or row_idx {row_idx} out of range")
@@ -664,7 +673,7 @@ def create_ui() -> gr.Blocks:
                 print(f"DEBUG: Exception occurred: {e}")
                 logger.error(f"Error handling dataframe selection: {e}")
-            return "", gr.update()
         def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, str]:
             """Analyzes all repositories in the CSV file with progress tracking."""
@@ -681,6 +690,7 @@ def create_ui() -> gr.Blocks:
                 all_summaries = []
                 successful_analyses = 0
                 failed_analyses = 0
                 for i, repo_id in enumerate(repo_ids):
                     # Update progress
@@ -692,32 +702,84 @@ def create_ui() -> gr.Blocks:
                         # Analyze the repository
                         content, summary, df = analyze_and_update_single_repo(repo_id, user_requirements)
-                        all_summaries.append(f"✅ {repo_id}: Analysis completed")
-                        successful_analyses += 1
-                        # Small delay to show progress (optional)
-                        time.sleep(0.1)
                     except Exception as e:
                         logger.error(f"Error analyzing {repo_id}: {e}")
                         all_summaries.append(f"❌ {repo_id}: Error - {str(e)[:100]}...")
                         failed_analyses += 1
                 # Complete the progress
                 progress(1.0, desc="Batch analysis completed!")
-                # Final status
                 final_status = f"🎉 Batch Analysis Complete!\n✅ Successful: {successful_analyses}/{total_repos}\n❌ Failed: {failed_analyses}/{total_repos}"
                 # Create progress summary
-                progress_summary = "\n".join(all_summaries[-10:])  # Show last 10 entries
-                if len(all_summaries) > 10:
-                    progress_summary = f"... (showing last 10 of {len(all_summaries)} repositories)\n" + progress_summary
-                # Get updated dataframe
                 updated_df = read_csv_to_dataframe()
-                logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed")
                 return updated_df, final_status, progress_summary
             except Exception as e:
@@ -725,6 +787,31 @@ def create_ui() -> gr.Blocks:
                 error_status = f"❌ Batch analysis failed: {e}"
                 return read_csv_to_dataframe(), error_status, ""
         # --- Component Event Wiring ---
         # Initialize chatbot with welcome message on app load
@@ -800,11 +887,27 @@ def create_ui() -> gr.Blocks:
         # Repo Explorer Tab
         setup_repo_explorer_events(repo_components, repo_states)
         # Add dataframe selection event
         df_output.select(
             fn=handle_dataframe_select,
             inputs=[df_output],
-            outputs=[repo_components["repo_explorer_input"], tabs]
         )
     return app

     try:
         with open(CSV_FILE, mode="w", newline='', encoding="utf-8") as csvfile:
             writer = csv.writer(csvfile)
+            writer.writerow(["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
             for repo_id in repo_ids:
+                writer.writerow([repo_id, "", "", "", ""])
         logger.info(f"Wrote {len(repo_ids)} repo IDs to {CSV_FILE}")
     except Exception as e:
         logger.error(f"Error writing to CSV: {e}")
         # Format text columns for better display
         if not df.empty:
             df['repo id'] = df['repo id'].apply(lambda x: format_text_for_dataframe(x, 50))
             df['strength'] = df['strength'].apply(lambda x: format_text_for_dataframe(x, 180))
             df['weaknesses'] = df['weaknesses'].apply(lambda x: format_text_for_dataframe(x, 180))
             df['speciality'] = df['speciality'].apply(lambda x: format_text_for_dataframe(x, 150))
         return df
     except FileNotFoundError:
+        return pd.DataFrame(columns=["repo id", "strength", "weaknesses", "speciality", "relevance rating"])
     except Exception as e:
         logger.error(f"Error reading CSV: {e}")
         return pd.DataFrame()
                     df.at[idx, "weaknesses"] = llm_json.get("weaknesses", "")
                     df.at[idx, "speciality"] = llm_json.get("speciality", "")
                     df.at[idx, "relevance rating"] = llm_json.get("relevance rating", "")
                 repo_found_in_df = True
                 break
         if not repo_found_in_df:
              logger.warning(f"Repo ID {repo_id} not found in CSV for updating.")
+        # Write CSV with better error handling and flushing
+        try:
+            df.to_csv(CSV_FILE, index=False)
+            # Force file system flush
+            os.sync() if hasattr(os, 'sync') else None
+            logger.info(f"Successfully updated CSV for {repo_id}")
+        except Exception as csv_error:
+            logger.error(f"Failed to write CSV for {repo_id}: {csv_error}")
+            # Try once more with a small delay
+            time.sleep(0.2)
+            try:
+                df.to_csv(CSV_FILE, index=False)
+                logger.info(f"Successfully updated CSV for {repo_id} on retry")
+            except Exception as retry_error:
+                logger.error(f"Failed to write CSV for {repo_id} on retry: {retry_error}")
         logger.info(f"Successfully analyzed and updated CSV for {repo_id}")
         return combined_content, summary, df
     .gr-dataframe th:nth-child(1),
     .gr-dataframe td:nth-child(1) { width: 15%; }
     .gr-dataframe th:nth-child(2),
+    .gr-dataframe td:nth-child(2) { width: 25%; }
     .gr-dataframe th:nth-child(3),
+    .gr-dataframe td:nth-child(3) { width: 25%; }
     .gr-dataframe th:nth-child(4),
     .gr-dataframe td:nth-child(4) { width: 20%; }
     .gr-dataframe th:nth-child(5),
     .gr-dataframe td:nth-child(5) { width: 15%; }
     /* Make repository names clickable */
     .gr-dataframe td:nth-child(1) {
         transform: scale(1.02);
     }
+    /* Remove hover effect from other cells */
+    .gr-dataframe td:nth-child(n+2) {
+        cursor: default;
     }
     .gr-dataframe tbody tr:hover {
                 gr.Markdown("### 📊 Results Dashboard")
                 gr.Markdown("💡 **Tip:** Click on any repository name to explore it in detail!")
                 df_output = gr.Dataframe(
+                    headers=["Repository", "Strengths", "Weaknesses", "Speciality", "Relevance"],
                     wrap=True,
                     interactive=False  # Prevent editing but allow selection
                 )
+                # Modal popup for repository action selection
+                with gr.Row():
+                    with gr.Column():
+                        repo_action_modal = gr.Column(visible=False)
+                        with repo_action_modal:
+                            gr.Markdown("### 🔗 Repository Actions")
+                            selected_repo_display = gr.Textbox(
+                                label="Selected Repository",
+                                interactive=False,
+                                info="Choose what you'd like to do with this repository"
+                            )
+                            with gr.Row():
+                                visit_repo_btn = gr.Button("🌐 Visit Hugging Face Space", variant="primary", size="lg")
+                                explore_repo_btn = gr.Button("🔍 Open in Repo Explorer", variant="secondary", size="lg")
+                                cancel_modal_btn = gr.Button("❌ Cancel", size="lg")
             # --- Chatbot Tab ---
             with gr.TabItem("🤖 AI Assistant", id="chatbot_tab"):
                 gr.Markdown("### 💬 Intelligent Repository Discovery")
             status = "Status: Keywords extracted. User requirements saved for analysis."
             return final_keywords_str, status, user_requirements
+        def handle_dataframe_select(evt: gr.SelectData, df_data) -> Tuple[str, Any, Any]:
+            """Handle dataframe row selection - only repo ID column triggers modal."""
             print(f"DEBUG: Selection event triggered!")
             print(f"DEBUG: evt = {evt}")
             print(f"DEBUG: df_data type = {type(df_data)}")
             if evt is None:
+                return "", gr.update(visible=False), gr.update()
             try:
                 # Get the selected row and column from the event
                 col_idx = evt.index[1]
                 print(f"DEBUG: Selected row {row_idx}, column {col_idx}")
+                # Only respond to clicks on the repo ID column (column 0)
+                if col_idx != 0:
+                    print(f"DEBUG: Clicked on column {col_idx}, ignoring (only repo ID column responds)")
+                    return "", gr.update(visible=False), gr.update()
                 # Handle pandas DataFrame
                 if isinstance(df_data, pd.DataFrame) and not df_data.empty and row_idx < len(df_data):
+                    # Get the repository ID from the first column
                     repo_id = df_data.iloc[row_idx, 0]  # First column contains repo id
                     print(f"DEBUG: Extracted repo_id = '{repo_id}'")
                     # Only proceed if we actually have a repository ID
                     if repo_id and str(repo_id).strip() and str(repo_id).strip() != 'nan':
                         clean_repo_id = str(repo_id).strip()
+                        logger.info(f"Showing modal for repository: {clean_repo_id}")
+                        # Show modal and populate selected repo
+                        return clean_repo_id, gr.update(visible=True), gr.update()
                 else:
                     print(f"DEBUG: df_data is not a DataFrame or row_idx {row_idx} out of range")
                 print(f"DEBUG: Exception occurred: {e}")
                 logger.error(f"Error handling dataframe selection: {e}")
+            return "", gr.update(visible=False), gr.update()
         def handle_analyze_all_repos(repo_ids: List[str], user_requirements: str, progress=gr.Progress()) -> Tuple[pd.DataFrame, str, str]:
             """Analyzes all repositories in the CSV file with progress tracking."""
                 all_summaries = []
                 successful_analyses = 0
                 failed_analyses = 0
+                csv_update_failures = 0
                 for i, repo_id in enumerate(repo_ids):
                     # Update progress
                         # Analyze the repository
                         content, summary, df = analyze_and_update_single_repo(repo_id, user_requirements)
+                        # Verify the CSV was actually updated by checking if the repo has analysis data
+                        updated_df = read_csv_to_dataframe()
+                        repo_updated = False
+                        for idx, row in updated_df.iterrows():
+                            if row["repo id"] == repo_id:
+                                # Check if any analysis field is populated
+                                if (row.get("strength", "").strip() or
+                                    row.get("weaknesses", "").strip() or
+                                    row.get("speciality", "").strip() or
+                                    row.get("relevance rating", "").strip()):
+                                    repo_updated = True
+                                    break
+                        if repo_updated:
+                            all_summaries.append(f"✅ {repo_id}: Analysis completed & CSV updated")
+                            successful_analyses += 1
+                        else:
+                            # CSV update failed - try once more
+                            logger.warning(f"CSV update failed for {repo_id}, attempting retry...")
+                            time.sleep(0.5)  # Wait a bit longer
+                            # Force re-read and re-update
+                            df_retry = read_csv_to_dataframe()
+                            retry_success = False
+                            # Re-parse the analysis if available
+                            if summary and "JSON extraction: SUCCESS" in summary:
+                                # Extract the analysis from summary - this is a fallback
+                                logger.info(f"Attempting to re-update CSV for {repo_id}")
+                                content_retry, summary_retry, df_retry = analyze_and_update_single_repo(repo_id, user_requirements)
+                                # Check again
+                                final_df = read_csv_to_dataframe()
+                                for idx, row in final_df.iterrows():
+                                    if row["repo id"] == repo_id:
+                                        if (row.get("strength", "").strip() or
+                                            row.get("weaknesses", "").strip() or
+                                            row.get("speciality", "").strip() or
+                                            row.get("relevance rating", "").strip()):
+                                            retry_success = True
+                                            break
+                            if retry_success:
+                                all_summaries.append(f"✅ {repo_id}: Analysis completed & CSV updated (retry)")
+                                successful_analyses += 1
+                            else:
+                                all_summaries.append(f"⚠️ {repo_id}: Analysis completed but CSV update failed")
+                                csv_update_failures += 1
+                        # Longer delay to prevent file conflicts
+                        time.sleep(0.3)
                     except Exception as e:
                         logger.error(f"Error analyzing {repo_id}: {e}")
                         all_summaries.append(f"❌ {repo_id}: Error - {str(e)[:100]}...")
                         failed_analyses += 1
+                        # Still wait to prevent rapid failures
+                        time.sleep(0.2)
                 # Complete the progress
                 progress(1.0, desc="Batch analysis completed!")
+                # Final status with detailed breakdown
                 final_status = f"🎉 Batch Analysis Complete!\n✅ Successful: {successful_analyses}/{total_repos}\n❌ Failed: {failed_analyses}/{total_repos}"
+                if csv_update_failures > 0:
+                    final_status += f"\n⚠️ CSV Update Issues: {csv_update_failures}/{total_repos}"
                 # Create progress summary
+                progress_summary = "\n".join(all_summaries[-15:])  # Show last 15 entries
+                if len(all_summaries) > 15:
+                    progress_summary = f"... (showing last 15 of {len(all_summaries)} repositories)\n" + progress_summary
+                # Get final updated dataframe
                 updated_df = read_csv_to_dataframe()
+                logger.info(f"Batch analysis completed: {successful_analyses} successful, {failed_analyses} failed, {csv_update_failures} CSV update issues")
                 return updated_df, final_status, progress_summary
             except Exception as e:
                 error_status = f"❌ Batch analysis failed: {e}"
                 return read_csv_to_dataframe(), error_status, ""
+        def handle_visit_repo(repo_id: str) -> Tuple[Any, str]:
+            """Handle visiting the Hugging Face Space for the repository."""
+            if repo_id and repo_id.strip():
+                hf_url = f"https://huggingface.co/spaces/{repo_id.strip()}"
+                logger.info(f"User chose to visit: {hf_url}")
+                # Use JavaScript to open URL in new tab
+                js_code = f"""
+                <script>
+                window.open('{hf_url}', '_blank');
+                </script>
+                """
+                return gr.update(visible=False), f"🌐 Opening: {hf_url}"
+            return gr.update(visible=False), ""
+        def handle_explore_repo(repo_id: str) -> Tuple[Any, Any, str]:
+            """Handle navigating to the repo explorer for the repository."""
+            if repo_id and repo_id.strip():
+                logger.info(f"User chose to explore: {repo_id.strip()}")
+                return gr.update(visible=False), gr.update(selected="repo_explorer_tab"), repo_id.strip()
+            return gr.update(visible=False), gr.update(), ""
+        def handle_cancel_modal() -> Any:
+            """Handle closing the modal."""
+            return gr.update(visible=False)
         # --- Component Event Wiring ---
         # Initialize chatbot with welcome message on app load
         # Repo Explorer Tab
         setup_repo_explorer_events(repo_components, repo_states)
+        # Modal button events
+        visit_repo_btn.click(
+            fn=handle_visit_repo,
+            inputs=[selected_repo_display],
+            outputs=[repo_action_modal, selected_repo_display]
+        )
+        explore_repo_btn.click(
+            fn=handle_explore_repo,
+            inputs=[selected_repo_display],
+            outputs=[repo_action_modal, tabs, repo_components["repo_explorer_input"]]
+        )
+        cancel_modal_btn.click(
+            fn=handle_cancel_modal,
+            outputs=[repo_action_modal]
+        )
         # Add dataframe selection event
         df_output.select(
             fn=handle_dataframe_select,
             inputs=[df_output],
+            outputs=[selected_repo_display, repo_action_modal, tabs]
         )
     return app