Spaces:
Sleeping
Sleeping
new
Browse files- app.py +2 -2
- app_old.py +4 -4
- hf_utils.py +26 -38
- repo_explorer.py +2 -2
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import time
|
|
| 9 |
|
| 10 |
# Import core logic from other modules, as in app_old.py
|
| 11 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 12 |
-
from hf_utils import
|
| 13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 14 |
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
| 15 |
|
|
@@ -196,7 +196,7 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
|
|
| 196 |
"""
|
| 197 |
try:
|
| 198 |
logger.info(f"Starting analysis for repo: {repo_id}")
|
| 199 |
-
|
| 200 |
txt_path = combine_repo_files_for_llm()
|
| 201 |
|
| 202 |
with open(txt_path, "r", encoding="utf-8") as f:
|
|
|
|
| 9 |
|
| 10 |
# Import core logic from other modules, as in app_old.py
|
| 11 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 12 |
+
from hf_utils import download_filtered_space_files, search_top_spaces
|
| 13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 14 |
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
| 15 |
|
|
|
|
| 196 |
"""
|
| 197 |
try:
|
| 198 |
logger.info(f"Starting analysis for repo: {repo_id}")
|
| 199 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
| 200 |
txt_path = combine_repo_files_for_llm()
|
| 201 |
|
| 202 |
with open(txt_path, "r", encoding="utf-8") as f:
|
app_old.py
CHANGED
|
@@ -3,7 +3,7 @@ import regex as re
|
|
| 3 |
import csv
|
| 4 |
import pandas as pd
|
| 5 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 6 |
-
from hf_utils import
|
| 7 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 8 |
# Import chatbot logic
|
| 9 |
from analyzer import analyze_code
|
|
@@ -98,7 +98,7 @@ def show_combined_repo_and_llm():
|
|
| 98 |
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
| 99 |
repo_id = last_repo_ids[current_repo_idx]
|
| 100 |
try:
|
| 101 |
-
|
| 102 |
except Exception as e:
|
| 103 |
return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
|
| 104 |
txt_path = combine_repo_files_for_llm()
|
|
@@ -221,7 +221,7 @@ def batch_analyze_and_select_top():
|
|
| 221 |
for idx, row in df.iterrows():
|
| 222 |
repo_id = row["repo id"]
|
| 223 |
try:
|
| 224 |
-
|
| 225 |
txt_path = combine_repo_files_for_llm()
|
| 226 |
llm_output = analyze_combined_file(txt_path)
|
| 227 |
last_start = llm_output.rfind('{')
|
|
@@ -277,7 +277,7 @@ def batch_analyze_and_select_top_for_chat(state):
|
|
| 277 |
for idx, row in df.iterrows():
|
| 278 |
repo_id = row["repo id"]
|
| 279 |
try:
|
| 280 |
-
|
| 281 |
txt_path = combine_repo_files_for_llm()
|
| 282 |
llm_output = analyze_combined_file(txt_path)
|
| 283 |
last_start = llm_output.rfind('{')
|
|
|
|
| 3 |
import csv
|
| 4 |
import pandas as pd
|
| 5 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 6 |
+
from hf_utils import download_filtered_space_files, search_top_spaces
|
| 7 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 8 |
# Import chatbot logic
|
| 9 |
from analyzer import analyze_code
|
|
|
|
| 98 |
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
| 99 |
repo_id = last_repo_ids[current_repo_idx]
|
| 100 |
try:
|
| 101 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
| 102 |
except Exception as e:
|
| 103 |
return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
|
| 104 |
txt_path = combine_repo_files_for_llm()
|
|
|
|
| 221 |
for idx, row in df.iterrows():
|
| 222 |
repo_id = row["repo id"]
|
| 223 |
try:
|
| 224 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
| 225 |
txt_path = combine_repo_files_for_llm()
|
| 226 |
llm_output = analyze_combined_file(txt_path)
|
| 227 |
last_start = llm_output.rfind('{')
|
|
|
|
| 277 |
for idx, row in df.iterrows():
|
| 278 |
repo_id = row["repo id"]
|
| 279 |
try:
|
| 280 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
| 281 |
txt_path = combine_repo_files_for_llm()
|
| 282 |
llm_output = analyze_combined_file(txt_path)
|
| 283 |
last_start = llm_output.rfind('{')
|
hf_utils.py
CHANGED
|
@@ -2,58 +2,46 @@ from huggingface_hub import snapshot_download
|
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
|
| 5 |
-
def
|
| 6 |
"""
|
| 7 |
-
Downloads files from a Hugging Face Space repository
|
| 8 |
|
| 9 |
Args:
|
| 10 |
space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
|
| 11 |
local_dir (str): Local directory to store the downloaded files.
|
| 12 |
-
file_extensions (list):
|
| 13 |
-
|
| 14 |
"""
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
|
| 20 |
-
# Download the snapshot
|
| 21 |
repo_path = snapshot_download(repo_id=space_id, repo_type="space")
|
| 22 |
|
| 23 |
-
#
|
| 24 |
if os.path.exists(local_dir):
|
| 25 |
shutil.rmtree(local_dir)
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
relative_path = os.path.relpath(source_path, repo_path)
|
| 43 |
-
dest_path = os.path.join(local_dir, relative_path)
|
| 44 |
-
|
| 45 |
-
# Create destination directory if it doesn't exist
|
| 46 |
-
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
| 47 |
-
|
| 48 |
-
# Copy the file
|
| 49 |
-
shutil.copy2(source_path, dest_path)
|
| 50 |
-
copied_files += 1
|
| 51 |
-
|
| 52 |
-
print(f"Filtered download complete: {copied_files} files with extensions {file_extensions} from Space '{space_id}' downloaded to: {local_dir}")
|
| 53 |
|
| 54 |
# Example usage
|
| 55 |
-
#
|
| 56 |
-
# download_space_repo("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
|
| 57 |
|
| 58 |
from huggingface_hub import list_spaces
|
| 59 |
|
|
|
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
|
| 5 |
+
def download_filtered_space_files(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
    """
    Download only files with specified extensions from a Hugging Face Space repository.

    Args:
        space_id (str): The ID of the Hugging Face Space
            (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
            Any existing directory at this path is deleted first.
        file_extensions (list): Non-empty list of file extensions to include
            (e.g., ['.py', '.md']).

    Raises:
        ValueError: If file_extensions is None or empty.
    """
    # Fail fast before any network traffic. NOTE: an earlier docstring claimed
    # that None meant "no filtering (all files downloaded)", which contradicted
    # this check; the documented contract now matches the code.
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")

    # Download the full snapshot (into the hub cache), then copy only the
    # subset of files we care about into local_dir.
    repo_path = snapshot_download(repo_id=space_id, repo_type="space")

    # Clear out local_dir if it already exists so stale files from a
    # previously-downloaded repo don't leak into this run.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)
    os.makedirs(local_dir, exist_ok=True)

    # str.endswith accepts a tuple of suffixes: one C-level call per file
    # instead of a per-extension generator scan.
    suffixes = tuple(file_extensions)
    copied_files = 0

    # Walk the snapshot and mirror matching files, preserving the relative
    # directory layout under local_dir.
    for root, _, files in os.walk(repo_path):
        for file_name in files:  # renamed from `file` to avoid shadowing the builtin
            if file_name.endswith(suffixes):
                src_file = os.path.join(root, file_name)
                rel_path = os.path.relpath(src_file, repo_path)
                dest_file = os.path.join(local_dir, rel_path)
                os.makedirs(os.path.dirname(dest_file), exist_ok=True)
                shutil.copy2(src_file, dest_file)
                copied_files += 1

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# Example usage
|
| 44 |
+
# download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
|
|
|
|
| 45 |
|
| 46 |
from huggingface_hub import list_spaces
|
| 47 |
|
repo_explorer.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
import logging
|
| 4 |
from typing import List, Dict, Tuple
|
| 5 |
from analyzer import combine_repo_files_for_llm
|
| 6 |
-
from hf_utils import
|
| 7 |
|
| 8 |
# Setup logger
|
| 9 |
logger = logging.getLogger(__name__)
|
|
@@ -208,7 +208,7 @@ def handle_load_repository(repo_id: str) -> Tuple[str, str]:
|
|
| 208 |
|
| 209 |
# Download and process the repository
|
| 210 |
try:
|
| 211 |
-
|
| 212 |
combined_text_path = combine_repo_files_for_llm()
|
| 213 |
|
| 214 |
except Exception as e:
|
|
|
|
| 3 |
import logging
|
| 4 |
from typing import List, Dict, Tuple
|
| 5 |
from analyzer import combine_repo_files_for_llm
|
| 6 |
+
from hf_utils import download_filtered_space_files
|
| 7 |
|
| 8 |
# Setup logger
|
| 9 |
logger = logging.getLogger(__name__)
|
|
|
|
| 208 |
|
| 209 |
# Download and process the repository
|
| 210 |
try:
|
| 211 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
| 212 |
combined_text_path = combine_repo_files_for_llm()
|
| 213 |
|
| 214 |
except Exception as e:
|