Spaces:
Sleeping
Sleeping
new
Browse files- app.py +2 -2
- app_old.py +4 -4
- hf_utils.py +26 -38
- repo_explorer.py +2 -2
app.py
CHANGED
|
@@ -9,7 +9,7 @@ import time
|
|
| 9 |
|
| 10 |
# Import core logic from other modules, as in app_old.py
|
| 11 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 12 |
-
from hf_utils import
|
| 13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 14 |
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
| 15 |
|
|
@@ -196,7 +196,7 @@ def analyze_and_update_single_repo(repo_id: str, user_requirements: str = "") ->
|
|
| 196 |
"""
|
| 197 |
try:
|
| 198 |
logger.info(f"Starting analysis for repo: {repo_id}")
|
| 199 |
-
|
| 200 |
txt_path = combine_repo_files_for_llm()
|
| 201 |
|
| 202 |
with open(txt_path, "r", encoding="utf-8") as f:
|
|
|
|
| 9 |
|
| 10 |
# Import core logic from other modules, as in app_old.py
|
| 11 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 12 |
+
from hf_utils import download_filtered_space_files, search_top_spaces
|
| 13 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 14 |
from repo_explorer import create_repo_explorer_tab, setup_repo_explorer_events
|
| 15 |
|
|
|
|
| 196 |
"""
|
| 197 |
try:
|
| 198 |
logger.info(f"Starting analysis for repo: {repo_id}")
|
| 199 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
| 200 |
txt_path = combine_repo_files_for_llm()
|
| 201 |
|
| 202 |
with open(txt_path, "r", encoding="utf-8") as f:
|
app_old.py
CHANGED
|
@@ -3,7 +3,7 @@ import regex as re
|
|
| 3 |
import csv
|
| 4 |
import pandas as pd
|
| 5 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 6 |
-
from hf_utils import
|
| 7 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 8 |
# Import chatbot logic
|
| 9 |
from analyzer import analyze_code
|
|
@@ -98,7 +98,7 @@ def show_combined_repo_and_llm():
|
|
| 98 |
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
| 99 |
repo_id = last_repo_ids[current_repo_idx]
|
| 100 |
try:
|
| 101 |
-
|
| 102 |
except Exception as e:
|
| 103 |
return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
|
| 104 |
txt_path = combine_repo_files_for_llm()
|
|
@@ -221,7 +221,7 @@ def batch_analyze_and_select_top():
|
|
| 221 |
for idx, row in df.iterrows():
|
| 222 |
repo_id = row["repo id"]
|
| 223 |
try:
|
| 224 |
-
|
| 225 |
txt_path = combine_repo_files_for_llm()
|
| 226 |
llm_output = analyze_combined_file(txt_path)
|
| 227 |
last_start = llm_output.rfind('{')
|
|
@@ -277,7 +277,7 @@ def batch_analyze_and_select_top_for_chat(state):
|
|
| 277 |
for idx, row in df.iterrows():
|
| 278 |
repo_id = row["repo id"]
|
| 279 |
try:
|
| 280 |
-
|
| 281 |
txt_path = combine_repo_files_for_llm()
|
| 282 |
llm_output = analyze_combined_file(txt_path)
|
| 283 |
last_start = llm_output.rfind('{')
|
|
|
|
| 3 |
import csv
|
| 4 |
import pandas as pd
|
| 5 |
from analyzer import combine_repo_files_for_llm, analyze_combined_file, parse_llm_json_response
|
| 6 |
+
from hf_utils import download_filtered_space_files, search_top_spaces
|
| 7 |
from chatbot_page import chat_with_user, extract_keywords_from_conversation
|
| 8 |
# Import chatbot logic
|
| 9 |
from analyzer import analyze_code
|
|
|
|
| 98 |
return "All repo IDs have been processed.", "", read_csv_as_text("repo_ids.csv")
|
| 99 |
repo_id = last_repo_ids[current_repo_idx]
|
| 100 |
try:
|
| 101 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
| 102 |
except Exception as e:
|
| 103 |
return f"Error downloading repo: {e}", "", read_csv_as_text("repo_ids.csv")
|
| 104 |
txt_path = combine_repo_files_for_llm()
|
|
|
|
| 221 |
for idx, row in df.iterrows():
|
| 222 |
repo_id = row["repo id"]
|
| 223 |
try:
|
| 224 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
| 225 |
txt_path = combine_repo_files_for_llm()
|
| 226 |
llm_output = analyze_combined_file(txt_path)
|
| 227 |
last_start = llm_output.rfind('{')
|
|
|
|
| 277 |
for idx, row in df.iterrows():
|
| 278 |
repo_id = row["repo id"]
|
| 279 |
try:
|
| 280 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=[".py", ".md", ".txt"])
|
| 281 |
txt_path = combine_repo_files_for_llm()
|
| 282 |
llm_output = analyze_combined_file(txt_path)
|
| 283 |
last_start = llm_output.rfind('{')
|
hf_utils.py
CHANGED
|
@@ -2,58 +2,46 @@ from huggingface_hub import snapshot_download
|
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
|
| 5 |
-
def
|
| 6 |
"""
|
| 7 |
-
Downloads files from a Hugging Face Space repository
|
| 8 |
|
| 9 |
Args:
|
| 10 |
space_id (str): The ID of the Hugging Face Space (e.g., "naman1102/Final_Assignment_Template").
|
| 11 |
local_dir (str): Local directory to store the downloaded files.
|
| 12 |
-
file_extensions (list):
|
| 13 |
-
|
| 14 |
"""
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
|
| 20 |
-
# Download the snapshot
|
| 21 |
repo_path = snapshot_download(repo_id=space_id, repo_type="space")
|
| 22 |
|
| 23 |
-
#
|
| 24 |
if os.path.exists(local_dir):
|
| 25 |
shutil.rmtree(local_dir)
|
| 26 |
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
relative_path = os.path.relpath(source_path, repo_path)
|
| 43 |
-
dest_path = os.path.join(local_dir, relative_path)
|
| 44 |
-
|
| 45 |
-
# Create destination directory if it doesn't exist
|
| 46 |
-
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
| 47 |
-
|
| 48 |
-
# Copy the file
|
| 49 |
-
shutil.copy2(source_path, dest_path)
|
| 50 |
-
copied_files += 1
|
| 51 |
-
|
| 52 |
-
print(f"Filtered download complete: {copied_files} files with extensions {file_extensions} from Space '{space_id}' downloaded to: {local_dir}")
|
| 53 |
|
| 54 |
# Example usage
|
| 55 |
-
#
|
| 56 |
-
# download_space_repo("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
|
| 57 |
|
| 58 |
from huggingface_hub import list_spaces
|
| 59 |
|
|
|
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
|
| 5 |
+
def download_filtered_space_files(space_id: str, local_dir: str = "repo_files", file_extensions: list = None):
    """
    Download only files with specified extensions from a Hugging Face Space repository.

    Args:
        space_id (str): The ID of the Hugging Face Space
            (e.g., "naman1102/Final_Assignment_Template").
        local_dir (str): Local directory to store the downloaded files.
            Any existing directory at this path is deleted first.
        file_extensions (list): Non-empty list of file extensions to include
            (e.g., ['.py', '.md']).

    Raises:
        ValueError: If file_extensions is None or empty.
    """
    # Fail fast before any network traffic. NOTE: an earlier docstring claimed
    # that None meant "no filtering (all files downloaded)", which contradicted
    # this check; the documented contract now matches the code.
    if not file_extensions:
        raise ValueError("You must specify a list of file extensions to filter by.")

    print(f"Downloading Space '{space_id}' and filtering for: {', '.join(file_extensions)}")

    # Download the full snapshot (into the hub cache), then copy only the
    # subset of files we care about into local_dir.
    repo_path = snapshot_download(repo_id=space_id, repo_type="space")

    # Clear out local_dir if it already exists so stale files from a
    # previously-downloaded repo don't leak into this run.
    if os.path.exists(local_dir):
        shutil.rmtree(local_dir)
    os.makedirs(local_dir, exist_ok=True)

    # str.endswith accepts a tuple of suffixes: one C-level call per file
    # instead of a per-extension generator scan.
    suffixes = tuple(file_extensions)
    copied_files = 0

    # Walk the snapshot and mirror matching files, preserving the relative
    # directory layout under local_dir.
    for root, _, files in os.walk(repo_path):
        for file_name in files:  # renamed from `file` to avoid shadowing the builtin
            if file_name.endswith(suffixes):
                src_file = os.path.join(root, file_name)
                rel_path = os.path.relpath(src_file, repo_path)
                dest_file = os.path.join(local_dir, rel_path)
                os.makedirs(os.path.dirname(dest_file), exist_ok=True)
                shutil.copy2(src_file, dest_file)
                copied_files += 1

    print(f"Downloaded {copied_files} filtered file(s) to: {local_dir}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
|
| 43 |
# Example usage
|
| 44 |
+
# download_filtered_space_files("finegrain/finegrain-image-enhancer", file_extensions=['.py', '.md', '.txt']) # Downloads only .py, .md, and .txt files
|
|
|
|
| 45 |
|
| 46 |
from huggingface_hub import list_spaces
|
| 47 |
|
repo_explorer.py
CHANGED
|
@@ -3,7 +3,7 @@ import os
|
|
| 3 |
import logging
|
| 4 |
from typing import List, Dict, Tuple
|
| 5 |
from analyzer import combine_repo_files_for_llm
|
| 6 |
-
from hf_utils import
|
| 7 |
|
| 8 |
# Setup logger
|
| 9 |
logger = logging.getLogger(__name__)
|
|
@@ -208,7 +208,7 @@ def handle_load_repository(repo_id: str) -> Tuple[str, str]:
|
|
| 208 |
|
| 209 |
# Download and process the repository
|
| 210 |
try:
|
| 211 |
-
|
| 212 |
combined_text_path = combine_repo_files_for_llm()
|
| 213 |
|
| 214 |
except Exception as e:
|
|
|
|
| 3 |
import logging
|
| 4 |
from typing import List, Dict, Tuple
|
| 5 |
from analyzer import combine_repo_files_for_llm
|
| 6 |
+
from hf_utils import download_filtered_space_files
|
| 7 |
|
| 8 |
# Setup logger
|
| 9 |
logger = logging.getLogger(__name__)
|
|
|
|
| 208 |
|
| 209 |
# Download and process the repository
|
| 210 |
try:
|
| 211 |
+
download_filtered_space_files(repo_id, local_dir="repo_files", file_extensions=['.py', '.md', '.txt'])
|
| 212 |
combined_text_path = combine_repo_files_for_llm()
|
| 213 |
|
| 214 |
except Exception as e:
|