Spaces:
Sleeping
Sleeping
hello
Browse files- analyzer.py +146 -31
- app.py +7 -5
analyzer.py
CHANGED
|
@@ -38,44 +38,159 @@ def analyze_code(code: str) -> str:
|
|
| 38 |
return response.choices[0].message.content
|
| 39 |
|
| 40 |
def parse_llm_json_response(response: str):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
try:
|
| 42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 43 |
|
| 44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 45 |
start = response.find('{')
|
| 46 |
end = response.rfind('}')
|
| 47 |
-
if start
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
#
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
#
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
json_str = re.sub(r'"(.*?)"', escape_inner_quotes, json_str)
|
| 73 |
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
|
| 76 |
except Exception as e:
|
| 77 |
-
|
| 78 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 79 |
|
| 80 |
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
|
| 81 |
"""
|
|
|
|
| 38 |
return response.choices[0].message.content
|
| 39 |
|
| 40 |
def parse_llm_json_response(response: str):
|
| 41 |
+
"""
|
| 42 |
+
Robust JSON parser with multiple fallback strategies for LLM responses.
|
| 43 |
+
"""
|
| 44 |
+
logger.info(f"Attempting to parse LLM response: {response[:100]}...")
|
| 45 |
+
|
| 46 |
+
# Strategy 1: Try direct JSON parsing (cleanest case)
|
| 47 |
+
try:
|
| 48 |
+
# Clean the response first
|
| 49 |
+
cleaned = response.strip()
|
| 50 |
+
result = json.loads(cleaned)
|
| 51 |
+
logger.info("✅ Direct JSON parsing successful")
|
| 52 |
+
return result
|
| 53 |
+
except:
|
| 54 |
+
pass
|
| 55 |
+
|
| 56 |
+
# Strategy 2: Extract JSON block from response
|
| 57 |
try:
|
| 58 |
+
# Find the first complete JSON object
|
| 59 |
+
start = response.find('{')
|
| 60 |
+
if start == -1:
|
| 61 |
+
raise ValueError("No opening brace found")
|
| 62 |
+
|
| 63 |
+
# Find matching closing brace
|
| 64 |
+
brace_count = 0
|
| 65 |
+
end = start
|
| 66 |
+
for i, char in enumerate(response[start:], start):
|
| 67 |
+
if char == '{':
|
| 68 |
+
brace_count += 1
|
| 69 |
+
elif char == '}':
|
| 70 |
+
brace_count -= 1
|
| 71 |
+
if brace_count == 0:
|
| 72 |
+
end = i
|
| 73 |
+
break
|
| 74 |
+
|
| 75 |
+
if brace_count != 0:
|
| 76 |
+
# Fallback to last closing brace
|
| 77 |
+
end = response.rfind('}')
|
| 78 |
+
if end == -1 or end < start:
|
| 79 |
+
raise ValueError("No matching closing brace found")
|
| 80 |
|
| 81 |
+
json_str = response[start:end+1]
|
| 82 |
+
result = json.loads(json_str)
|
| 83 |
+
logger.info("✅ JSON block extraction successful")
|
| 84 |
+
return result
|
| 85 |
+
except Exception as e:
|
| 86 |
+
logger.warning(f"JSON block extraction failed: {e}")
|
| 87 |
+
|
| 88 |
+
# Strategy 3: Clean and fix common JSON issues
|
| 89 |
+
try:
|
| 90 |
+
# Extract JSON part
|
| 91 |
start = response.find('{')
|
| 92 |
end = response.rfind('}')
|
| 93 |
+
if start != -1 and end != -1 and end > start:
|
| 94 |
+
json_str = response[start:end+1]
|
| 95 |
+
|
| 96 |
+
# Fix common issues
|
| 97 |
+
# Replace single quotes with double quotes (but be careful with contractions)
|
| 98 |
+
json_str = re.sub(r"(?<!\\)'([^']*)'(?=\s*[,}])", r'"\1"', json_str)
|
| 99 |
+
json_str = re.sub(r"(?<!\\)'([^']*)'(?=\s*:)", r'"\1"', json_str)
|
| 100 |
+
|
| 101 |
+
# Fix unescaped quotes in values
|
| 102 |
+
json_str = re.sub(r':\s*"([^"]*)"([^",}]*)"', r': "\1\2"', json_str)
|
| 103 |
+
|
| 104 |
+
# Remove trailing commas
|
| 105 |
+
json_str = re.sub(r',(\s*[}\]])', r'\1', json_str)
|
| 106 |
+
|
| 107 |
+
# Try parsing the cleaned version
|
| 108 |
+
result = json.loads(json_str)
|
| 109 |
+
logger.info("✅ JSON cleaning and fixing successful")
|
| 110 |
+
return result
|
| 111 |
+
except Exception as e:
|
| 112 |
+
logger.warning(f"JSON cleaning failed: {e}")
|
| 113 |
+
|
| 114 |
+
# Strategy 4: Manual field extraction as last resort
|
| 115 |
+
try:
|
| 116 |
+
logger.info("Attempting manual field extraction...")
|
| 117 |
+
result = {}
|
|
|
|
| 118 |
|
| 119 |
+
# Extract each field using regex patterns
|
| 120 |
+
patterns = {
|
| 121 |
+
'strength': [
|
| 122 |
+
r'"strength"\s*:\s*"([^"]*)"',
|
| 123 |
+
r"'strength'\s*:\s*'([^']*)'",
|
| 124 |
+
r'strength[:\s]+"([^"]*)"',
|
| 125 |
+
r'strength[:\s]+\'([^\']*)\''
|
| 126 |
+
],
|
| 127 |
+
'weaknesses': [
|
| 128 |
+
r'"weaknesses"\s*:\s*"([^"]*)"',
|
| 129 |
+
r"'weaknesses'\s*:\s*'([^']*)'",
|
| 130 |
+
r'weaknesses[:\s]+"([^"]*)"',
|
| 131 |
+
r'weaknesses[:\s]+\'([^\']*)\''
|
| 132 |
+
],
|
| 133 |
+
'speciality': [
|
| 134 |
+
r'"speciality"\s*:\s*"([^"]*)"',
|
| 135 |
+
r"'speciality'\s*:\s*'([^']*)'",
|
| 136 |
+
r'speciality[:\s]+"([^"]*)"',
|
| 137 |
+
r'speciality[:\s]+\'([^\']*)\''
|
| 138 |
+
],
|
| 139 |
+
'relevance rating': [
|
| 140 |
+
r'"relevance rating"\s*:\s*"([^"]*)"',
|
| 141 |
+
r"'relevance rating'\s*:\s*'([^']*)'",
|
| 142 |
+
r'relevance[^:]*rating[:\s]+"([^"]*)"',
|
| 143 |
+
r'relevance[^:]*rating[:\s]+\'([^\']*)\''
|
| 144 |
+
]
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
for field, field_patterns in patterns.items():
|
| 148 |
+
found = False
|
| 149 |
+
for pattern in field_patterns:
|
| 150 |
+
match = re.search(pattern, response, re.IGNORECASE | re.DOTALL)
|
| 151 |
+
if match:
|
| 152 |
+
value = match.group(1).strip()
|
| 153 |
+
# Clean up the extracted value
|
| 154 |
+
value = re.sub(r'\\+(["\'])', r'\1', value) # Remove excessive escaping
|
| 155 |
+
value = value.replace('\\"', '"').replace("\\'", "'")
|
| 156 |
+
result[field] = value
|
| 157 |
+
found = True
|
| 158 |
+
break
|
| 159 |
+
|
| 160 |
+
if not found:
|
| 161 |
+
result[field] = ""
|
| 162 |
+
|
| 163 |
+
# Validate relevance rating
|
| 164 |
+
valid_ratings = ['very low', 'low', 'high', 'very high']
|
| 165 |
+
if result.get('relevance rating', '').lower() not in [r.lower() for r in valid_ratings]:
|
| 166 |
+
# Try to fix common variations
|
| 167 |
+
rating = result.get('relevance rating', '').lower()
|
| 168 |
+
if 'very' in rating and 'low' in rating:
|
| 169 |
+
result['relevance rating'] = 'very low'
|
| 170 |
+
elif 'very' in rating and 'high' in rating:
|
| 171 |
+
result['relevance rating'] = 'very high'
|
| 172 |
+
elif 'low' in rating:
|
| 173 |
+
result['relevance rating'] = 'low'
|
| 174 |
+
elif 'high' in rating:
|
| 175 |
+
result['relevance rating'] = 'high'
|
| 176 |
+
else:
|
| 177 |
+
result['relevance rating'] = 'low' # Default fallback
|
| 178 |
+
|
| 179 |
+
logger.info("✅ Manual field extraction successful")
|
| 180 |
+
return result
|
| 181 |
|
| 182 |
except Exception as e:
|
| 183 |
+
logger.warning(f"Manual extraction failed: {e}")
|
| 184 |
+
|
| 185 |
+
# Strategy 5: Complete fallback with empty values
|
| 186 |
+
logger.error("All JSON parsing strategies failed, returning empty structure")
|
| 187 |
+
return {
|
| 188 |
+
"strength": "Analysis could not be completed - please try again",
|
| 189 |
+
"weaknesses": "Analysis could not be completed - please try again",
|
| 190 |
+
"speciality": "Analysis could not be completed - please try again",
|
| 191 |
+
"relevance rating": "low",
|
| 192 |
+
"error": f"Failed to parse LLM response after all strategies. Raw: {response[:200]}..."
|
| 193 |
+
}
|
| 194 |
|
| 195 |
def combine_repo_files_for_llm(repo_dir="repo_files", output_file="combined_repo.txt"):
|
| 196 |
"""
|
app.py
CHANGED
|
@@ -1036,12 +1036,13 @@ def create_ui() -> gr.Blocks:
|
|
| 1036 |
return gr.update(visible=False), hf_url
|
| 1037 |
return gr.update(visible=False), ""
|
| 1038 |
|
| 1039 |
-
def handle_explore_repo(repo_id: str) -> Tuple[Any, Any]:
|
| 1040 |
-
"""Handle navigating to the repo explorer
|
| 1041 |
-
logger.info(f"Switching to repo explorer tab")
|
| 1042 |
return (
|
| 1043 |
gr.update(visible=False), # close modal
|
| 1044 |
-
gr.update(selected="repo_explorer_tab") # switch tab
|
|
|
|
| 1045 |
)
|
| 1046 |
|
| 1047 |
def handle_cancel_modal() -> Any:
|
|
@@ -1192,7 +1193,8 @@ def create_ui() -> gr.Blocks:
|
|
| 1192 |
inputs=[selected_repo_display],
|
| 1193 |
outputs=[
|
| 1194 |
repo_action_modal,
|
| 1195 |
-
tabs
|
|
|
|
| 1196 |
],
|
| 1197 |
js="() => { setTimeout(() => { window.scrollTo({top: 0, behavior: 'smooth'}); window.dispatchEvent(new Event('repoExplorerNavigation')); }, 150); }"
|
| 1198 |
)
|
|
|
|
| 1036 |
return gr.update(visible=False), hf_url
|
| 1037 |
return gr.update(visible=False), ""
|
| 1038 |
|
| 1039 |
+
def handle_explore_repo(repo_id: str) -> Tuple[Any, Any, Any]:
|
| 1040 |
+
"""Handle navigating to the repo explorer and setting the repo ID."""
|
| 1041 |
+
logger.info(f"Switching to repo explorer tab with repo: {repo_id}")
|
| 1042 |
return (
|
| 1043 |
gr.update(visible=False), # close modal
|
| 1044 |
+
gr.update(selected="repo_explorer_tab"), # switch tab
|
| 1045 |
+
gr.update(value=repo_id if repo_id else "") # set repo ID
|
| 1046 |
)
|
| 1047 |
|
| 1048 |
def handle_cancel_modal() -> Any:
|
|
|
|
| 1193 |
inputs=[selected_repo_display],
|
| 1194 |
outputs=[
|
| 1195 |
repo_action_modal,
|
| 1196 |
+
tabs,
|
| 1197 |
+
repo_components["repo_explorer_input"]
|
| 1198 |
],
|
| 1199 |
js="() => { setTimeout(() => { window.scrollTo({top: 0, behavior: 'smooth'}); window.dispatchEvent(new Event('repoExplorerNavigation')); }, 150); }"
|
| 1200 |
)
|