app.py
CHANGED
@@ -28,6 +28,7 @@ load_dotenv()
| 28 |
| 29 | AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
| 30 | REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
| 31 | LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
| 32 | UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new reviews
| 33 |

@@ -1694,6 +1695,99 @@ def save_agent_to_hf(data):
| 1694 | return False
| 1695 |
| 1696 |
| 1697 |
| 1698 |
| 1699 | # =============================================================================

@@ -1709,6 +1803,7 @@ def update_all_agents_incremental():
| 1709 | 2. Fetch new reviews from the last UPDATE_TIME_FRAME_DAYS days
| 1710 | 3. Save all updated/new metadata back to HuggingFace
| 1711 | 4. Reload statistics from updated metadata
| 1712 | """
| 1713 | print(f"\n{'='*80}")
| 1714 | print(f"π Incremental Update started at {datetime.now(timezone.utc).isoformat()}")

@@ -1719,14 +1814,24 @@
| 1719 | fetch_and_update_weekly_reviews()
| 1720 |
| 1721 | # Reload statistics from updated metadata
| 1722 | - print(f"\nπ
| 1723 | - construct_leaderboard_from_metadata()
| 1724 |
| 1725 | print(f"\n{'='*80}")
| 1726 | print(f"π Update Summary:")
| 1727 | print(f" β Updated existing review statuses")
| 1728 | print(f" β Fetched new reviews from last {UPDATE_TIME_FRAME_DAYS} days")
| 1729 | - print(f" β
| 1730 | print(f"{'='*80}")
| 1731 |
| 1732 | print(f"\nβ Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")

@@ -1797,7 +1902,36 @@ def create_monthly_metrics_plot(top_n=None):
| 1797 | Args:
| 1798 | top_n: If specified, only show metrics for the top N agents by total reviews.
| 1799 | """
| 1800 | -
| 1801 |
| 1802 | if not metrics['agents'] or not metrics['months']:
| 1803 | # Return an empty figure with a message

@@ -1919,11 +2053,20 @@
| 1919 |
| 1920 | def get_leaderboard_dataframe():
| 1921 | """
| 1922 | -
| 1923 | Returns formatted DataFrame sorted by total reviews.
| 1924 | """
| 1925 | - #
| 1926 | -
| 1927 |
| 1928 | print(f"π Cache dict size: {len(cache_dict)}")
| 1929 |

@@ -2021,6 +2164,16 @@ def submit_agent(identifier, agent_name, developer, website):
| 2021 | if not save_agent_to_hf(submission):
| 2022 | return "β Failed to save submission", get_leaderboard_dataframe()
| 2023 |
| 2024 | # Return success message - data will be populated by daily incremental updates
| 2025 | return f"β Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe()
| 2026 |

@@ -2139,10 +2292,54 @@ def fetch_and_update_weekly_reviews():
| 2139 | continue
| 2140 |
| 2141 |
| 2142 | # =============================================================================
| 2143 | # GRADIO APPLICATION
| 2144 | # =============================================================================
| 2145 |
| 2146 | # Start APScheduler for incremental updates at 12:00 AM UTC every Monday
| 2147 | scheduler = BackgroundScheduler(timezone="UTC")
| 2148 | scheduler.add_job(

@@ -2156,7 +2353,7 @@ scheduler.start()
| 2156 | print(f"\n{'='*80}")
| 2157 | print(f"β Scheduler initialized successfully")
| 2158 | print(f"βοΈ Mining schedule: Every Monday at 12:00 AM UTC")
| 2159 | - print(f"π₯ On startup:
| 2160 | print(f"{'='*80}\n")
| 2161 |
| 2162 | # Create Gradio interface

| 28 |
|
| 29 |
AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
|
| 30 |
REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
|
| 31 |
+
LEADERBOARD_REPO = "SWE-Arena/swe_leaderboard" # HuggingFace dataset for leaderboard data
|
| 32 |
LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
|
| 33 |
UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new reviews
|
| 34 |
|
|
|
|
| 1695 |
return False
|
| 1696 |
|
| 1697 |
|
| 1698 |
+
def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
|
| 1699 |
+
"""
|
| 1700 |
+
Save leaderboard data and monthly metrics to HuggingFace dataset as swe-review.json.
|
| 1701 |
+
|
| 1702 |
+
Args:
|
| 1703 |
+
leaderboard_dict: Dictionary of agent stats from construct_leaderboard_from_metadata()
|
| 1704 |
+
monthly_metrics: Monthly metrics data from calculate_monthly_metrics_by_agent()
|
| 1705 |
+
|
| 1706 |
+
Returns:
|
| 1707 |
+
bool: True if successful, False otherwise
|
| 1708 |
+
"""
|
| 1709 |
+
try:
|
| 1710 |
+
api = HfApi()
|
| 1711 |
+
token = get_hf_token()
|
| 1712 |
+
|
| 1713 |
+
if not token:
|
| 1714 |
+
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
|
| 1715 |
+
|
| 1716 |
+
filename = "swe-review.json"
|
| 1717 |
+
|
| 1718 |
+
# Combine leaderboard and monthly metrics
|
| 1719 |
+
combined_data = {
|
| 1720 |
+
'last_updated': datetime.now(timezone.utc).isoformat(),
|
| 1721 |
+
'leaderboard': leaderboard_dict,
|
| 1722 |
+
'monthly_metrics': monthly_metrics,
|
| 1723 |
+
'metadata': {
|
| 1724 |
+
'leaderboard_time_frame_days': LEADERBOARD_TIME_FRAME_DAYS,
|
| 1725 |
+
'update_time_frame_days': UPDATE_TIME_FRAME_DAYS
|
| 1726 |
+
}
|
| 1727 |
+
}
|
| 1728 |
+
|
| 1729 |
+
# Save locally first
|
| 1730 |
+
with open(filename, 'w') as f:
|
| 1731 |
+
json.dump(combined_data, f, indent=2)
|
| 1732 |
+
|
| 1733 |
+
try:
|
| 1734 |
+
# Upload to HuggingFace
|
| 1735 |
+
upload_with_retry(
|
| 1736 |
+
api=api,
|
| 1737 |
+
path_or_fileobj=filename,
|
| 1738 |
+
path_in_repo=filename,
|
| 1739 |
+
repo_id=LEADERBOARD_REPO,
|
| 1740 |
+
repo_type="dataset",
|
| 1741 |
+
token=token
|
| 1742 |
+
)
|
| 1743 |
+
print(f"β Saved leaderboard data to HuggingFace: {filename}")
|
| 1744 |
+
return True
|
| 1745 |
+
finally:
|
| 1746 |
+
# Always clean up local file, even if upload fails
|
| 1747 |
+
if os.path.exists(filename):
|
| 1748 |
+
os.remove(filename)
|
| 1749 |
+
|
| 1750 |
+
except Exception as e:
|
| 1751 |
+
print(f"β Error saving leaderboard data: {str(e)}")
|
| 1752 |
+
import traceback
|
| 1753 |
+
traceback.print_exc()
|
| 1754 |
+
return False
|
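upload_with_retry is an existing helper elsewhere in app.py and is not shown in this diff; presumably it wraps HfApi.upload_file in a retry loop. A minimal sketch under that assumption (not the actual implementation; the retry count and backoff are invented):

import time
from huggingface_hub import HfApi

def upload_with_retry(api: HfApi, max_retries: int = 3, **upload_kwargs):
    # Hypothetical retry wrapper: forwards keyword arguments such as
    # path_or_fileobj, path_in_repo, repo_id, repo_type and token to
    # HfApi.upload_file, retrying with a linear backoff on failure.
    for attempt in range(1, max_retries + 1):
        try:
            return api.upload_file(**upload_kwargs)
        except Exception as exc:
            if attempt == max_retries:
                raise
            print(f"Upload failed (attempt {attempt}/{max_retries}): {exc}; retrying...")
            time.sleep(5 * attempt)
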
| 1755 |
+
|
| 1756 |
+
|
| 1757 |
+
def load_leaderboard_data_from_hf():
|
| 1758 |
+
"""
|
| 1759 |
+
Load leaderboard data and monthly metrics from HuggingFace dataset.
|
| 1760 |
+
|
| 1761 |
+
Returns:
|
| 1762 |
+
dict: Dictionary with 'leaderboard', 'monthly_metrics', and 'last_updated' keys
|
| 1763 |
+
Returns None if the file doesn't exist or an error occurs
|
| 1764 |
+
"""
|
| 1765 |
+
try:
|
| 1766 |
+
token = get_hf_token()
|
| 1767 |
+
filename = "swe-review.json"
|
| 1768 |
+
|
| 1769 |
+
# Download file
|
| 1770 |
+
file_path = hf_hub_download(
|
| 1771 |
+
repo_id=LEADERBOARD_REPO,
|
| 1772 |
+
filename=filename,
|
| 1773 |
+
repo_type="dataset",
|
| 1774 |
+
token=token
|
| 1775 |
+
)
|
| 1776 |
+
|
| 1777 |
+
# Load JSON data
|
| 1778 |
+
with open(file_path, 'r') as f:
|
| 1779 |
+
data = json.load(f)
|
| 1780 |
+
|
| 1781 |
+
last_updated = data.get('last_updated', 'Unknown')
|
| 1782 |
+
print(f"β Loaded leaderboard data from HuggingFace (last updated: {last_updated})")
|
| 1783 |
+
|
| 1784 |
+
return data
|
| 1785 |
+
|
| 1786 |
+
except Exception as e:
|
| 1787 |
+
print(f"β οΈ Could not load leaderboard data from HuggingFace: {str(e)}")
|
| 1788 |
+
return None
|
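Taken together, save_leaderboard_data_to_hf and load_leaderboard_data_from_hf round-trip a single JSON document. A minimal sketch of its shape, with one hypothetical agent entry and invented numbers (the real file is built from live review metadata):

import json
from datetime import datetime, timezone

# Hypothetical example of the combined swe-review.json payload;
# field names follow the code above, values are placeholders.
example_payload = {
    "last_updated": datetime.now(timezone.utc).isoformat(),
    "leaderboard": {
        "example-agent[bot]": {                 # hypothetical agent identifier
            "agent_name": "Example Agent",
            "name": "Example Agent",
            "website": "https://example.com",
            "github_identifier": "example-agent[bot]",
            "total_reviews": 120,
            "merged_prs": 80,
            "pending_prs": 10,
            "acceptance_rate": 72.73,           # 80 merged / 110 completed
        }
    },
    "monthly_metrics": {
        "agents": ["Example Agent"],
        "months": ["2025-01", "2025-02"],
        "data": {
            "Example Agent": {
                "acceptance_rates": [70.0, 75.0],
                "total_reviews": [60, 60],
                "merged_prs": [40, 40],
            }
        },
    },
    "metadata": {
        "leaderboard_time_frame_days": 180,
        "update_time_frame_days": 30,
    },
}

print(json.dumps(example_payload, indent=2))
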
| 1789 |
+
|
| 1790 |
+
|
| 1791 |
|
| 1792 |
|
| 1793 |
# =============================================================================
|
|
|
|
| 1803 |
2. Fetch new reviews from the last UPDATE_TIME_FRAME_DAYS days
|
| 1804 |
3. Save all updated/new metadata back to HuggingFace
|
| 1805 |
4. Reload statistics from updated metadata
|
| 1806 |
+
5. Save leaderboard and monthly metrics to swe_leaderboard dataset
|
| 1807 |
"""
|
| 1808 |
print(f"\n{'='*80}")
|
| 1809 |
print(f"π Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
|
|
|
|
| 1814 |
fetch_and_update_weekly_reviews()
|
| 1815 |
|
| 1816 |
# Reload statistics from updated metadata
|
| 1817 |
+
print(f"\nπ Constructing leaderboard from updated review metadata...")
|
| 1818 |
+
leaderboard_dict = construct_leaderboard_from_metadata()
|
| 1819 |
+
|
| 1820 |
+
# Calculate monthly metrics
|
| 1821 |
+
print(f"\nπ Calculating monthly metrics...")
|
| 1822 |
+
monthly_metrics = calculate_monthly_metrics_by_agent()
|
| 1823 |
+
|
| 1824 |
+
# Save to HuggingFace leaderboard dataset
|
| 1825 |
+
print(f"\nπΎ Saving leaderboard data to HuggingFace...")
|
| 1826 |
+
save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
|
| 1827 |
|
| 1828 |
print(f"\n{'='*80}")
|
| 1829 |
print(f"π Update Summary:")
|
| 1830 |
print(f" β Updated existing review statuses")
|
| 1831 |
print(f" β Fetched new reviews from last {UPDATE_TIME_FRAME_DAYS} days")
|
| 1832 |
+
print(f" β Leaderboard constructed with {len(leaderboard_dict)} agents")
|
| 1833 |
+
print(f" β Monthly metrics calculated")
|
| 1834 |
+
print(f" β Data saved to {LEADERBOARD_REPO}")
|
| 1835 |
print(f"{'='*80}")
|
| 1836 |
|
| 1837 |
print(f"\nβ
Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
|
|
|
|
| 1902 |
Args:
|
| 1903 |
top_n: If specified, only show metrics for the top N agents by total reviews.
|
| 1904 |
"""
|
| 1905 |
+
# Try loading from saved dataset first
|
| 1906 |
+
saved_data = load_leaderboard_data_from_hf()
|
| 1907 |
+
|
| 1908 |
+
if saved_data and 'monthly_metrics' in saved_data:
|
| 1909 |
+
metrics = saved_data['monthly_metrics']
|
| 1910 |
+
print(f"π Loaded monthly metrics from saved dataset")
|
| 1911 |
+
|
| 1912 |
+
# Apply top_n filter if specified
|
| 1913 |
+
if top_n is not None and top_n > 0 and metrics.get('agents'):
|
| 1914 |
+
# Calculate total reviews for each agent
|
| 1915 |
+
agent_totals = []
|
| 1916 |
+
for agent_name in metrics['agents']:
|
| 1917 |
+
agent_data = metrics['data'].get(agent_name, {})
|
| 1918 |
+
total_reviews = sum(agent_data.get('total_reviews', []))
|
| 1919 |
+
agent_totals.append((agent_name, total_reviews))
|
| 1920 |
+
|
| 1921 |
+
# Sort by total reviews and take top N
|
| 1922 |
+
agent_totals.sort(key=lambda x: x[1], reverse=True)
|
| 1923 |
+
top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
|
| 1924 |
+
|
| 1925 |
+
# Filter metrics to only include top agents
|
| 1926 |
+
metrics = {
|
| 1927 |
+
'agents': top_agents,
|
| 1928 |
+
'months': metrics['months'],
|
| 1929 |
+
'data': {agent: metrics['data'][agent] for agent in top_agents if agent in metrics['data']}
|
| 1930 |
+
}
|
| 1931 |
+
else:
|
| 1932 |
+
# Fallback: calculate from metadata if saved data doesn't exist
|
| 1933 |
+
print(f"π Saved data not available, calculating monthly metrics from metadata...")
|
| 1934 |
+
metrics = calculate_monthly_metrics_by_agent(top_n=top_n)
|
| 1935 |
|
| 1936 |
if not metrics['agents'] or not metrics['months']:
|
| 1937 |
# Return an empty figure with a message
|
|
|
|
| 2053 |
|
| 2054 |
def get_leaderboard_dataframe():
|
| 2055 |
"""
|
| 2056 |
+
Load leaderboard from saved dataset and convert to pandas DataFrame for display.
|
| 2057 |
+
Falls back to constructing from metadata if saved data is not available.
|
| 2058 |
Returns formatted DataFrame sorted by total reviews.
|
| 2059 |
"""
|
| 2060 |
+
# Try loading from saved dataset first
|
| 2061 |
+
saved_data = load_leaderboard_data_from_hf()
|
| 2062 |
+
|
| 2063 |
+
if saved_data and 'leaderboard' in saved_data:
|
| 2064 |
+
cache_dict = saved_data['leaderboard']
|
| 2065 |
+
print(f"π Loaded leaderboard from saved dataset (last updated: {saved_data.get('last_updated', 'Unknown')})")
|
| 2066 |
+
else:
|
| 2067 |
+
# Fallback: construct from metadata if saved data doesn't exist
|
| 2068 |
+
print(f"π Saved data not available, constructing leaderboard from metadata...")
|
| 2069 |
+
cache_dict = construct_leaderboard_from_metadata()
|
| 2070 |
|
| 2071 |
print(f"π Cache dict size: {len(cache_dict)}")
|
| 2072 |
|
|
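The remainder of get_leaderboard_dataframe is unchanged context and not shown here. Purely as an illustration of what the docstring describes (the actual column names and formatting live in the unchanged code), a cache_dict of the shape above could be flattened into a sorted pandas DataFrame roughly like this:

import pandas as pd

def leaderboard_dict_to_dataframe(cache_dict):
    # Hypothetical helper: one row per agent, sorted by total reviews
    # (descending). Column names are illustrative only.
    rows = [
        {
            "Agent": entry.get("agent_name", identifier),
            "Website": entry.get("website", "N/A"),
            "Total Reviews": entry.get("total_reviews", 0),
            "Merged PRs": entry.get("merged_prs", 0),
            "Pending PRs": entry.get("pending_prs", 0),
            "Acceptance Rate (%)": entry.get("acceptance_rate", 0.0),
        }
        for identifier, entry in cache_dict.items()
    ]
    df = pd.DataFrame(rows)
    if not df.empty:
        df = df.sort_values("Total Reviews", ascending=False).reset_index(drop=True)
    return df
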
|
|
| 2164 |
if not save_agent_to_hf(submission):
|
| 2165 |
return "β Failed to save submission", get_leaderboard_dataframe()
|
| 2166 |
|
| 2167 |
+
# Reconstruct and save leaderboard data with new agent
|
| 2168 |
+
try:
|
| 2169 |
+
print(f"π Reconstructing leaderboard with new agent...")
|
| 2170 |
+
leaderboard_dict = construct_leaderboard_from_metadata()
|
| 2171 |
+
monthly_metrics = calculate_monthly_metrics_by_agent()
|
| 2172 |
+
save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
|
| 2173 |
+
print(f"β Leaderboard data updated")
|
| 2174 |
+
except Exception as e:
|
| 2175 |
+
print(f"β οΈ Failed to update leaderboard data: {str(e)}")
|
| 2176 |
+
|
| 2177 |
# Return success message - data will be populated by daily incremental updates
|
| 2178 |
return f"β
Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe()
|
| 2179 |
|
|
|
|
| 2292 |
continue
|
| 2293 |
|
| 2294 |
|
| 2295 |
+
# =============================================================================
|
| 2296 |
+
# STARTUP & INITIALIZATION
|
| 2297 |
+
# =============================================================================
|
| 2298 |
+
|
| 2299 |
+
def initialize_leaderboard_data():
|
| 2300 |
+
"""
|
| 2301 |
+
Initialize leaderboard data on startup.
|
| 2302 |
+
If saved data doesn't exist, construct from metadata and save.
|
| 2303 |
+
"""
|
| 2304 |
+
print(f"\n{'='*80}")
|
| 2305 |
+
print(f"π Initializing leaderboard data...")
|
| 2306 |
+
print(f"{'='*80}\n")
|
| 2307 |
+
|
| 2308 |
+
# Try loading from saved dataset
|
| 2309 |
+
saved_data = load_leaderboard_data_from_hf()
|
| 2310 |
+
|
| 2311 |
+
if saved_data:
|
| 2312 |
+
print(f"β Leaderboard data already exists (last updated: {saved_data.get('last_updated', 'Unknown')})")
|
| 2313 |
+
else:
|
| 2314 |
+
print(f"β οΈ No saved leaderboard data found. Constructing from metadata...")
|
| 2315 |
+
try:
|
| 2316 |
+
# Construct leaderboard
|
| 2317 |
+
leaderboard_dict = construct_leaderboard_from_metadata()
|
| 2318 |
+
|
| 2319 |
+
# Calculate monthly metrics
|
| 2320 |
+
monthly_metrics = calculate_monthly_metrics_by_agent()
|
| 2321 |
+
|
| 2322 |
+
# Save to HuggingFace
|
| 2323 |
+
save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
|
| 2324 |
+
|
| 2325 |
+
print(f"β Initial leaderboard data created and saved")
|
| 2326 |
+
except Exception as e:
|
| 2327 |
+
print(f"β Failed to initialize leaderboard data: {str(e)}")
|
| 2328 |
+
import traceback
|
| 2329 |
+
traceback.print_exc()
|
| 2330 |
+
|
| 2331 |
+
print(f"\n{'='*80}")
|
| 2332 |
+
print(f"β Leaderboard initialization complete")
|
| 2333 |
+
print(f"{'='*80}\n")
|
| 2334 |
+
|
| 2335 |
+
|
| 2336 |
# =============================================================================
|
| 2337 |
# GRADIO APPLICATION
|
| 2338 |
# =============================================================================
|
| 2339 |
|
| 2340 |
+
# Initialize leaderboard data on startup
|
| 2341 |
+
initialize_leaderboard_data()
|
| 2342 |
+
|
| 2343 |
# Start APScheduler for incremental updates at 12:00 AM UTC every Monday
|
| 2344 |
scheduler = BackgroundScheduler(timezone="UTC")
|
| 2345 |
scheduler.add_job(
|
|
|
|
| 2353 |
print(f"\n{'='*80}")
|
| 2354 |
print(f"β Scheduler initialized successfully")
|
| 2355 |
print(f"βοΈ Mining schedule: Every Monday at 12:00 AM UTC")
|
| 2356 |
+
print(f"π₯ On startup: Loads cached data from {LEADERBOARD_REPO}")
|
| 2357 |
print(f"{'='*80}\n")
|
| 2358 |
|
| 2359 |
# Create Gradio interface
|
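The arguments to scheduler.add_job(...) are unchanged context and therefore cut off in this diff. For reference, a self-contained sketch of a cron job that fires every Monday at 12:00 AM UTC with APScheduler (the job function here is a stand-in, not the real update_all_agents_incremental):

from apscheduler.schedulers.background import BackgroundScheduler

def weekly_job():
    # Stand-in for update_all_agents_incremental()
    print("Running weekly incremental update...")

scheduler = BackgroundScheduler(timezone="UTC")
# Cron trigger: every Monday at 12:00 AM UTC
scheduler.add_job(weekly_job, trigger="cron", day_of_week="mon", hour=0, minute=0)
scheduler.start()
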
msr.py
CHANGED
@@ -21,6 +21,7 @@ load_dotenv()
| 21 |
| 22 | AGENTS_REPO = "SWE-Arena/swe_agents"
| 23 | REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
| 24 | LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
| 25 |
| 26 | # =============================================================================

@@ -448,21 +449,21 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
| 448 | def load_agents_from_hf():
| 449 | """
| 450 | Load all agent metadata JSON files from HuggingFace dataset.
| 451 | -
| 452 | The github_identifier is extracted from the filename (e.g., 'agent-name[bot].json' -> 'agent-name[bot]')
| 453 | """
| 454 | try:
| 455 | api = HfApi()
| 456 | agents = []
| 457 | -
| 458 | # List all files in the repository
| 459 | files = api.list_repo_files(repo_id=AGENTS_REPO, repo_type="dataset")
| 460 | -
| 461 | # Filter for JSON files only
| 462 | json_files = [f for f in files if f.endswith('.json')]
| 463 | -
| 464 | print(f"Found {len(json_files)} agent files in {AGENTS_REPO}")
| 465 | -
| 466 | # Download and parse each JSON file
| 467 | for json_file in json_files:
| 468 | try:

@@ -471,7 +472,7 @@ def load_agents_from_hf():
| 471 | filename=json_file,
| 472 | repo_type="dataset"
| 473 | )
| 474 | -
| 475 | with open(file_path, 'r') as f:
| 476 | agent_data = json.load(f)
| 477 |

@@ -485,19 +486,347 @@ def load_agents_from_hf():
| 485 | agent_data['github_identifier'] = github_identifier
| 486 |
| 487 | agents.append(agent_data)
| 488 | -
| 489 | except Exception as e:
| 490 | print(f"Warning: Could not load {json_file}: {str(e)}")
| 491 | continue
| 492 | -
| 493 | print(f"β Loaded {len(agents)} agents from HuggingFace")
| 494 | return agents
| 495 | -
| 496 | except Exception as e:
| 497 | print(f"Could not load agents from HuggingFace: {str(e)}")
| 498 | return []
| 499 |
| 500 |
| 501 | # =============================================================================
| 502 | # MAIN MINING FUNCTION
| 503 | # =============================================================================

@@ -596,6 +925,35 @@ def mine_all_agents():
| 596 | print(f" BigQuery queries executed: 1")
| 597 | print(f"{'='*80}\n")
| 598 |
| 599 |
| 600 | # =============================================================================
| 601 | # ENTRY POINT

| 21 |
|
| 22 |
AGENTS_REPO = "SWE-Arena/swe_agents"
|
| 23 |
REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
|
| 24 |
+
LEADERBOARD_REPO = "SWE-Arena/swe_leaderboard" # HuggingFace dataset for leaderboard data
|
| 25 |
LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
|
| 26 |
|
| 27 |
# =============================================================================
|
|
|
|
| 449 |
def load_agents_from_hf():
|
| 450 |
"""
|
| 451 |
Load all agent metadata JSON files from HuggingFace dataset.
|
| 452 |
+
|
| 453 |
The github_identifier is extracted from the filename (e.g., 'agent-name[bot].json' -> 'agent-name[bot]')
|
| 454 |
"""
|
| 455 |
try:
|
| 456 |
api = HfApi()
|
| 457 |
agents = []
|
| 458 |
+
|
| 459 |
# List all files in the repository
|
| 460 |
files = api.list_repo_files(repo_id=AGENTS_REPO, repo_type="dataset")
|
| 461 |
+
|
| 462 |
# Filter for JSON files only
|
| 463 |
json_files = [f for f in files if f.endswith('.json')]
|
| 464 |
+
|
| 465 |
print(f"Found {len(json_files)} agent files in {AGENTS_REPO}")
|
| 466 |
+
|
| 467 |
# Download and parse each JSON file
|
| 468 |
for json_file in json_files:
|
| 469 |
try:
|
|
|
|
| 472 |
filename=json_file,
|
| 473 |
repo_type="dataset"
|
| 474 |
)
|
| 475 |
+
|
| 476 |
with open(file_path, 'r') as f:
|
| 477 |
agent_data = json.load(f)
|
| 478 |
|
|
|
|
| 486 |
agent_data['github_identifier'] = github_identifier
|
| 487 |
|
| 488 |
agents.append(agent_data)
|
| 489 |
+
|
| 490 |
except Exception as e:
|
| 491 |
print(f"Warning: Could not load {json_file}: {str(e)}")
|
| 492 |
continue
|
| 493 |
+
|
| 494 |
print(f"β Loaded {len(agents)} agents from HuggingFace")
|
| 495 |
return agents
|
| 496 |
+
|
| 497 |
except Exception as e:
|
| 498 |
print(f"Could not load agents from HuggingFace: {str(e)}")
|
| 499 |
return []
|
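Each file in SWE-Arena/swe_agents is a small JSON document; the GitHub identifier comes from the filename rather than the file body. A hypothetical example (field names mirror the keys read elsewhere in this diff, the values are invented):

import json

# Hypothetical contents of the file "example-agent[bot].json" in the
# SWE-Arena/swe_agents dataset; load_agents_from_hf() would add
# github_identifier="example-agent[bot]" taken from the filename.
example_agent = {
    "name": "Example Agent",
    "website": "https://example.com",
}

print(json.dumps(example_agent, indent=2))
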
| 500 |
|
| 501 |
|
| 502 |
+
def load_review_metadata():
|
| 503 |
+
"""
|
| 504 |
+
Load all review metadata from HuggingFace dataset within LEADERBOARD_TIME_FRAME_DAYS.
|
| 505 |
+
|
| 506 |
+
Returns:
|
| 507 |
+
List of review metadata dictionaries, each with an 'agent_identifier' field added.
|
| 508 |
+
"""
|
| 509 |
+
# Calculate cutoff date
|
| 510 |
+
current_time = datetime.now(timezone.utc)
|
| 511 |
+
cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
|
| 512 |
+
|
| 513 |
+
try:
|
| 514 |
+
api = HfApi()
|
| 515 |
+
token = get_hf_token()
|
| 516 |
+
|
| 517 |
+
# List all files in the repository
|
| 518 |
+
files = api.list_repo_files(repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
|
| 519 |
+
|
| 520 |
+
# Filter for JSONL files matching pattern: [agent_identifier]/YYYY.MM.DD.jsonl
|
| 521 |
+
time_frame_files = []
|
| 522 |
+
for f in files:
|
| 523 |
+
if f.endswith('.jsonl'):
|
| 524 |
+
parts = f.split('/')
|
| 525 |
+
if len(parts) == 2:
|
| 526 |
+
filename = parts[1]
|
| 527 |
+
# Parse date from filename: YYYY.MM.DD.jsonl
|
| 528 |
+
try:
|
| 529 |
+
date_part = filename.replace('.jsonl', '')
|
| 530 |
+
date_components = date_part.split('.')
|
| 531 |
+
if len(date_components) == 3:
|
| 532 |
+
file_year, file_month, file_day = map(int, date_components)
|
| 533 |
+
file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
|
| 534 |
+
|
| 535 |
+
# Only include files within time frame
|
| 536 |
+
if file_date >= cutoff_date:
|
| 537 |
+
time_frame_files.append(f)
|
| 538 |
+
except Exception:
|
| 539 |
+
continue
|
| 540 |
+
|
| 541 |
+
print(f"π₯ Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files)...")
|
| 542 |
+
|
| 543 |
+
all_metadata = []
|
| 544 |
+
|
| 545 |
+
for filename in time_frame_files:
|
| 546 |
+
try:
|
| 547 |
+
# Extract agent_identifier from path
|
| 548 |
+
parts = filename.split('/')
|
| 549 |
+
if len(parts) != 2:
|
| 550 |
+
continue
|
| 551 |
+
|
| 552 |
+
agent_identifier = parts[0]
|
| 553 |
+
|
| 554 |
+
file_path = hf_hub_download(
|
| 555 |
+
repo_id=REVIEW_METADATA_REPO,
|
| 556 |
+
filename=filename,
|
| 557 |
+
repo_type="dataset",
|
| 558 |
+
token=token
|
| 559 |
+
)
|
| 560 |
+
day_metadata = load_jsonl(file_path)
|
| 561 |
+
|
| 562 |
+
# Add agent_identifier to each review
|
| 563 |
+
for review_meta in day_metadata:
|
| 564 |
+
review_meta['agent_identifier'] = agent_identifier
|
| 565 |
+
all_metadata.append(review_meta)
|
| 566 |
+
|
| 567 |
+
except Exception as e:
|
| 568 |
+
print(f" Warning: Could not load {filename}: {str(e)}")
|
| 569 |
+
|
| 570 |
+
print(f"β Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
|
| 571 |
+
return all_metadata
|
| 572 |
+
|
| 573 |
+
except Exception as e:
|
| 574 |
+
print(f"β Error loading review metadata: {str(e)}")
|
| 575 |
+
return []
|
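A small standalone example of the path and date filtering used above, applied to one hypothetical repository path of the form [agent_identifier]/YYYY.MM.DD.jsonl:

from datetime import datetime, timedelta, timezone

cutoff_date = datetime.now(timezone.utc) - timedelta(days=180)

path = "example-agent[bot]/2025.03.15.jsonl"   # hypothetical file path
agent_identifier, filename = path.split("/")

# Parse YYYY.MM.DD from the filename and compare against the cutoff
year, month, day = map(int, filename.replace(".jsonl", "").split("."))
file_date = datetime(year, month, day, tzinfo=timezone.utc)

print(agent_identifier, file_date >= cutoff_date)
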
| 576 |
+
|
| 577 |
+
|
| 578 |
+
def get_pr_status_from_metadata(review_meta):
|
| 579 |
+
"""
|
| 580 |
+
Derive PR status from merged_at and closed_at fields.
|
| 581 |
+
|
| 582 |
+
Returns:
|
| 583 |
+
str: 'merged', 'closed', or 'open'
|
| 584 |
+
"""
|
| 585 |
+
merged_at = review_meta.get('merged_at')
|
| 586 |
+
closed_at = review_meta.get('closed_at')
|
| 587 |
+
|
| 588 |
+
if merged_at:
|
| 589 |
+
return 'merged'
|
| 590 |
+
elif closed_at:
|
| 591 |
+
return 'closed'
|
| 592 |
+
else:
|
| 593 |
+
return 'open'
|
| 594 |
+
|
| 595 |
+
|
| 596 |
+
def calculate_review_stats_from_metadata(metadata_list):
|
| 597 |
+
"""
|
| 598 |
+
Calculate statistics from a list of review metadata.
|
| 599 |
+
|
| 600 |
+
Returns:
|
| 601 |
+
Dictionary with review metrics (total_reviews, merged_prs, pending_prs, acceptance_rate)
|
| 602 |
+
"""
|
| 603 |
+
total_reviews = len(metadata_list)
|
| 604 |
+
|
| 605 |
+
# Count merged PRs
|
| 606 |
+
merged_prs = sum(1 for review_meta in metadata_list
|
| 607 |
+
if get_pr_status_from_metadata(review_meta) == 'merged')
|
| 608 |
+
|
| 609 |
+
# Count rejected PRs
|
| 610 |
+
rejected_prs = sum(1 for review_meta in metadata_list
|
| 611 |
+
if get_pr_status_from_metadata(review_meta) == 'closed')
|
| 612 |
+
|
| 613 |
+
# Count pending PRs
|
| 614 |
+
pending_prs = sum(1 for review_meta in metadata_list
|
| 615 |
+
if get_pr_status_from_metadata(review_meta) == 'open')
|
| 616 |
+
|
| 617 |
+
# Calculate acceptance rate (exclude pending PRs)
|
| 618 |
+
completed_prs = merged_prs + rejected_prs
|
| 619 |
+
acceptance_rate = (merged_prs / completed_prs * 100) if completed_prs > 0 else 0
|
| 620 |
+
|
| 621 |
+
return {
|
| 622 |
+
'total_reviews': total_reviews,
|
| 623 |
+
'merged_prs': merged_prs,
|
| 624 |
+
'pending_prs': pending_prs,
|
| 625 |
+
'acceptance_rate': round(acceptance_rate, 2),
|
| 626 |
+
}
|
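A worked example of the two helpers above on a hypothetical six-review sample: three merged, one closed without merging, two still open. Pending PRs are excluded from the rate, so the acceptance rate is 3 / (3 + 1) = 75.0%:

# Hypothetical sample: merged_at / closed_at drive the derived status.
sample = [
    {"merged_at": "2025-01-05T12:00:00Z", "closed_at": "2025-01-05T12:00:00Z"},
    {"merged_at": "2025-01-09T08:30:00Z", "closed_at": "2025-01-09T08:30:00Z"},
    {"merged_at": "2025-02-01T10:00:00Z", "closed_at": "2025-02-01T10:00:00Z"},
    {"merged_at": None, "closed_at": "2025-01-20T17:45:00Z"},   # closed without merge
    {"merged_at": None, "closed_at": None},                     # still open
    {"merged_at": None, "closed_at": None},                     # still open
]

merged = sum(1 for r in sample if r["merged_at"])
closed = sum(1 for r in sample if not r["merged_at"] and r["closed_at"])
pending = sum(1 for r in sample if not r["merged_at"] and not r["closed_at"])

# Acceptance rate counts only completed PRs: 3 merged out of 4 completed
acceptance_rate = round(merged / (merged + closed) * 100, 2)
print(merged, closed, pending, acceptance_rate)   # 3 1 2 75.0
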
| 627 |
+
|
| 628 |
+
|
| 629 |
+
def calculate_monthly_metrics_by_agent():
|
| 630 |
+
"""
|
| 631 |
+
Calculate monthly metrics for all agents for visualization.
|
| 632 |
+
|
| 633 |
+
Returns:
|
| 634 |
+
dict: {
|
| 635 |
+
'agents': list of agent names,
|
| 636 |
+
'months': list of month labels (e.g., '2025-01'),
|
| 637 |
+
'data': {
|
| 638 |
+
agent_name: {
|
| 639 |
+
'acceptance_rates': list of acceptance rates by month,
|
| 640 |
+
'total_reviews': list of review counts by month,
|
| 641 |
+
'merged_prs': list of merged PR counts by month,
|
| 642 |
+
}
|
| 643 |
+
}
|
| 644 |
+
}
|
| 645 |
+
"""
|
| 646 |
+
# Load agents
|
| 647 |
+
agents = load_agents_from_hf()
|
| 648 |
+
|
| 649 |
+
# Create mapping from agent_identifier to agent_name
|
| 650 |
+
identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')}
|
| 651 |
+
|
| 652 |
+
# Load all review metadata
|
| 653 |
+
all_metadata = load_review_metadata()
|
| 654 |
+
|
| 655 |
+
if not all_metadata:
|
| 656 |
+
return {'agents': [], 'months': [], 'data': {}}
|
| 657 |
+
|
| 658 |
+
# Group by agent and month
|
| 659 |
+
agent_month_data = defaultdict(lambda: defaultdict(list))
|
| 660 |
+
|
| 661 |
+
for review_meta in all_metadata:
|
| 662 |
+
agent_identifier = review_meta.get('agent_identifier')
|
| 663 |
+
reviewed_at = review_meta.get('reviewed_at')
|
| 664 |
+
|
| 665 |
+
if not agent_identifier or not reviewed_at:
|
| 666 |
+
continue
|
| 667 |
+
|
| 668 |
+
# Get agent_name from identifier
|
| 669 |
+
agent_name = identifier_to_name.get(agent_identifier, agent_identifier)
|
| 670 |
+
|
| 671 |
+
try:
|
| 672 |
+
dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
|
| 673 |
+
month_key = f"{dt.year}-{dt.month:02d}"
|
| 674 |
+
agent_month_data[agent_name][month_key].append(review_meta)
|
| 675 |
+
except Exception as e:
|
| 676 |
+
print(f"Warning: Could not parse date '{reviewed_at}': {e}")
|
| 677 |
+
continue
|
| 678 |
+
|
| 679 |
+
# Get all unique months and sort them
|
| 680 |
+
all_months = set()
|
| 681 |
+
for agent_data in agent_month_data.values():
|
| 682 |
+
all_months.update(agent_data.keys())
|
| 683 |
+
months = sorted(list(all_months))
|
| 684 |
+
|
| 685 |
+
# Calculate metrics for each agent and month
|
| 686 |
+
result_data = {}
|
| 687 |
+
for agent_name, month_dict in agent_month_data.items():
|
| 688 |
+
acceptance_rates = []
|
| 689 |
+
total_reviews_list = []
|
| 690 |
+
merged_prs_list = []
|
| 691 |
+
|
| 692 |
+
for month in months:
|
| 693 |
+
reviews_in_month = month_dict.get(month, [])
|
| 694 |
+
|
| 695 |
+
# Count merged PRs
|
| 696 |
+
merged_count = sum(1 for review in reviews_in_month
|
| 697 |
+
if get_pr_status_from_metadata(review) == 'merged')
|
| 698 |
+
|
| 699 |
+
# Count rejected PRs
|
| 700 |
+
rejected_count = sum(1 for review in reviews_in_month
|
| 701 |
+
if get_pr_status_from_metadata(review) == 'closed')
|
| 702 |
+
|
| 703 |
+
# Total reviews
|
| 704 |
+
total_count = len(reviews_in_month)
|
| 705 |
+
|
| 706 |
+
# Calculate acceptance rate (exclude pending PRs)
|
| 707 |
+
completed_count = merged_count + rejected_count
|
| 708 |
+
acceptance_rate = (merged_count / completed_count * 100) if completed_count > 0 else None
|
| 709 |
+
|
| 710 |
+
acceptance_rates.append(acceptance_rate)
|
| 711 |
+
total_reviews_list.append(total_count)
|
| 712 |
+
merged_prs_list.append(merged_count)
|
| 713 |
+
|
| 714 |
+
result_data[agent_name] = {
|
| 715 |
+
'acceptance_rates': acceptance_rates,
|
| 716 |
+
'total_reviews': total_reviews_list,
|
| 717 |
+
'merged_prs': merged_prs_list,
|
| 718 |
+
}
|
| 719 |
+
|
| 720 |
+
agents_list = sorted(list(agent_month_data.keys()))
|
| 721 |
+
|
| 722 |
+
return {
|
| 723 |
+
'agents': agents_list,
|
| 724 |
+
'months': months,
|
| 725 |
+
'data': result_data
|
| 726 |
+
}
|
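For a single hypothetical review timestamp, the month bucketing above reduces to a two-line computation; months in which an agent has no completed PRs end up with an acceptance rate of None:

from datetime import datetime

reviewed_at = "2025-03-15T09:30:00Z"  # hypothetical reviewed_at value
dt = datetime.fromisoformat(reviewed_at.replace("Z", "+00:00"))
month_key = f"{dt.year}-{dt.month:02d}"
print(month_key)  # -> 2025-03
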
| 727 |
+
|
| 728 |
+
|
| 729 |
+
def construct_leaderboard_from_metadata():
|
| 730 |
+
"""
|
| 731 |
+
Construct leaderboard from stored review metadata.
|
| 732 |
+
|
| 733 |
+
Returns:
|
| 734 |
+
Dictionary of agent stats.
|
| 735 |
+
"""
|
| 736 |
+
print("\nπ Constructing leaderboard from review metadata...")
|
| 737 |
+
|
| 738 |
+
# Load agents
|
| 739 |
+
agents = load_agents_from_hf()
|
| 740 |
+
if not agents:
|
| 741 |
+
print("β οΈ No agents found")
|
| 742 |
+
return {}
|
| 743 |
+
|
| 744 |
+
print(f"β Loaded {len(agents)} agents")
|
| 745 |
+
|
| 746 |
+
# Load all review metadata
|
| 747 |
+
all_metadata = load_review_metadata()
|
| 748 |
+
print(f"β Loaded {len(all_metadata)} review metadata entries")
|
| 749 |
+
|
| 750 |
+
cache_dict = {}
|
| 751 |
+
|
| 752 |
+
for agent in agents:
|
| 753 |
+
identifier = agent.get('github_identifier')
|
| 754 |
+
agent_name = agent.get('name', 'Unknown')
|
| 755 |
+
|
| 756 |
+
# Filter metadata for this agent
|
| 757 |
+
agent_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]
|
| 758 |
+
|
| 759 |
+
# Calculate stats
|
| 760 |
+
stats = calculate_review_stats_from_metadata(agent_metadata)
|
| 761 |
+
|
| 762 |
+
cache_dict[identifier] = {
|
| 763 |
+
'agent_name': agent_name,
|
| 764 |
+
'name': agent_name,
|
| 765 |
+
'website': agent.get('website', 'N/A'),
|
| 766 |
+
'github_identifier': identifier,
|
| 767 |
+
**stats
|
| 768 |
+
}
|
| 769 |
+
|
| 770 |
+
print(f"β Constructed cache with {len(cache_dict)} agent entries")
|
| 771 |
+
|
| 772 |
+
return cache_dict
|
| 773 |
+
|
| 774 |
+
|
| 775 |
+
def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
|
| 776 |
+
"""
|
| 777 |
+
Save leaderboard data and monthly metrics to HuggingFace dataset as swe-review.json.
|
| 778 |
+
|
| 779 |
+
Args:
|
| 780 |
+
leaderboard_dict: Dictionary of agent stats from construct_leaderboard_from_metadata()
|
| 781 |
+
monthly_metrics: Monthly metrics data from calculate_monthly_metrics_by_agent()
|
| 782 |
+
|
| 783 |
+
Returns:
|
| 784 |
+
bool: True if successful, False otherwise
|
| 785 |
+
"""
|
| 786 |
+
try:
|
| 787 |
+
token = get_hf_token()
|
| 788 |
+
if not token:
|
| 789 |
+
raise Exception("No HuggingFace token found")
|
| 790 |
+
|
| 791 |
+
api = HfApi(token=token)
|
| 792 |
+
filename = "swe-review.json"
|
| 793 |
+
|
| 794 |
+
# Combine leaderboard and monthly metrics
|
| 795 |
+
combined_data = {
|
| 796 |
+
'last_updated': datetime.now(timezone.utc).isoformat(),
|
| 797 |
+
'leaderboard': leaderboard_dict,
|
| 798 |
+
'monthly_metrics': monthly_metrics,
|
| 799 |
+
'metadata': {
|
| 800 |
+
'leaderboard_time_frame_days': LEADERBOARD_TIME_FRAME_DAYS
|
| 801 |
+
}
|
| 802 |
+
}
|
| 803 |
+
|
| 804 |
+
# Save locally first
|
| 805 |
+
with open(filename, 'w') as f:
|
| 806 |
+
json.dump(combined_data, f, indent=2)
|
| 807 |
+
|
| 808 |
+
try:
|
| 809 |
+
# Upload to HuggingFace
|
| 810 |
+
api.upload_file(
|
| 811 |
+
path_or_fileobj=filename,
|
| 812 |
+
path_in_repo=filename,
|
| 813 |
+
repo_id=LEADERBOARD_REPO,
|
| 814 |
+
repo_type="dataset"
|
| 815 |
+
)
|
| 816 |
+
print(f"β Saved leaderboard data to HuggingFace: {filename}")
|
| 817 |
+
return True
|
| 818 |
+
finally:
|
| 819 |
+
# Always clean up local file
|
| 820 |
+
if os.path.exists(filename):
|
| 821 |
+
os.remove(filename)
|
| 822 |
+
|
| 823 |
+
except Exception as e:
|
| 824 |
+
print(f"β Error saving leaderboard data: {str(e)}")
|
| 825 |
+
import traceback
|
| 826 |
+
traceback.print_exc()
|
| 827 |
+
return False
|
| 828 |
+
|
| 829 |
+
|
| 830 |
# =============================================================================
|
| 831 |
# MAIN MINING FUNCTION
|
| 832 |
# =============================================================================
|
|
|
|
| 925 |
print(f" BigQuery queries executed: 1")
|
| 926 |
print(f"{'='*80}\n")
|
| 927 |
|
| 928 |
+
# Construct and save leaderboard data
|
| 929 |
+
print(f"\n{'='*80}")
|
| 930 |
+
print(f"π Constructing and saving leaderboard data...")
|
| 931 |
+
print(f"{'='*80}\n")
|
| 932 |
+
|
| 933 |
+
try:
|
| 934 |
+
# Construct leaderboard
|
| 935 |
+
leaderboard_dict = construct_leaderboard_from_metadata()
|
| 936 |
+
|
| 937 |
+
# Calculate monthly metrics
|
| 938 |
+
print(f"\nπ Calculating monthly metrics...")
|
| 939 |
+
monthly_metrics = calculate_monthly_metrics_by_agent()
|
| 940 |
+
|
| 941 |
+
# Save to HuggingFace
|
| 942 |
+
print(f"\nπΎ Saving leaderboard data to HuggingFace...")
|
| 943 |
+
save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
|
| 944 |
+
|
| 945 |
+
print(f"\n{'='*80}")
|
| 946 |
+
print(f"β
Leaderboard data saved successfully!")
|
| 947 |
+
print(f" Leaderboard entries: {len(leaderboard_dict)}")
|
| 948 |
+
print(f" Monthly data points: {len(monthly_metrics.get('months', []))} months")
|
| 949 |
+
print(f" Saved to: {LEADERBOARD_REPO}/swe-review.json")
|
| 950 |
+
print(f"{'='*80}\n")
|
| 951 |
+
|
| 952 |
+
except Exception as e:
|
| 953 |
+
print(f"\nβ Failed to construct/save leaderboard data: {str(e)}")
|
| 954 |
+
import traceback
|
| 955 |
+
traceback.print_exc()
|
| 956 |
+
|
| 957 |
|
| 958 |
# =============================================================================
|
| 959 |
# ENTRY POINT
|
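After mine_all_agents has published swe-review.json, other clients (including the Gradio app) can read the leaderboard without redoing any mining. A minimal consumer sketch using the repo and filename defined above (pass a token if the dataset is private):

import json
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="SWE-Arena/swe_leaderboard",
    filename="swe-review.json",
    repo_type="dataset",
)

with open(path) as f:
    data = json.load(f)

print(data["last_updated"])
print(f"{len(data['leaderboard'])} agents on the leaderboard")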