zhiminy committed
Commit d454e42 · 1 Parent(s): dcdb282
Files changed (2)
  1. app.py +205 -8
  2. msr.py +367 -9
app.py CHANGED
@@ -28,6 +28,7 @@ load_dotenv()
 
 AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
 REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
+LEADERBOARD_REPO = "SWE-Arena/swe_leaderboard" # HuggingFace dataset for leaderboard data
 LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
 UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new reviews
 
@@ -1694,6 +1695,99 @@ def save_agent_to_hf(data):
         return False
 
 
+def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
+    """
+    Save leaderboard data and monthly metrics to HuggingFace dataset as swe-review.json.
+
+    Args:
+        leaderboard_dict: Dictionary of agent stats from construct_leaderboard_from_metadata()
+        monthly_metrics: Monthly metrics data from calculate_monthly_metrics_by_agent()
+
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    try:
+        api = HfApi()
+        token = get_hf_token()
+
+        if not token:
+            raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.")
+
+        filename = "swe-review.json"
+
+        # Combine leaderboard and monthly metrics
+        combined_data = {
+            'last_updated': datetime.now(timezone.utc).isoformat(),
+            'leaderboard': leaderboard_dict,
+            'monthly_metrics': monthly_metrics,
+            'metadata': {
+                'leaderboard_time_frame_days': LEADERBOARD_TIME_FRAME_DAYS,
+                'update_time_frame_days': UPDATE_TIME_FRAME_DAYS
+            }
+        }
+
+        # Save locally first
+        with open(filename, 'w') as f:
+            json.dump(combined_data, f, indent=2)
+
+        try:
+            # Upload to HuggingFace
+            upload_with_retry(
+                api=api,
+                path_or_fileobj=filename,
+                path_in_repo=filename,
+                repo_id=LEADERBOARD_REPO,
+                repo_type="dataset",
+                token=token
+            )
+            print(f"✓ Saved leaderboard data to HuggingFace: {filename}")
+            return True
+        finally:
+            # Always clean up local file, even if upload fails
+            if os.path.exists(filename):
+                os.remove(filename)
+
+    except Exception as e:
+        print(f"✗ Error saving leaderboard data: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
+def load_leaderboard_data_from_hf():
+    """
+    Load leaderboard data and monthly metrics from HuggingFace dataset.
+
+    Returns:
+        dict: Dictionary with 'leaderboard', 'monthly_metrics', and 'last_updated' keys.
+              Returns None if file doesn't exist or error occurs.
+    """
+    try:
+        token = get_hf_token()
+        filename = "swe-review.json"
+
+        # Download file
+        file_path = hf_hub_download(
+            repo_id=LEADERBOARD_REPO,
+            filename=filename,
+            repo_type="dataset",
+            token=token
+        )
+
+        # Load JSON data
+        with open(file_path, 'r') as f:
+            data = json.load(f)
+
+        last_updated = data.get('last_updated', 'Unknown')
+        print(f"✓ Loaded leaderboard data from HuggingFace (last updated: {last_updated})")
+
+        return data
+
+    except Exception as e:
+        print(f"⚠️ Could not load leaderboard data from HuggingFace: {str(e)}")
+        return None
+
+
 
 
 # =============================================================================
@@ -1709,6 +1803,7 @@ def update_all_agents_incremental():
     2. Fetch new reviews from the last UPDATE_TIME_FRAME_DAYS days
     3. Save all updated/new metadata back to HuggingFace
     4. Reload statistics from updated metadata
+    5. Save leaderboard and monthly metrics to swe_leaderboard dataset
     """
     print(f"\n{'='*80}")
     print(f"🕛 Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
@@ -1719,14 +1814,24 @@ def update_all_agents_incremental():
     fetch_and_update_weekly_reviews()
 
     # Reload statistics from updated metadata
-    print(f"\n📋 Reloading statistics from updated review metadata...")
-    construct_leaderboard_from_metadata()
+    print(f"\n📋 Constructing leaderboard from updated review metadata...")
+    leaderboard_dict = construct_leaderboard_from_metadata()
+
+    # Calculate monthly metrics
+    print(f"\n📈 Calculating monthly metrics...")
+    monthly_metrics = calculate_monthly_metrics_by_agent()
+
+    # Save to HuggingFace leaderboard dataset
+    print(f"\n💾 Saving leaderboard data to HuggingFace...")
+    save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
 
     print(f"\n{'='*80}")
     print(f"📊 Update Summary:")
     print(f" ✓ Updated existing review statuses")
     print(f" ✓ Fetched new reviews from last {UPDATE_TIME_FRAME_DAYS} days")
-    print(f" ✓ Statistics reloaded")
+    print(f" ✓ Leaderboard constructed with {len(leaderboard_dict)} agents")
+    print(f" ✓ Monthly metrics calculated")
+    print(f" ✓ Data saved to {LEADERBOARD_REPO}")
     print(f"{'='*80}")
 
     print(f"\n✅ Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
@@ -1797,7 +1902,36 @@ def create_monthly_metrics_plot(top_n=None):
     Args:
         top_n: If specified, only show metrics for the top N agents by total reviews.
     """
-    metrics = calculate_monthly_metrics_by_agent(top_n=top_n)
+    # Try loading from saved dataset first
+    saved_data = load_leaderboard_data_from_hf()
+
+    if saved_data and 'monthly_metrics' in saved_data:
+        metrics = saved_data['monthly_metrics']
+        print(f"📈 Loaded monthly metrics from saved dataset")
+
+        # Apply top_n filter if specified
+        if top_n is not None and top_n > 0 and metrics.get('agents'):
+            # Calculate total reviews for each agent
+            agent_totals = []
+            for agent_name in metrics['agents']:
+                agent_data = metrics['data'].get(agent_name, {})
+                total_reviews = sum(agent_data.get('total_reviews', []))
+                agent_totals.append((agent_name, total_reviews))
+
+            # Sort by total reviews and take top N
+            agent_totals.sort(key=lambda x: x[1], reverse=True)
+            top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]]
+
+            # Filter metrics to only include top agents
+            metrics = {
+                'agents': top_agents,
+                'months': metrics['months'],
+                'data': {agent: metrics['data'][agent] for agent in top_agents if agent in metrics['data']}
+            }
+    else:
+        # Fallback: calculate from metadata if saved data doesn't exist
+        print(f"📈 Saved data not available, calculating monthly metrics from metadata...")
+        metrics = calculate_monthly_metrics_by_agent(top_n=top_n)
 
     if not metrics['agents'] or not metrics['months']:
         # Return an empty figure with a message
@@ -1919,11 +2053,20 @@
 
 def get_leaderboard_dataframe():
     """
-    Construct leaderboard from review metadata and convert to pandas DataFrame for display.
+    Load leaderboard from saved dataset and convert to pandas DataFrame for display.
+    Falls back to constructing from metadata if saved data is not available.
 
     Returns formatted DataFrame sorted by total reviews.
     """
-    # Construct leaderboard from metadata
-    cache_dict = construct_leaderboard_from_metadata()
+    # Try loading from saved dataset first
+    saved_data = load_leaderboard_data_from_hf()
+
+    if saved_data and 'leaderboard' in saved_data:
+        cache_dict = saved_data['leaderboard']
+        print(f"📊 Loaded leaderboard from saved dataset (last updated: {saved_data.get('last_updated', 'Unknown')})")
+    else:
+        # Fallback: construct from metadata if saved data doesn't exist
+        print(f"📊 Saved data not available, constructing leaderboard from metadata...")
+        cache_dict = construct_leaderboard_from_metadata()
 
     print(f"📊 Cache dict size: {len(cache_dict)}")
 
@@ -2021,6 +2164,16 @@ def submit_agent(identifier, agent_name, developer, website):
     if not save_agent_to_hf(submission):
         return "❌ Failed to save submission", get_leaderboard_dataframe()
 
+    # Reconstruct and save leaderboard data with new agent
+    try:
+        print(f"📊 Reconstructing leaderboard with new agent...")
+        leaderboard_dict = construct_leaderboard_from_metadata()
+        monthly_metrics = calculate_monthly_metrics_by_agent()
+        save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
+        print(f"✓ Leaderboard data updated")
+    except Exception as e:
+        print(f"⚠️ Failed to update leaderboard data: {str(e)}")
+
     # Return success message - data will be populated by daily incremental updates
     return f"✅ Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe()
 
@@ -2139,10 +2292,54 @@ def fetch_and_update_weekly_reviews():
             continue
 
 
+# =============================================================================
+# STARTUP & INITIALIZATION
+# =============================================================================
+
+def initialize_leaderboard_data():
+    """
+    Initialize leaderboard data on startup.
+    If saved data doesn't exist, construct from metadata and save.
+    """
+    print(f"\n{'='*80}")
+    print(f"🚀 Initializing leaderboard data...")
+    print(f"{'='*80}\n")
+
+    # Try loading from saved dataset
+    saved_data = load_leaderboard_data_from_hf()
+
+    if saved_data:
+        print(f"✓ Leaderboard data already exists (last updated: {saved_data.get('last_updated', 'Unknown')})")
+    else:
+        print(f"⚠️ No saved leaderboard data found. Constructing from metadata...")
+        try:
+            # Construct leaderboard
+            leaderboard_dict = construct_leaderboard_from_metadata()
+
+            # Calculate monthly metrics
+            monthly_metrics = calculate_monthly_metrics_by_agent()
+
+            # Save to HuggingFace
+            save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
+
+            print(f"✓ Initial leaderboard data created and saved")
+        except Exception as e:
+            print(f"✗ Failed to initialize leaderboard data: {str(e)}")
+            import traceback
+            traceback.print_exc()
+
+    print(f"\n{'='*80}")
+    print(f"✓ Leaderboard initialization complete")
+    print(f"{'='*80}\n")
+
+
 # =============================================================================
 # GRADIO APPLICATION
 # =============================================================================
 
+# Initialize leaderboard data on startup
+initialize_leaderboard_data()
+
 # Start APScheduler for incremental updates at 12:00 AM UTC every Monday
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
@@ -2156,7 +2353,7 @@ scheduler.start()
 print(f"\n{'='*80}")
 print(f"✓ Scheduler initialized successfully")
 print(f"⛏️ Mining schedule: Every Monday at 12:00 AM UTC")
-print(f"📥 On startup: Only loads cached data from HuggingFace (no mining)")
+print(f"📥 On startup: Loads cached data from {LEADERBOARD_REPO}")
 print(f"{'='*80}\n")
 
 # Create Gradio interface

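For reference, the swe-review.json cache written by save_leaderboard_data_to_hf() can be read without running the Space. Below is a minimal consumer sketch, assuming the SWE-Arena/swe_leaderboard dataset is readable (pass token=... to hf_hub_download if it is private) and that each leaderboard row carries the fields produced by this commit (agent_name, total_reviews, acceptance_rate); the top-5 slice is purely illustrative.

# Minimal consumer sketch for the cached leaderboard JSON (assumptions above).
import json

from huggingface_hub import hf_hub_download

# Download the cache file this commit uploads to the leaderboard dataset
path = hf_hub_download(
    repo_id="SWE-Arena/swe_leaderboard",
    filename="swe-review.json",
    repo_type="dataset",
)

with open(path, "r") as f:
    data = json.load(f)

print(f"last updated: {data['last_updated']}")

# Rank agents the same way get_leaderboard_dataframe() does: by total reviews
rows = sorted(
    data["leaderboard"].values(),
    key=lambda row: row.get("total_reviews", 0),
    reverse=True,
)
for row in rows[:5]:  # illustrative top-5 slice
    print(row["agent_name"], row["total_reviews"], f"{row['acceptance_rate']}%")
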
msr.py CHANGED
@@ -21,6 +21,7 @@ load_dotenv()
 
 AGENTS_REPO = "SWE-Arena/swe_agents"
 REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
+LEADERBOARD_REPO = "SWE-Arena/swe_leaderboard" # HuggingFace dataset for leaderboard data
 LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
 
 # =============================================================================
@@ -448,21 +449,21 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
 def load_agents_from_hf():
     """
     Load all agent metadata JSON files from HuggingFace dataset.
-
+
     The github_identifier is extracted from the filename (e.g., 'agent-name[bot].json' -> 'agent-name[bot]')
     """
     try:
         api = HfApi()
         agents = []
-
+
         # List all files in the repository
        files = api.list_repo_files(repo_id=AGENTS_REPO, repo_type="dataset")
-
+
         # Filter for JSON files only
         json_files = [f for f in files if f.endswith('.json')]
-
+
         print(f"Found {len(json_files)} agent files in {AGENTS_REPO}")
-
+
         # Download and parse each JSON file
         for json_file in json_files:
             try:
@@ -471,7 +472,7 @@ def load_agents_from_hf():
                     filename=json_file,
                     repo_type="dataset"
                 )
-
+
                 with open(file_path, 'r') as f:
                     agent_data = json.load(f)
 
@@ -485,19 +486,347 @@ def load_agents_from_hf():
                 agent_data['github_identifier'] = github_identifier
 
                 agents.append(agent_data)
-
+
             except Exception as e:
                 print(f"Warning: Could not load {json_file}: {str(e)}")
                 continue
-
+
         print(f"✓ Loaded {len(agents)} agents from HuggingFace")
         return agents
-
+
     except Exception as e:
         print(f"Could not load agents from HuggingFace: {str(e)}")
         return []
 
 
+def load_review_metadata():
+    """
+    Load all review metadata from HuggingFace dataset within LEADERBOARD_TIME_FRAME_DAYS.
+
+    Returns:
+        List of dictionaries with 'agent_identifier' added to each review metadata.
+    """
+    # Calculate cutoff date
+    current_time = datetime.now(timezone.utc)
+    cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)
+
+    try:
+        api = HfApi()
+        token = get_hf_token()
+
+        # List all files in the repository
+        files = api.list_repo_files(repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
+
+        # Filter for JSONL files matching pattern: [agent_identifier]/YYYY.MM.DD.jsonl
+        time_frame_files = []
+        for f in files:
+            if f.endswith('.jsonl'):
+                parts = f.split('/')
+                if len(parts) == 2:
+                    filename = parts[1]
+                    # Parse date from filename: YYYY.MM.DD.jsonl
+                    try:
+                        date_part = filename.replace('.jsonl', '')
+                        date_components = date_part.split('.')
+                        if len(date_components) == 3:
+                            file_year, file_month, file_day = map(int, date_components)
+                            file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc)
+
+                            # Only include files within time frame
+                            if file_date >= cutoff_date:
+                                time_frame_files.append(f)
+                    except Exception:
+                        continue
+
+        print(f"📥 Loading review metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days ({len(time_frame_files)} daily files)...")
+
+        all_metadata = []
+
+        for filename in time_frame_files:
+            try:
+                # Extract agent_identifier from path
+                parts = filename.split('/')
+                if len(parts) != 2:
+                    continue
+
+                agent_identifier = parts[0]
+
+                file_path = hf_hub_download(
+                    repo_id=REVIEW_METADATA_REPO,
+                    filename=filename,
+                    repo_type="dataset",
+                    token=token
+                )
+                day_metadata = load_jsonl(file_path)
+
+                # Add agent_identifier to each review
+                for review_meta in day_metadata:
+                    review_meta['agent_identifier'] = agent_identifier
+                    all_metadata.append(review_meta)
+
+            except Exception as e:
+                print(f" Warning: Could not load {filename}: {str(e)}")
+
+        print(f"✓ Loaded {len(all_metadata)} total reviews from last {LEADERBOARD_TIME_FRAME_DAYS} days")
+        return all_metadata
+
+    except Exception as e:
+        print(f"✗ Error loading review metadata: {str(e)}")
+        return []
+
+
+def get_pr_status_from_metadata(review_meta):
+    """
+    Derive PR status from merged_at and closed_at fields.
+
+    Returns:
+        str: 'merged', 'closed', or 'open'
+    """
+    merged_at = review_meta.get('merged_at')
+    closed_at = review_meta.get('closed_at')
+
+    if merged_at:
+        return 'merged'
+    elif closed_at:
+        return 'closed'
+    else:
+        return 'open'
+
+
+def calculate_review_stats_from_metadata(metadata_list):
+    """
+    Calculate statistics from a list of review metadata.
+
+    Returns:
+        Dictionary with review metrics (total_reviews, merged_prs, acceptance_rate, etc.)
+    """
+    total_reviews = len(metadata_list)
+
+    # Count merged PRs
+    merged_prs = sum(1 for review_meta in metadata_list
+                     if get_pr_status_from_metadata(review_meta) == 'merged')
+
+    # Count rejected PRs
+    rejected_prs = sum(1 for review_meta in metadata_list
+                       if get_pr_status_from_metadata(review_meta) == 'closed')
+
+    # Count pending PRs
+    pending_prs = sum(1 for review_meta in metadata_list
+                      if get_pr_status_from_metadata(review_meta) == 'open')
+
+    # Calculate acceptance rate (exclude pending PRs)
+    completed_prs = merged_prs + rejected_prs
+    acceptance_rate = (merged_prs / completed_prs * 100) if completed_prs > 0 else 0
+
+    return {
+        'total_reviews': total_reviews,
+        'merged_prs': merged_prs,
+        'pending_prs': pending_prs,
+        'acceptance_rate': round(acceptance_rate, 2),
+    }
+
+
+def calculate_monthly_metrics_by_agent():
+    """
+    Calculate monthly metrics for all agents for visualization.
+
+    Returns:
+        dict: {
+            'agents': list of agent names,
+            'months': list of month labels (e.g., '2025-01'),
+            'data': {
+                agent_name: {
+                    'acceptance_rates': list of acceptance rates by month,
+                    'total_reviews': list of review counts by month,
+                    'merged_prs': list of merged PR counts by month,
+                }
+            }
+        }
+    """
+    # Load agents
+    agents = load_agents_from_hf()
+
+    # Create mapping from agent_identifier to agent_name
+    identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')}
+
+    # Load all review metadata
+    all_metadata = load_review_metadata()
+
+    if not all_metadata:
+        return {'agents': [], 'months': [], 'data': {}}
+
+    # Group by agent and month
+    agent_month_data = defaultdict(lambda: defaultdict(list))
+
+    for review_meta in all_metadata:
+        agent_identifier = review_meta.get('agent_identifier')
+        reviewed_at = review_meta.get('reviewed_at')
+
+        if not agent_identifier or not reviewed_at:
+            continue
+
+        # Get agent_name from identifier
+        agent_name = identifier_to_name.get(agent_identifier, agent_identifier)
+
+        try:
+            dt = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
+            month_key = f"{dt.year}-{dt.month:02d}"
+            agent_month_data[agent_name][month_key].append(review_meta)
+        except Exception as e:
+            print(f"Warning: Could not parse date '{reviewed_at}': {e}")
+            continue
+
+    # Get all unique months and sort them
+    all_months = set()
+    for agent_data in agent_month_data.values():
+        all_months.update(agent_data.keys())
+    months = sorted(list(all_months))
+
+    # Calculate metrics for each agent and month
+    result_data = {}
+    for agent_name, month_dict in agent_month_data.items():
+        acceptance_rates = []
+        total_reviews_list = []
+        merged_prs_list = []
+
+        for month in months:
+            reviews_in_month = month_dict.get(month, [])
+
+            # Count merged PRs
+            merged_count = sum(1 for review in reviews_in_month
+                               if get_pr_status_from_metadata(review) == 'merged')
+
+            # Count rejected PRs
+            rejected_count = sum(1 for review in reviews_in_month
+                                 if get_pr_status_from_metadata(review) == 'closed')
+
+            # Total reviews
+            total_count = len(reviews_in_month)
+
+            # Calculate acceptance rate (exclude pending PRs)
+            completed_count = merged_count + rejected_count
+            acceptance_rate = (merged_count / completed_count * 100) if completed_count > 0 else None
+
+            acceptance_rates.append(acceptance_rate)
+            total_reviews_list.append(total_count)
+            merged_prs_list.append(merged_count)
+
+        result_data[agent_name] = {
+            'acceptance_rates': acceptance_rates,
+            'total_reviews': total_reviews_list,
+            'merged_prs': merged_prs_list,
+        }
+
+    agents_list = sorted(list(agent_month_data.keys()))
+
+    return {
+        'agents': agents_list,
+        'months': months,
+        'data': result_data
+    }
+
+
+def construct_leaderboard_from_metadata():
+    """
+    Construct leaderboard from stored review metadata.
+
+    Returns:
+        Dictionary of agent stats.
+    """
+    print("\n📊 Constructing leaderboard from review metadata...")
+
+    # Load agents
+    agents = load_agents_from_hf()
+    if not agents:
+        print("⚠️ No agents found")
+        return {}
+
+    print(f"✓ Loaded {len(agents)} agents")
+
+    # Load all review metadata
+    all_metadata = load_review_metadata()
+    print(f"✓ Loaded {len(all_metadata)} review metadata entries")
+
+    cache_dict = {}
+
+    for agent in agents:
+        identifier = agent.get('github_identifier')
+        agent_name = agent.get('name', 'Unknown')
+
+        # Filter metadata for this agent
+        agent_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]
+
+        # Calculate stats
+        stats = calculate_review_stats_from_metadata(agent_metadata)
+
+        cache_dict[identifier] = {
+            'agent_name': agent_name,
+            'name': agent_name,
+            'website': agent.get('website', 'N/A'),
+            'github_identifier': identifier,
+            **stats
+        }
+
+    print(f"✓ Constructed cache with {len(cache_dict)} agent entries")
+
+    return cache_dict
+
+
+def save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics):
+    """
+    Save leaderboard data and monthly metrics to HuggingFace dataset as swe-review.json.
+
+    Args:
+        leaderboard_dict: Dictionary of agent stats from construct_leaderboard_from_metadata()
+        monthly_metrics: Monthly metrics data from calculate_monthly_metrics_by_agent()
+
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    try:
+        token = get_hf_token()
+        if not token:
+            raise Exception("No HuggingFace token found")
+
+        api = HfApi(token=token)
+        filename = "swe-review.json"
+
+        # Combine leaderboard and monthly metrics
+        combined_data = {
+            'last_updated': datetime.now(timezone.utc).isoformat(),
+            'leaderboard': leaderboard_dict,
+            'monthly_metrics': monthly_metrics,
+            'metadata': {
+                'leaderboard_time_frame_days': LEADERBOARD_TIME_FRAME_DAYS
+            }
+        }
+
+        # Save locally first
+        with open(filename, 'w') as f:
+            json.dump(combined_data, f, indent=2)
+
+        try:
+            # Upload to HuggingFace
+            api.upload_file(
+                path_or_fileobj=filename,
+                path_in_repo=filename,
+                repo_id=LEADERBOARD_REPO,
+                repo_type="dataset"
+            )
+            print(f"✓ Saved leaderboard data to HuggingFace: {filename}")
+            return True
+        finally:
+            # Always clean up local file
+            if os.path.exists(filename):
+                os.remove(filename)
+
+    except Exception as e:
+        print(f"✗ Error saving leaderboard data: {str(e)}")
+        import traceback
+        traceback.print_exc()
+        return False
+
+
 # =============================================================================
 # MAIN MINING FUNCTION
 # =============================================================================
@@ -596,6 +925,35 @@ def mine_all_agents():
     print(f" BigQuery queries executed: 1")
     print(f"{'='*80}\n")
 
+    # Construct and save leaderboard data
+    print(f"\n{'='*80}")
+    print(f"📊 Constructing and saving leaderboard data...")
+    print(f"{'='*80}\n")
+
+    try:
+        # Construct leaderboard
+        leaderboard_dict = construct_leaderboard_from_metadata()
+
+        # Calculate monthly metrics
+        print(f"\n📈 Calculating monthly metrics...")
+        monthly_metrics = calculate_monthly_metrics_by_agent()
+
+        # Save to HuggingFace
+        print(f"\n💾 Saving leaderboard data to HuggingFace...")
+        save_leaderboard_data_to_hf(leaderboard_dict, monthly_metrics)
+
+        print(f"\n{'='*80}")
+        print(f"✅ Leaderboard data saved successfully!")
+        print(f" Leaderboard entries: {len(leaderboard_dict)}")
+        print(f" Monthly data points: {len(monthly_metrics.get('months', []))} months")
+        print(f" Saved to: {LEADERBOARD_REPO}/swe-review.json")
+        print(f"{'='*80}\n")
+
+    except Exception as e:
+        print(f"\n✗ Failed to construct/save leaderboard data: {str(e)}")
+        import traceback
+        traceback.print_exc()
+
 
 # =============================================================================
 # ENTRY POINT

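The load_review_metadata() helper above keys entirely off the [agent_identifier]/YYYY.MM.DD.jsonl path convention when deciding which daily files fall inside LEADERBOARD_TIME_FRAME_DAYS. A toy check of that filter logic follows; the file names are hypothetical, and str.removesuffix assumes Python 3.9+.

# Toy check of the date-window filename filter (hypothetical file names).
from datetime import datetime, timedelta, timezone

LEADERBOARD_TIME_FRAME_DAYS = 180
cutoff = datetime.now(timezone.utc) - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS)

files = [
    "bot-a[bot]/2025.01.15.jsonl",  # kept while that date is inside the window
    "bot-a[bot]/2023.01.15.jsonl",  # dropped: older than the cutoff
    "bot-a[bot]/README.md",         # dropped: not a .jsonl file
    "2025.01.15.jsonl",             # dropped: no agent directory in the path
]

kept = []
for f in files:
    if not f.endswith('.jsonl'):
        continue
    parts = f.split('/')
    if len(parts) != 2:
        continue
    try:
        # Parse date from filename: YYYY.MM.DD.jsonl
        year, month, day = map(int, parts[1].removesuffix('.jsonl').split('.'))
        if datetime(year, month, day, tzinfo=timezone.utc) >= cutoff:
            kept.append(f)
    except ValueError:
        continue

print(kept)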
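
Finally, to make the status and acceptance-rate rules concrete: get_pr_status_from_metadata() treats any record with merged_at set as merged, any with only closed_at set as closed, and everything else as open, and pending PRs are excluded from the acceptance-rate denominator. A toy check under those rules, with made-up records and assuming msr.py is importable:

# Toy check: 2 merged + 1 closed-unmerged + 1 open -> 2/3 = 66.67% acceptance.
from msr import calculate_review_stats_from_metadata

sample = [
    {"merged_at": "2025-01-05T12:00:00Z", "closed_at": "2025-01-05T12:00:00Z"},  # merged
    {"merged_at": "2025-01-08T09:30:00Z", "closed_at": "2025-01-08T09:30:00Z"},  # merged
    {"merged_at": None, "closed_at": "2025-01-10T17:45:00Z"},  # closed without merge
    {"merged_at": None, "closed_at": None},  # still open, excluded from the rate
]

print(calculate_review_stats_from_metadata(sample))
# -> {'total_reviews': 4, 'merged_prs': 2, 'pending_prs': 1, 'acceptance_rate': 66.67}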