zhiminy committed
Commit 47008cd · 1 Parent(s): 5a89f16
Files changed (1)
  1. app.py +102 -161
app.py CHANGED
@@ -2065,8 +2065,98 @@ def mine_all_agents():
2065
  print(f"{'='*80}")
2066
 
2067
  try:
2068
- # Fetch and update reviews
2069
- fetch_and_update_weekly_reviews()
2068
+ client = get_bigquery_client()
2069
+
2070
+ # Load all agents
2071
+ agents = load_agents_from_hf()
2072
+ if not agents:
2073
+ print("No agents found in HuggingFace dataset")
2074
+ return
2075
+
2076
+ # Calculate date range
2077
+ today_utc = datetime.now(timezone.utc)
2078
+ today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
2079
+ update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
2080
+ cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
2081
+
2082
+ print(f"📅 Time Range Configuration:")
2083
+ print(f" Update period start (12am UTC): {update_start_midnight.isoformat()}")
2084
+ print(f" Today 12am UTC: {today_midnight.isoformat()}")
2085
+ print(f" Cutoff for existing reviews: {cutoff_date.isoformat()}")
2086
+ print(f" Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
2087
+
2088
+ for agent in agents:
2089
+ identifier = agent.get('github_identifier')
2090
+ agent_name = agent.get('name', 'Unknown')
2091
+
2092
+ if not identifier:
2093
+ print(f"Warning: Skipping agent without identifier: {agent}")
2094
+ continue
2095
+
2096
+ try:
2097
+ print(f"\n{'='*60}")
2098
+ print(f"Processing: {agent_name} ({identifier})")
2099
+ print(f"{'='*60}")
2100
+
2101
+ # Step 1: Load all existing metadata within timeframe
2102
+ print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS} days...")
2103
+ all_metadata = load_review_metadata()
2104
+ agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
2105
+
2106
+ # Filter to last (LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS) days (from cutoff to today)
2107
+ recent_metadata = []
2108
+ for review in agent_metadata:
2109
+ reviewed_at = review.get('reviewed_at', '')
2110
+ if reviewed_at:
2111
+ try:
2112
+ review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
2113
+ if cutoff_date <= review_date < today_midnight:
2114
+ recent_metadata.append(review)
2115
+ except Exception as e:
2116
+ print(f" Warning: Could not parse date '{reviewed_at}': {e}")
2117
+ continue
2118
+
2119
+ print(f" ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
2120
+
2121
+ # Step 2: Fetch NEW reviews from last UPDATE_TIME_FRAME_DAYS to today using BigQuery
2122
+ print(f"🔍 Fetching new reviews from {update_start_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
2123
+
2124
+ review_rows = fetch_reviews_from_bigquery(client, identifier, update_start_midnight, today_midnight)
2125
+
2126
+ # Extract unique PRs
2127
+ urls = list(set([row.url for row in review_rows if row.url]))
2128
+ print(f" Found {len(review_rows)} review events across {len(urls)} unique PRs")
2129
+
2130
+ # Extract metadata for new reviews
2131
+ review_metadata = []
2132
+ seen_prs = set()
2133
+ for row in review_rows:
2134
+ url = row.url
2135
+ if url in seen_prs:
2136
+ continue
2137
+ seen_prs.add(url)
2138
+
2139
+ metadata = extract_review_metadata_from_bigquery(row)
2140
+ metadata['agent_identifier'] = identifier
2141
+ review_metadata.append(metadata)
2142
+
2143
+ print(f" ✓ Found {len(review_metadata)} unique PRs in {UPDATE_TIME_FRAME_DAYS}-day window")
2144
+
2145
+ # Step 3: Combine and save all metadata
2146
+ all_updated_metadata = recent_metadata + review_metadata
2147
+
2148
+ if all_updated_metadata:
2149
+ print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
2150
+ save_review_metadata_to_hf(all_updated_metadata, identifier)
2151
+ print(f"✓ Updated {identifier}: {len(recent_metadata)} existing + {len(review_metadata)} new = {len(all_updated_metadata)} total")
2152
+ else:
2153
+ print(f" No reviews to save for {identifier}")
2154
+
2155
+ except Exception as e:
2156
+ print(f"✗ Error processing {identifier}: {str(e)}")
2157
+ import traceback
2158
+ traceback.print_exc()
2159
+ continue
2070
 
2071
  # After mining is complete, save leaderboard and metrics to HuggingFace
2072
  print(f"\n📤 Uploading leaderboard and metrics data...")
@@ -2093,7 +2183,7 @@ def mine_all_agents():
2093
  print(f"\n✅ Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
2094
 
2095
  except Exception as e:
2096
- print(f"✗ Weekly update failed: {str(e)}")
2097
  import traceback
2098
  traceback.print_exc()
2099
 
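The per-review date filter in the mining loop above parses GitHub-style ISO-8601 timestamps; datetime.fromisoformat() only accepts a trailing 'Z' from Python 3.11 onward, hence the replace('Z', '+00:00') shim. A small sketch with a hypothetical timestamp:

    from datetime import datetime, timezone

    reviewed_at = "2025-01-15T08:30:00Z"  # hypothetical example value
    # Python < 3.11 fromisoformat() rejects the 'Z' suffix, so normalize it first.
    review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))

    cutoff_date = datetime(2025, 1, 1, tzinfo=timezone.utc)    # illustrative bounds
    today_midnight = datetime(2025, 2, 1, tzinfo=timezone.utc)
    # Half-open interval: the upper bound (today 12am UTC) is excluded.
    keep = cutoff_date <= review_date < today_midnight  # True here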
@@ -2435,177 +2525,28 @@ def submit_agent(identifier, agent_name, developer, website):
2435
  return f"✅ Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe()
2436
 
2437
 
2438
- # =============================================================================
2439
- # BACKGROUND TASKS
2440
- # =============================================================================
2441
-
2442
- def fetch_and_update_weekly_reviews():
2443
- """
2444
- Fetch and update reviews with comprehensive status checking using BigQuery.
2445
-
2446
- Strategy:
2447
- 1. For each agent:
2448
- - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS for their closed_at status
2449
- - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
2450
- - Fetch new reviews from last UPDATE_TIME_FRAME_DAYS days using BigQuery
2451
- - Save all updated/new metadata back to HuggingFace
2452
- """
2453
- # Initialize BigQuery client
2454
- try:
2455
- client = get_bigquery_client()
2456
- except Exception as e:
2457
- print(f"✗ Failed to initialize BigQuery client: {str(e)}")
2458
- return
2459
-
2460
- # Load all agents
2461
- agents = load_agents_from_hf()
2462
- if not agents:
2463
- print("No agents found in HuggingFace dataset")
2464
- return
2465
-
2466
- # Calculate date range
2467
- today_utc = datetime.now(timezone.utc)
2468
- today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
2469
- update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
2470
- cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
2471
-
2472
- print(f"📅 Time Range Configuration:")
2473
- print(f" Update period start (12am UTC): {update_start_midnight.isoformat()}")
2474
- print(f" Today 12am UTC: {today_midnight.isoformat()}")
2475
- print(f" Cutoff for existing reviews: {cutoff_date.isoformat()}")
2476
- print(f" Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
2477
-
2478
- for agent in agents:
2479
- identifier = agent.get('github_identifier')
2480
- agent_name = agent.get('name', 'Unknown')
2481
-
2482
- if not identifier:
2483
- print(f"Warning: Skipping agent without identifier: {agent}")
2484
- continue
2485
-
2486
- try:
2487
- print(f"\n{'='*60}")
2488
- print(f"Processing: {agent_name} ({identifier})")
2489
- print(f"{'='*60}")
2490
-
2491
- # Step 1: Load all existing metadata within timeframe
2492
- print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS} days...")
2493
- all_metadata = load_review_metadata()
2494
- agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
2495
-
2496
- # Filter to last (LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS) days (from cutoff to today)
2497
- recent_metadata = []
2498
- for review in agent_metadata:
2499
- reviewed_at = review.get('reviewed_at', '')
2500
- if reviewed_at:
2501
- try:
2502
- review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
2503
- if cutoff_date <= review_date < today_midnight:
2504
- recent_metadata.append(review)
2505
- except Exception as e:
2506
- print(f" Warning: Could not parse date '{reviewed_at}': {e}")
2507
- continue
2508
-
2509
- print(f" ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
2510
-
2511
- # Step 2: Fetch NEW reviews from last UPDATE_TIME_FRAME_DAYS to today using BigQuery
2512
- print(f"🔍 Fetching new reviews from {update_start_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
2513
-
2514
- review_rows = fetch_reviews_from_bigquery(client, identifier, update_start_midnight, today_midnight)
2515
-
2516
- # Extract unique PRs
2517
- urls = list(set([row.url for row in review_rows if row.url]))
2518
- print(f" Found {len(review_rows)} review events across {len(urls)} unique PRs")
2519
-
2520
- # Extract metadata for new reviews
2521
- weekly_metadata = []
2522
- seen_prs = set()
2523
- for row in review_rows:
2524
- url = row.url
2525
- if url in seen_prs:
2526
- continue
2527
- seen_prs.add(url)
2528
-
2529
- metadata = extract_review_metadata_from_bigquery(row)
2530
- metadata['agent_identifier'] = identifier
2531
- weekly_metadata.append(metadata)
2532
-
2533
- print(f" ✓ Found {len(weekly_metadata)} unique PRs in {UPDATE_TIME_FRAME_DAYS}-day window")
2534
-
2535
- # Step 3: Combine and save all metadata
2536
- all_updated_metadata = recent_metadata + weekly_metadata
2537
-
2538
- if all_updated_metadata:
2539
- print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
2540
- save_review_metadata_to_hf(all_updated_metadata, identifier)
2541
- print(f"✓ Updated {identifier}: {len(recent_metadata)} existing + {len(weekly_metadata)} new = {len(all_updated_metadata)} total")
2542
- else:
2543
- print(f" No reviews to save for {identifier}")
2544
-
2545
- except Exception as e:
2546
- print(f"✗ Error processing {identifier}: {str(e)}")
2547
- import traceback
2548
- traceback.print_exc()
2549
- continue
2550
-
2551
-
2552
- # =============================================================================
2553
- # STARTUP & INITIALIZATION
2554
- # =============================================================================
2555
-
2556
- def initialize_leaderboard_data():
2557
- """
2558
- Initialize leaderboard data on startup.
2559
- If saved data doesn't exist, construct from metadata and save.
2560
- """
2561
- print(f"\n{'='*80}")
2562
- print(f"🚀 Initializing leaderboard data...")
2563
- print(f"{'='*80}\n")
2564
-
2565
- # Try loading from saved dataset
2566
- saved_data = load_leaderboard_data_from_hf()
2567
-
2568
- if saved_data:
2569
- print(f"✓ Leaderboard data already exists (last updated: {saved_data.get('last_updated', 'Unknown')})")
2570
- else:
2571
- print(f"⚠️ No saved leaderboard data found. Constructing from metadata...")
2572
- try:
2573
- # Save leaderboard and metrics to HuggingFace
2574
- if save_leaderboard_and_metrics_to_hf():
2575
- print(f"✓ Initial leaderboard data created and saved")
2576
- else:
2577
- print(f"⚠️ Failed to save initial leaderboard data")
2578
- except Exception as e:
2579
- print(f"✗ Failed to initialize leaderboard data: {str(e)}")
2580
- import traceback
2581
- traceback.print_exc()
2582
-
2583
- print(f"\n{'='*80}")
2584
- print(f"✓ Leaderboard initialization complete")
2585
- print(f"{'='*80}\n")
2586
-
2587
-
2588
  # =============================================================================
2589
  # GRADIO APPLICATION
2590
  # =============================================================================
2591
 
2592
- # Initialize leaderboard data on startup
2593
- initialize_leaderboard_data()
2532
+ print(f"\n🚀 Starting SWE Agent PR Leaderboard")
2533
+ print(f" Leaderboard time frame: {LEADERBOARD_TIME_FRAME_DAYS} days ({LEADERBOARD_TIME_FRAME_DAYS // 30} months)")
2534
+ print(f" Mining update frequency: Every {UPDATE_TIME_FRAME_DAYS} days\n")
2594
 
2595
- # Start APScheduler for incremental updates at 12:00 AM UTC every Monday
2536
+ # Start APScheduler for monthly PR mining at 12:00 AM UTC every 1st of the month
2596
  scheduler = BackgroundScheduler(timezone="UTC")
2597
  scheduler.add_job(
2598
  mine_all_agents,
2599
- trigger=CronTrigger(day_of_week='mon', hour=0, minute=0), # 12:00 AM UTC every Monday
2600
- id='incremental_review_mining',
2601
- name='Incremental Review Mining',
2540
+ trigger=CronTrigger(day=1, hour=0, minute=0), # 12:00 AM UTC every 1st of the month
2541
+ id='monthly_review_mining',
2542
+ name='Monthly Review Mining',
2602
  replace_existing=True
2603
  )
2604
  scheduler.start()
2605
  print(f"\n{'='*80}")
2606
  print(f"✓ Scheduler initialized successfully")
2607
- print(f"⛏️ Mining schedule: Every Monday at 12:00 AM UTC")
2608
- print(f"📥 On startup: Loads cached data from {LEADERBOARD_REPO}")
2609
  print(f"{'='*80}\n")
2610
 
2611
  # Create Gradio interface
 