zhiminy committed
Commit e98a10e · 1 Parent(s): 3dcd27a

update daily msr

Files changed (1)
  app.py +137 -369
app.py CHANGED
@@ -584,132 +584,6 @@ def update_pr_status(metadata_list, headers, token):
584
  return metadata_list
585
 
586
 
587
- def fetch_all_reviews_metadata(identifier, agent_name, token=None, start_from_date=None, exclude_dates=None):
588
- """
589
- Fetch PR reviews associated with a GitHub user or bot for the past 6 months.
590
- Returns lightweight metadata instead of full review objects.
591
-
592
- This function employs time-based partitioning to navigate GitHub's 1000-result limit per query.
593
- It searches using the query pattern:
594
- - reviewed-by:{identifier} (PR reviews by the agent)
595
-
596
- After fetching reviews, it updates PR status to determine if PRs were merged or closed.
597
-
598
- Args:
599
- identifier: GitHub username or bot identifier
600
- agent_name: Human-readable name of the agent for metadata purposes
601
- token: GitHub API token for authentication
602
- start_from_date: Only fetch reviews created after this date (for incremental updates)
603
- exclude_dates: Set of date objects to exclude from mining (dates that have already been processed)
604
-
605
- Returns:
606
- List of dictionaries containing minimal PR review metadata with PR status
607
- """
608
- headers = {'Authorization': f'token {token}'} if token else {}
609
-
610
- # Debug mode: limit review retrieval for testing
611
- debug_limit_per_pattern = 10 if DEBUG_MODE else None
612
-
613
- if DEBUG_MODE:
614
- print(f"\n🐛 DEBUG MODE ENABLED: Limiting to {debug_limit_per_pattern} reviews per query pattern")
615
-
616
- # Define query pattern for PR reviews:
617
- query_patterns = []
618
-
619
- # Add reviewed-by pattern for PR reviews
620
- query_patterns.append(f'is:pr reviewed-by:{identifier}')
621
-
622
- # Use a dict to deduplicate PRs by URL
623
- prs_by_url = {}
624
-
625
- # Define time range: past 6 months only (or from start_from_date if specified)
626
- current_time = datetime.now(timezone.utc)
627
- six_months_ago = current_time - timedelta(days=180) # ~6 months
628
-
629
- if start_from_date:
630
- # Use start_from_date but ensure it's not older than 6 months
631
- start_date = max(start_from_date, six_months_ago)
632
- else:
633
- start_date = six_months_ago
634
-
635
- # End date is current time
636
- end_date = current_time
637
-
638
- for query_pattern in query_patterns:
639
- print(f"\n🔍 Searching with query: {query_pattern}")
640
- print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
641
-
642
- pattern_start_time = time.time()
643
- initial_count = len(prs_by_url)
644
-
645
- # Fetch with time partitioning
646
- reviews_found = fetch_reviews_with_time_partition(
647
- query_pattern,
648
- start_date,
649
- end_date,
650
- headers,
651
- prs_by_url,
652
- debug_limit_per_pattern
653
- )
654
-
655
- pattern_duration = time.time() - pattern_start_time
656
- new_reviews = len(prs_by_url) - initial_count
657
-
658
- print(f" ✓ Pattern complete: {new_reviews} new PRs found ({reviews_found} total fetched, {len(prs_by_url) - initial_count - (reviews_found - new_reviews)} duplicates)")
659
- print(f" ⏱️ Time taken: {pattern_duration:.1f} seconds")
660
-
661
- # Delay between different query patterns (shorter in debug mode)
662
- time.sleep(0.2 if DEBUG_MODE else 1.0)
663
-
664
- # Convert to lightweight metadata
665
- all_prs = list(prs_by_url.values())
666
-
667
- # Filter out PRs from excluded dates if specified
668
- if exclude_dates:
669
- filtered_prs = []
670
- excluded_count = 0
671
- for pr in all_prs:
672
- created_at = pr.get('created_at')
673
- if created_at:
674
- try:
675
- dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
676
- pr_date = dt.date()
677
- if pr_date not in exclude_dates:
678
- filtered_prs.append(pr)
679
- else:
680
- excluded_count += 1
681
- except Exception:
682
- filtered_prs.append(pr) # Keep PRs with unparseable dates
683
- else:
684
- filtered_prs.append(pr) # Keep PRs without created_at
685
-
686
- if excluded_count > 0:
687
- print(f" ⏭️ Skipped {excluded_count} PRs from already-mined dates")
688
- all_prs = filtered_prs
689
-
690
- if DEBUG_MODE:
691
- print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs reviewed by {identifier}")
692
- print(f" Note: In production mode, this would fetch ALL PRs")
693
- else:
694
- print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs reviewed by {identifier}")
695
- print(f"📦 Extracting minimal metadata and updating PR status...")
696
-
697
- # Extract metadata for each PR review
698
- metadata_list = [extract_review_metadata(pr) for pr in all_prs]
699
-
700
- # Update PR status to get current merged/closed state
701
- print(f"🔍 Updating PR status for reviewed PRs...")
702
- metadata_list = update_pr_status(metadata_list, headers, token)
703
-
704
- # Calculate memory savings
705
- import sys
706
- original_size = sys.getsizeof(str(all_prs))
707
- metadata_size = sys.getsizeof(str(metadata_list))
708
- savings_pct = ((original_size - metadata_size) / original_size * 100) if original_size > 0 else 0
709
-
710
- print(f"💾 Memory efficiency: {original_size // 1024}KB → {metadata_size // 1024}KB (saved {savings_pct:.1f}%)")
711
-
712
- return metadata_list
713
 
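Note on the time partitioning the removed docstring describes: `fetch_reviews_with_time_partition` itself is not part of this diff, so the following is only a rough sketch of the technique — recursively splitting the `created:` date range until each slice fits under the GitHub Search API's 1,000-result cap, then paginating and deduplicating by PR URL. The function name, pagination details, and return value are assumptions, not the repository's actual implementation.

import requests
from datetime import timedelta

def fetch_reviews_with_time_partition_sketch(query, start, end, headers, prs_by_url, per_page=100):
    """Sketch only: split [start, end] until each slice stays under the ~1000-result search cap."""
    search_url = "https://api.github.com/search/issues"
    date_range = f'{start.strftime("%Y-%m-%d")}..{end.strftime("%Y-%m-%d")}'
    params = {"q": f"{query} created:{date_range}", "per_page": per_page, "page": 1}
    total = requests.get(search_url, headers=headers, params=params).json().get("total_count", 0)

    # Too many hits for one slice: split the window in half and recurse on each part.
    if total > 1000 and (end - start).days > 1:
        mid = start + (end - start) / 2
        first = fetch_reviews_with_time_partition_sketch(query, start, mid, headers, prs_by_url, per_page)
        second = fetch_reviews_with_time_partition_sketch(query, mid + timedelta(days=1), end, headers, prs_by_url, per_page)
        return first + second

    # Slice is small enough: paginate and deduplicate PRs by their URL.
    page, fetched = 1, 0
    while True:
        params["page"] = page
        items = requests.get(search_url, headers=headers, params=params).json().get("items", [])
        if not items:
            break
        for item in items:
            prs_by_url[item["html_url"]] = item
        fetched += len(items)
        page += 1
    return fetched

Called with a pattern such as is:pr reviewed-by:<identifier>, this fills the same deduplicated prs_by_url dictionary the deleted function relied on.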
714
 
715
  def calculate_review_stats_from_metadata(metadata_list):
@@ -1197,59 +1071,6 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
1197
  return []
1198
 
1199
 
1200
- def get_already_mined_dates(agent_identifier, n_months=6):
1201
- """
1202
- Get set of dates that have already been mined for an agent.
1203
-
1204
- Args:
1205
- agent_identifier: GitHub identifier of the agent
1206
- n_months: Number of months to look back (default: 6)
1207
-
1208
- Returns:
1209
- Set of date objects (datetime.date) that already have data files
1210
- """
1211
- try:
1212
- api = HfApi()
1213
-
1214
- # Calculate date range
1215
- today = datetime.now(timezone.utc)
1216
- n_months_ago = today - timedelta(days=30 * n_months)
1217
-
1218
- # List all files in the repository
1219
- files = api.list_repo_files(repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
1220
-
1221
- # Filter for files in this agent's folder
1222
- agent_pattern = f"{agent_identifier}/"
1223
- agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]
1224
-
1225
- mined_dates = set()
1226
- for filename in agent_files:
1227
- try:
1228
- # Extract date from filename: [agent_identifier]/YYYY.MM.DD.jsonl
1229
- parts = filename.split('/')
1230
- if len(parts) != 2:
1231
- continue
1232
-
1233
- date_part = parts[1].replace('.jsonl', '') # Get YYYY.MM.DD
1234
- date_components = date_part.split('.')
1235
- if len(date_components) != 3:
1236
- continue
1237
-
1238
- file_year, file_month, file_day = map(int, date_components)
1239
- file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc).date()
1240
-
1241
- # Only include dates within the last n_months
1242
- if n_months_ago.date() <= file_date <= today.date():
1243
- mined_dates.add(file_date)
1244
- except Exception as e:
1245
- print(f" Warning: Could not parse date from filename {filename}: {e}")
1246
- continue
1247
-
1248
- return mined_dates
1249
-
1250
- except Exception as e:
1251
- print(f" Warning: Could not get already-mined dates for {agent_identifier}: {str(e)}")
1252
- return set()
1253
 
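The removed helper only listed which day files already exist; the `load_review_metadata()` call used elsewhere in app.py is not shown in this diff. As a hedged sketch of how the stored `<agent_identifier>/YYYY.MM.DD.jsonl` files could be read back from the dataset repo (the function name and return shape are assumptions):

import json
from huggingface_hub import HfApi, hf_hub_download

def load_review_metadata_sketch(repo_id):
    """Sketch only: read every <agent_identifier>/YYYY.MM.DD.jsonl file from the dataset repo."""
    api = HfApi()
    records = []
    for path in api.list_repo_files(repo_id=repo_id, repo_type="dataset"):
        if not path.endswith(".jsonl"):
            continue
        local_path = hf_hub_download(repo_id=repo_id, filename=path, repo_type="dataset")
        with open(local_path, "r", encoding="utf-8") as fh:
            for line in fh:
                if line.strip():
                    records.append(json.loads(line))
    return records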
1254
 
1255
  def fetch_review_current_status(review_url, token):
@@ -1554,98 +1375,39 @@ def save_agent_to_hf(data):
1554
 
1555
  def update_all_agents_incremental():
1556
  """
1557
- Comprehensive update of review statistics for all agents.
1558
 
1559
  Strategy:
1560
- 1. For each agent, re-mine ALL reviews within the 6-month window
1561
- 2. This ensures review metadata is always fresh and up-to-date
1562
- 3. Critical for catching status changes (closed/reverted PRs)
1563
- 4. Overwrites existing day files with current data from GitHub API
1564
- 5. Store minimal metadata (not full review objects) to avoid storage limits
1565
- 6. Construct leaderboard from ALL stored metadata (last 6 months)
1566
-
1567
- Note: Unlike the old approach, this does NOT skip already-mined dates.
1568
- This is essential to prevent stale metadata (e.g., reviews closed after initial mining).
1569
-
1570
- Returns dictionary of all agent data with current stats.
1571
  """
1572
- token = get_github_token()
1573
-
1574
- # Load agent metadata from HuggingFace
1575
- agents = load_agents_from_hf()
1576
- if not agents:
1577
- print("No agents found in HuggingFace dataset")
1578
- return {}
1579
-
1580
- cache_dict = {}
1581
-
1582
- # Update each agent
1583
- for agent in agents:
1584
- identifier = agent.get('github_identifier')
1585
- agent_name = agent.get('agent_name', 'Unknown')
1586
-
1587
- if not identifier:
1588
- print(f"Warning: Skipping agent without identifier: {agent}")
1589
- continue
1590
-
1591
- try:
1592
- print(f"\n{'='*80}")
1593
- print(f"Processing: {agent_name} ({identifier})")
1594
- print(f"{'='*80}")
1595
-
1596
- # Get already-mined dates for this agent (last 6 months)
1597
- already_mined_dates = get_already_mined_dates(identifier, n_months=6)
1598
-
1599
- # Always re-mine ALL dates within 6-month window to ensure fresh data
1600
- # This is critical because review metadata can become stale:
1601
- # - PRs can be closed/reverted after initial mining
1602
- # - Status changes need to be captured in daily files
1603
- print(f"📅 Re-mining ALL dates within 6-month window (including {len(already_mined_dates)} existing dates)")
1604
- print(f" This ensures all review metadata is up-to-date...")
1605
-
1606
- # Fetch ALL reviews (no exclusions) to refresh metadata
1607
- new_metadata = fetch_all_reviews_metadata(
1608
- identifier,
1609
- agent_name,
1610
- token,
1611
- start_from_date=None, # Use full 6-month range
1612
- exclude_dates=None # DO NOT exclude - always refresh everything
1613
- )
1614
-
1615
- if new_metadata:
1616
- # Save new metadata to HuggingFace (organized by agent_identifier/YYYY.MM.DD.jsonl)
1617
- print(f"💾 Saving {len(new_metadata)} new review records...")
1618
- save_review_metadata_to_hf(new_metadata, identifier)
1619
- else:
1620
- print(f" No new reviews to save")
1621
-
1622
- # Load ALL metadata to calculate stats (aggregates entire last 6 months)
1623
- print(f"📊 Calculating statistics from ALL stored metadata (last 6 months)...")
1624
- all_metadata = load_review_metadata()
1625
 
1626
- # Filter for this specific agent
1627
- agent_metadata = [review for review in all_metadata if review.get("agent_identifier") == identifier]
 
1628
 
1629
- # Calculate stats from metadata
1630
- stats = calculate_review_stats_from_metadata(agent_metadata)
 
1631
 
1632
- # Merge metadata with stats
1633
- cache_dict[identifier] = {
1634
- 'agent_name': agent_name,
1635
- 'website': agent.get('website', 'N/A'),
1636
- 'github_identifier': identifier,
1637
- **stats
1638
- }
1639
 
1640
- print(f"✓ Updated {identifier}: {stats['total_reviews']} reviews, {stats['acceptance_rate']}% acceptance rate")
1641
 
1642
- except Exception as e:
1643
- print(f"✗ Error updating {identifier}: {str(e)}")
1644
- import traceback
1645
- traceback.print_exc()
1646
- continue
1647
-
1648
- return cache_dict
1649
 
1650
 
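`calculate_review_stats_from_metadata` is referenced throughout this hunk but defined outside the diff. The minimal sketch below shows the kind of aggregation the removed logging implies — a `total_reviews` count and an `acceptance_rate` — assuming a merged flag per record; the `pr_merged` field name is an assumption, not the repository's actual schema.

def calculate_review_stats_sketch(metadata_list):
    """Sketch only: derive total_reviews and acceptance_rate from per-review metadata."""
    total = len(metadata_list)
    merged = sum(1 for record in metadata_list if record.get("pr_merged"))
    return {
        "total_reviews": total,
        "acceptance_rate": round(100 * merged / total, 1) if total else 0.0,
    }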
1651
  def construct_leaderboard_from_metadata():
@@ -1687,57 +1449,6 @@ def construct_leaderboard_from_metadata():
1687
  return cache_dict
1688
 
1689
 
1690
- def initialize_data():
1691
- """
1692
- Initialize data on application startup.
1693
- Constructs leaderboard from review metadata.
1694
-
1695
- In DEBUG MODE:
1696
- - If no data available, automatically mine up to 10 reviews per query per agent
1697
- - Does NOT save to HuggingFace datasets
1698
- """
1699
- print("🚀 Initializing leaderboard data...")
1700
-
1701
- # Try constructing from review metadata (fast, memory-efficient)
1702
- print(f"📂 Checking {REVIEW_METADATA_REPO} for existing data...")
1703
- try:
1704
- cache_dict = construct_leaderboard_from_metadata()
1705
- # Check if there's actually meaningful data (at least one agent with reviews)
1706
- has_data = any(entry.get('total_reviews', 0) > 0 for entry in cache_dict.values())
1707
- if cache_dict and has_data:
1708
- print(f"✓ Found existing review metadata. Leaderboard constructed from {REVIEW_METADATA_REPO}")
1709
- return
1710
- else:
1711
- print(f" No meaningful data found in {REVIEW_METADATA_REPO}")
1712
- except Exception as e:
1713
- print(f" Could not construct from metadata: {e}")
1714
-
1715
- # If in debug mode and no data available, mine immediately
1716
- if DEBUG_MODE:
1717
- print("\n🐛 DEBUG MODE: No data available, mining immediately (up to 10 reviews per query per agent)...")
1718
- agents = load_agents_from_hf()
1719
- if agents:
1720
- print(f"✓ Loaded {len(agents)} agents from HuggingFace")
1721
- print("⛏️ Mining GitHub data in debug mode (limited to 10 reviews per query)...")
1722
- cache_dict = update_all_agents_incremental()
1723
- print("✓ Debug mining complete (data NOT saved to HuggingFace)")
1724
- return
1725
- else:
1726
- print("⚠️ No agents found. Waiting for first submission...")
1727
- return
1728
-
1729
- # Production mode: Fallback to full incremental mining from GitHub
1730
- agents = load_agents_from_hf()
1731
- if agents:
1732
- print(f"✓ Loaded {len(agents)} agents from HuggingFace")
1733
- print("⛏️ Mining GitHub data (this may take a while)...")
1734
- cache_dict = update_all_agents_incremental()
1735
- return
1736
-
1737
- # No data available
1738
- print("⚠️ No data sources available. Waiting for first submission...")
1739
-
1740
-
1741
  # =============================================================================
1742
  # UI FUNCTIONS
1743
  # =============================================================================
@@ -1953,72 +1664,131 @@ def submit_agent(identifier, agent_name, organization, description, website):
1953
  if not save_agent_to_hf(submission):
1954
  return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1955
 
1956
- # Fetch review metadata immediately (memory-efficient)
1957
- token = get_github_token()
1958
- try:
1959
- print(f"Fetching review metadata for {agent_name}...")
1960
-
1961
- # Fetch lightweight metadata
1962
- metadata_list = fetch_all_reviews_metadata(identifier, agent_name, token)
1963
-
1964
- if metadata_list:
1965
- # Save metadata to HuggingFace
1966
- save_review_metadata_to_hf(metadata_list, identifier)
1967
-
1968
- # Calculate stats from metadata
1969
- stats = calculate_review_stats_from_metadata(metadata_list)
1970
-
1971
- return f"✅ Successfully submitted {agent_name}! Stats: {stats['total_reviews']} reviews, {stats['acceptance_rate']}% acceptance rate", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1972
-
1973
- except Exception as e:
1974
- error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch review data: {str(e)}"
1975
- print(error_msg)
1976
- import traceback
1977
- traceback.print_exc()
1978
- return error_msg, get_leaderboard_dataframe(), create_monthly_metrics_plot()
1979
 
1980
 
1981
  # =============================================================================
1982
  # BACKGROUND TASKS
1983
  # =============================================================================
1984
 
1985
- def daily_update_task():
1986
  """
1987
- Daily scheduled task (runs at 12:00 AM UTC) for comprehensive review mining.
1988
 
1989
  Strategy:
1990
- 1. Re-mine ALL reviews within the 6-month window for all agents
1991
- 2. This ensures review metadata is always fresh, catching:
1992
- - PRs that were closed/reverted since last mining
1993
- - Status changes (is_closed, state_reason, closed_at)
1994
- - Any other metadata updates
1995
- 3. Updates ALL day files within LEADERBOARD_TIME_FRAME_DAYS
1996
-
1997
- Unlike the old selective refresh approach, this guarantees no stale data.
1998
  """
1999
- print(f"🕛 Daily Regular Mining started at {datetime.now(timezone.utc).isoformat()}")
2000
- print(f"πŸ•› Daily Regular Mining started at {datetime.now(timezone.utc).isoformat()}")
2001
- print(f"{'='*80}")
2002
 
2003
- try:
2004
- # Run full incremental update for all agents
2005
- # This will re-mine everything in the 6-month window
2006
- print(f"📋 Starting comprehensive re-mining of all agents (6-month window)...")
 
2007
 
2008
- update_all_agents_incremental()
 
 
 
 
2009
 
2010
- print(f"\n{'='*80}")
2011
- print(f"πŸ“Š Mining Summary:")
2012
- print(f" All agents re-mined successfully within 6-month window")
2013
- print(f" All review metadata refreshed and up-to-date")
2014
- print(f"{'='*80}")
2015
 
2016
- print(f"\n✅ Daily Regular Mining completed at {datetime.now(timezone.utc).isoformat()}")
 
 
2017
 
2018
- except Exception as e:
2019
- print(f"βœ— Daily update failed: {str(e)}")
2020
- import traceback
2021
- traceback.print_exc()
 
2022
 
2023
 
2024
  # =============================================================================
@@ -2047,19 +1817,17 @@ else:
2047
  print(" (Explicitly set via '--no-debug' flag)")
2048
  print()
2049
 
2050
- initialize_data()
2051
-
2052
  # Start APScheduler for daily updates at 12:00 AM UTC
2053
  scheduler = BackgroundScheduler(timezone="UTC")
2054
  scheduler.add_job(
2055
- daily_update_task,
2056
  trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
2057
  id='daily_review_mining',
2058
  name='Daily Regular Review Mining',
2059
  replace_existing=True
2060
  )
2061
  scheduler.start()
2062
- print("✓ Scheduler started: Daily Regular Mining at 12:00 AM UTC (re-mines all reviews within 6-month window)")
2063
 
2064
  # Create Gradio interface
2065
  with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
1375
 
1376
  def update_all_agents_incremental():
1377
  """
1378
+ Daily scheduled task for incremental review mining and statistics update.
1379
 
1380
  Strategy:
1381
+ 1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 1 days)
1382
+ 2. Fetch yesterday's new reviews
1383
+ 3. Save all updated/new metadata back to HuggingFace
1384
+ 4. Reload statistics from updated metadata
 
 
 
 
 
 
 
1385
  """
1386
+ print(f"\n{'='*80}")
1387
+ print(f"🕛 Daily Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
1388
+ print(f"{'='*80}")
1389
 
1390
+ try:
1391
+ # Fetch and update reviews
1392
+ fetch_and_update_daily_reviews()
1393
 
1394
+ # Reload statistics from updated metadata
1395
+ print(f"\n📋 Reloading statistics from updated review metadata...")
1396
+ construct_leaderboard_from_metadata()
1397
 
1398
+ print(f"\n{'='*80}")
1399
+ print(f"📊 Update Summary:")
1400
+ print(f" ✓ Updated existing review statuses")
1401
+ print(f" ✓ Fetched yesterday's new reviews")
1402
+ print(f" ✓ Statistics reloaded")
1403
+ print(f"{'='*80}")
 
1404
 
1405
+ print(f"\n✅ Daily Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
1406
 
1407
+ except Exception as e:
1408
+ print(f"✗ Daily update failed: {str(e)}")
1409
+ import traceback
1410
+ traceback.print_exc()
 
 
 
1411
 
1412
 
1413
  def construct_leaderboard_from_metadata():
1664
  if not save_agent_to_hf(submission):
1665
  return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1666
 
1667
+ # Return success message - data will be populated by daily incremental updates
1668
+ return f"✅ Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe(), create_monthly_metrics_plot()
1669
 
1670
 
1671
  # =============================================================================
1672
  # BACKGROUND TASKS
1673
  # =============================================================================
1674
 
1675
+ def fetch_and_update_daily_reviews():
1676
  """
1677
+ Fetch and update reviews with comprehensive status checking.
1678
 
1679
  Strategy:
1680
+ 1. For each agent:
1681
+ - Examine ALL open reviews from the last LEADERBOARD_TIME_FRAME_DAYS - 1 days for their closed_at status
1682
+ - Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 1 days)
1683
+ - Fetch new reviews from yesterday 12am to today 12am
1684
+ - Save all updated/new metadata back to HuggingFace
 
 
 
1685
  """
1686
+ token = get_github_token()
1687
+ headers = {'Authorization': f'token {token}'} if token else {}
 
1688
 
1689
+ # Load all agents
1690
+ agents = load_agents_from_hf()
1691
+ if not agents:
1692
+ print("No agents found in HuggingFace dataset")
1693
+ return
1694
 
1695
+ # Calculate date range
1696
+ today_utc = datetime.now(timezone.utc)
1697
+ today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
1698
+ yesterday_midnight = today_midnight - timedelta(days=1)
1699
+ cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - 1)
1700
 
1701
+ print(f"📅 Time Range Configuration:")
1702
+ print(f" Yesterday 12am UTC: {yesterday_midnight.isoformat()}")
1703
+ print(f" Today 12am UTC: {today_midnight.isoformat()}")
1704
+ print(f" Cutoff for existing reviews: {cutoff_date.isoformat()}")
1705
+ print(f" Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
1706
 
1707
+ for agent in agents:
1708
+ identifier = agent.get('github_identifier')
1709
+ agent_name = agent.get('agent_name', 'Unknown')
1710
 
1711
+ if not identifier:
1712
+ print(f"Warning: Skipping agent without identifier: {agent}")
1713
+ continue
1714
+
1715
+ try:
1716
+ print(f"\n{'='*60}")
1717
+ print(f"Processing: {agent_name} ({identifier})")
1718
+ print(f"{'='*60}")
1719
+
1720
+ # Step 1: Load all existing metadata within timeframe
1721
+ print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - 1} days...")
1722
+ all_metadata = load_review_metadata()
1723
+ agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
1724
+
1725
+ # Filter to last LEADERBOARD_TIME_FRAME_DAYS - 1 days (from cutoff to today)
1726
+ recent_metadata = []
1727
+ for review in agent_metadata:
1728
+ reviewed_at = review.get('reviewed_at', '')
1729
+ if reviewed_at:
1730
+ try:
1731
+ review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
1732
+ if cutoff_date <= review_date < today_midnight:
1733
+ recent_metadata.append(review)
1734
+ except Exception as e:
1735
+ print(f" Warning: Could not parse date '{reviewed_at}': {e}")
1736
+ continue
1737
+
1738
+ print(f" ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
1739
+
1740
+ # Step 2: Examine ALL open reviews for their closed_at status
1741
+ # This ensures we capture any reviews that may have been closed/merged since last check
1742
+ if recent_metadata:
1743
+ print(f"🔍 Examining {len(recent_metadata)} open reviews for status updates (checking closed_at)...")
1744
+ recent_metadata = update_pr_status(recent_metadata, headers, token)
1745
+ print(f" ✓ Updated PR status for existing reviews")
1746
+
1747
+ # Step 3: Fetch NEW reviews from yesterday 12am to today 12am
1748
+ print(f"🔍 Fetching new reviews from {yesterday_midnight.isoformat()} to {today_midnight.isoformat()}...")
1749
+
1750
+ base_query = f'is:pr review:approved author:{identifier} -is:draft'
1751
+ prs_by_url = {}
1752
+
1753
+ fetch_reviews_with_time_partition(
1754
+ base_query,
1755
+ yesterday_midnight,
1756
+ today_midnight,
1757
+ headers,
1758
+ prs_by_url,
1759
+ debug_limit=None
1760
+ )
1761
+
1762
+ # Extract metadata for new reviews
1763
+ yesterday_metadata = []
1764
+ for pr_url, pr in prs_by_url.items():
1765
+ metadata = extract_review_metadata(pr)
1766
+ if metadata:
1767
+ metadata['agent_identifier'] = identifier
1768
+ yesterday_metadata.append(metadata)
1769
+
1770
+ print(f" ✓ Found {len(yesterday_metadata)} new reviews in 24-hour window")
1771
+
1772
+ # Step 4: Update PR status for new reviews
1773
+ if yesterday_metadata:
1774
+ print(f" Updating PR status for {len(yesterday_metadata)} new reviews...")
1775
+ yesterday_metadata = update_pr_status(yesterday_metadata, headers, token)
1776
+
1777
+ # Step 5: Combine and save all metadata
1778
+ all_updated_metadata = recent_metadata + yesterday_metadata
1779
+
1780
+ if all_updated_metadata:
1781
+ print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
1782
+ save_review_metadata_to_hf(all_updated_metadata, identifier)
1783
+ print(f"✓ Updated {identifier}: {len(recent_metadata)} existing (status checked) + {len(yesterday_metadata)} new = {len(all_updated_metadata)} total")
1784
+ else:
1785
+ print(f" No reviews to save for {identifier}")
1786
+
1787
+ except Exception as e:
1788
+ print(f"✗ Error processing {identifier}: {str(e)}")
1789
+ import traceback
1790
+ traceback.print_exc()
1791
+ continue
1792
 
1793
 
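`extract_review_metadata` and `save_review_metadata_to_hf`, used above, are also defined outside this diff. A hedged sketch of the day-partitioned persistence the comments describe — one `<agent_identifier>/YYYY.MM.DD.jsonl` file per day — is shown below; the `reviewed_at` grouping key mirrors the field filtered on above, and everything else (function name, upload flow) is an assumption.

import io
import json
from collections import defaultdict
from datetime import datetime
from huggingface_hub import HfApi

def save_review_metadata_sketch(records, agent_identifier, repo_id):
    """Sketch only: write records into <agent_identifier>/YYYY.MM.DD.jsonl files in the dataset repo."""
    api = HfApi()
    by_day = defaultdict(list)
    for record in records:
        day = datetime.fromisoformat(record["reviewed_at"].replace("Z", "+00:00")).date()
        by_day[day].append(record)

    for day, day_records in by_day.items():
        payload = "\n".join(json.dumps(r) for r in day_records).encode("utf-8")
        api.upload_file(
            path_or_fileobj=io.BytesIO(payload),
            path_in_repo=f"{agent_identifier}/{day.strftime('%Y.%m.%d')}.jsonl",
            repo_id=repo_id,
            repo_type="dataset",
        )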
1794
  # =============================================================================
 
1817
  print(" (Explicitly set via '--no-debug' flag)")
1818
  print()
1819
 
 
 
1820
  # Start APScheduler for daily updates at 12:00 AM UTC
1821
  scheduler = BackgroundScheduler(timezone="UTC")
1822
  scheduler.add_job(
1823
+ update_all_agents_incremental,
1824
  trigger=CronTrigger(hour=0, minute=0), # 12:00 AM UTC daily
1825
  id='daily_review_mining',
1826
  name='Daily Regular Review Mining',
1827
  replace_existing=True
1828
  )
1829
  scheduler.start()
1830
+ print("✓ Scheduler started: Daily Incremental Update at 12:00 AM UTC (updates existing metadata + mines yesterday's reviews)")
1831
 
1832
  # Create Gradio interface
1833
  with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app: