update daily msr
app.py
CHANGED
@@ -584,132 +584,6 @@ def update_pr_status(metadata_list, headers, token):
     return metadata_list
 
 
-def fetch_all_reviews_metadata(identifier, agent_name, token=None, start_from_date=None, exclude_dates=None):
-    """
-    Fetch PR reviews associated with a GitHub user or bot for the past 6 months.
-    Returns lightweight metadata instead of full review objects.
-
-    This function employs time-based partitioning to navigate GitHub's 1000-result limit per query.
-    It searches using the query pattern:
-    - reviewed-by:{identifier} (PR reviews by the agent)
-
-    After fetching reviews, it updates PR status to determine if PRs were merged or closed.
-
-    Args:
-        identifier: GitHub username or bot identifier
-        agent_name: Human-readable name of the agent for metadata purposes
-        token: GitHub API token for authentication
-        start_from_date: Only fetch reviews created after this date (for incremental updates)
-        exclude_dates: Set of date objects to exclude from mining (dates that have already been processed)
-
-    Returns:
-        List of dictionaries containing minimal PR review metadata with PR status
-    """
-    headers = {'Authorization': f'token {token}'} if token else {}
-
-    # Debug mode: limit review retrieval for testing
-    debug_limit_per_pattern = 10 if DEBUG_MODE else None
-
-    if DEBUG_MODE:
-        print(f"\n🐛 DEBUG MODE ENABLED: Limiting to {debug_limit_per_pattern} reviews per query pattern")
-
-    # Define query pattern for PR reviews:
-    query_patterns = []
-
-    # Add reviewed-by pattern for PR reviews
-    query_patterns.append(f'is:pr reviewed-by:{identifier}')
-
-    # Use a dict to deduplicate PRs by URL
-    prs_by_url = {}
-
-    # Define time range: past 6 months only (or from start_from_date if specified)
-    current_time = datetime.now(timezone.utc)
-    six_months_ago = current_time - timedelta(days=180)  # ~6 months
-
-    if start_from_date:
-        # Use start_from_date but ensure it's not older than 6 months
-        start_date = max(start_from_date, six_months_ago)
-    else:
-        start_date = six_months_ago
-
-    # End date is current time
-    end_date = current_time
-
-    for query_pattern in query_patterns:
-        print(f"\n🔍 Searching with query: {query_pattern}")
-        print(f"   Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}")
-
-        pattern_start_time = time.time()
-        initial_count = len(prs_by_url)
-
-        # Fetch with time partitioning
-        reviews_found = fetch_reviews_with_time_partition(
-            query_pattern,
-            start_date,
-            end_date,
-            headers,
-            prs_by_url,
-            debug_limit_per_pattern
-        )
-
-        pattern_duration = time.time() - pattern_start_time
-        new_reviews = len(prs_by_url) - initial_count
-
-        print(f"   ✓ Pattern complete: {new_reviews} new PRs found ({reviews_found} total fetched, {len(prs_by_url) - initial_count - (reviews_found - new_reviews)} duplicates)")
-        print(f"   ⏱️ Time taken: {pattern_duration:.1f} seconds")
-
-        # Delay between different query patterns (shorter in debug mode)
-        time.sleep(0.2 if DEBUG_MODE else 1.0)
-
-    # Convert to lightweight metadata
-    all_prs = list(prs_by_url.values())
-
-    # Filter out PRs from excluded dates if specified
-    if exclude_dates:
-        filtered_prs = []
-        excluded_count = 0
-        for pr in all_prs:
-            created_at = pr.get('created_at')
-            if created_at:
-                try:
-                    dt = datetime.fromisoformat(created_at.replace('Z', '+00:00'))
-                    pr_date = dt.date()
-                    if pr_date not in exclude_dates:
-                        filtered_prs.append(pr)
-                    else:
-                        excluded_count += 1
-                except Exception:
-                    filtered_prs.append(pr)  # Keep PRs with unparseable dates
-            else:
-                filtered_prs.append(pr)  # Keep PRs without created_at
-
-        if excluded_count > 0:
-            print(f"   ⏭️ Skipped {excluded_count} PRs from already-mined dates")
-        all_prs = filtered_prs
-
-    if DEBUG_MODE:
-        print(f"\n✅ COMPLETE (DEBUG MODE): Found {len(all_prs)} unique PRs reviewed by {identifier}")
-        print(f"   Note: In production mode, this would fetch ALL PRs")
-    else:
-        print(f"\n✅ COMPLETE: Found {len(all_prs)} unique PRs reviewed by {identifier}")
-    print(f"📦 Extracting minimal metadata and updating PR status...")
-
-    # Extract metadata for each PR review
-    metadata_list = [extract_review_metadata(pr) for pr in all_prs]
-
-    # Update PR status to get current merged/closed state
-    print(f"🔄 Updating PR status for reviewed PRs...")
-    metadata_list = update_pr_status(metadata_list, headers, token)
-
-    # Calculate memory savings
-    import sys
-    original_size = sys.getsizeof(str(all_prs))
-    metadata_size = sys.getsizeof(str(metadata_list))
-    savings_pct = ((original_size - metadata_size) / original_size * 100) if original_size > 0 else 0
-
-    print(f"💾 Memory efficiency: {original_size // 1024}KB → {metadata_size // 1024}KB (saved {savings_pct:.1f}%)")
-
-    return metadata_list
 
 
 def calculate_review_stats_from_metadata(metadata_list):
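The removed `fetch_all_reviews_metadata` delegated the actual querying to `fetch_reviews_with_time_partition`, which this diff does not show. As a rough illustration of the technique its docstring describes, recursively splitting the date range so no single GitHub search query exceeds the API's 1,000-result cap, a minimal sketch might look like the following; the function name, parameters, and lack of rate-limit handling are assumptions, not the app's code:

```python
# Sketch only: time-based partitioning of GitHub search queries to stay under the
# Search API's 1,000-result cap. Not the repository's fetch_reviews_with_time_partition.
import requests

def search_prs_partitioned(query, start, end, headers, results, per_page=100):
    """Recursively split [start, end] until each window returns <= 1000 results."""
    window = f"{start.date().isoformat()}..{end.date().isoformat()}"
    url = "https://api.github.com/search/issues"
    params = {"q": f"{query} created:{window}", "per_page": per_page, "page": 1}
    resp = requests.get(url, headers=headers, params=params)
    resp.raise_for_status()
    total = resp.json().get("total_count", 0)

    if total > 1000 and (end - start).days > 1:
        # Too many hits for one window: split the range in half and recurse.
        mid = start + (end - start) / 2
        search_prs_partitioned(query, start, mid, headers, results)
        search_prs_partitioned(query, mid, end, headers, results)
        return

    page = 1
    while True:
        params["page"] = page
        items = requests.get(url, headers=headers, params=params).json().get("items", [])
        if not items:
            break
        for item in items:
            results[item["html_url"]] = item  # dict keyed by URL deduplicates overlapping windows
        if len(items) < per_page:
            break
        page += 1
```

Keying the accumulator by PR URL is what lets overlapping windows and multiple query patterns be merged without duplicates, the same role `prs_by_url` plays in the removed code.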
@@ -1197,59 +1071,6 @@ def get_daily_files_last_n_months(agent_identifier, n_months=6):
     return []
 
 
-def get_already_mined_dates(agent_identifier, n_months=6):
-    """
-    Get set of dates that have already been mined for an agent.
-
-    Args:
-        agent_identifier: GitHub identifier of the agent
-        n_months: Number of months to look back (default: 6)
-
-    Returns:
-        Set of date objects (datetime.date) that already have data files
-    """
-    try:
-        api = HfApi()
-
-        # Calculate date range
-        today = datetime.now(timezone.utc)
-        n_months_ago = today - timedelta(days=30 * n_months)
-
-        # List all files in the repository
-        files = api.list_repo_files(repo_id=REVIEW_METADATA_REPO, repo_type="dataset")
-
-        # Filter for files in this agent's folder
-        agent_pattern = f"{agent_identifier}/"
-        agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')]
-
-        mined_dates = set()
-        for filename in agent_files:
-            try:
-                # Extract date from filename: [agent_identifier]/YYYY.MM.DD.jsonl
-                parts = filename.split('/')
-                if len(parts) != 2:
-                    continue
-
-                date_part = parts[1].replace('.jsonl', '')  # Get YYYY.MM.DD
-                date_components = date_part.split('.')
-                if len(date_components) != 3:
-                    continue
-
-                file_year, file_month, file_day = map(int, date_components)
-                file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc).date()
-
-                # Only include dates within the last n_months
-                if n_months_ago.date() <= file_date <= today.date():
-                    mined_dates.add(file_date)
-            except Exception as e:
-                print(f"   Warning: Could not parse date from filename {filename}: {e}")
-                continue
-
-        return mined_dates
-
-    except Exception as e:
-        print(f"   Warning: Could not get already-mined dates for {agent_identifier}: {str(e)}")
-        return set()
 
 
 def fetch_review_current_status(review_url, token):
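The removed helper derived already-mined dates purely from the `agent_identifier/YYYY.MM.DD.jsonl` naming convention. A self-contained sketch of that filename parsing under the same assumed layout (the helper name is illustrative, not part of app.py):

```python
# Sketch: parse "agent_identifier/YYYY.MM.DD.jsonl" paths into datetime.date objects,
# mirroring the filename convention the removed get_already_mined_dates relied on.
from datetime import date
from typing import Optional

def parse_mined_date(path: str) -> Optional[date]:
    parts = path.split('/')
    if len(parts) != 2 or not parts[1].endswith('.jsonl'):
        return None
    pieces = parts[1][:-len('.jsonl')].split('.')
    if len(pieces) != 3:
        return None
    try:
        year, month, day = map(int, pieces)
        return date(year, month, day)
    except ValueError:
        return None

assert parse_mined_date("some-bot/2024.05.31.jsonl") == date(2024, 5, 31)
assert parse_mined_date("some-bot/notes.txt") is None
```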
@@ -1554,98 +1375,39 @@ def save_agent_to_hf(data):
 
 def update_all_agents_incremental():
     """
-
+    Daily scheduled task for incremental review mining and statistics update.
 
     Strategy:
-    1.
-    2.
-    3.
-    4.
-    5. Store minimal metadata (not full review objects) to avoid storage limits
-    6. Construct leaderboard from ALL stored metadata (last 6 months)
-
-    Note: Unlike the old approach, this does NOT skip already-mined dates.
-    This is essential to prevent stale metadata (e.g., reviews closed after initial mining).
-
-    Returns dictionary of all agent data with current stats.
+    1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 1)
+    2. Fetch yesterday's new reviews
+    3. Save all updated/new metadata back to HuggingFace
+    4. Reload statistics from updated metadata
     """
-
-
-
-    agents = load_agents_from_hf()
-    if not agents:
-        print("No agents found in HuggingFace dataset")
-        return {}
-
-    cache_dict = {}
-
-    # Update each agent
-    for agent in agents:
-        identifier = agent.get('github_identifier')
-        agent_name = agent.get('agent_name', 'Unknown')
-
-        if not identifier:
-            print(f"Warning: Skipping agent without identifier: {agent}")
-            continue
-
-        try:
-            print(f"\n{'='*80}")
-            print(f"Processing: {agent_name} ({identifier})")
-            print(f"{'='*80}")
-
-            # Get already-mined dates for this agent (last 6 months)
-            already_mined_dates = get_already_mined_dates(identifier, n_months=6)
-
-            # Always re-mine ALL dates within 6-month window to ensure fresh data
-            # This is critical because review metadata can become stale:
-            # - PRs can be closed/reverted after initial mining
-            # - Status changes need to be captured in daily files
-            print(f"📅 Re-mining ALL dates within 6-month window (including {len(already_mined_dates)} existing dates)")
-            print(f"   This ensures all review metadata is up-to-date...")
-
-            # Fetch ALL reviews (no exclusions) to refresh metadata
-            new_metadata = fetch_all_reviews_metadata(
-                identifier,
-                agent_name,
-                token,
-                start_from_date=None,  # Use full 6-month range
-                exclude_dates=None  # DO NOT exclude - always refresh everything
-            )
-
-            if new_metadata:
-                # Save new metadata to HuggingFace (organized by agent_identifier/YYYY.MM.DD.jsonl)
-                print(f"💾 Saving {len(new_metadata)} new review records...")
-                save_review_metadata_to_hf(new_metadata, identifier)
-            else:
-                print(f"   No new reviews to save")
-
-            # Load ALL metadata to calculate stats (aggregates entire last 6 months)
-            print(f"📊 Calculating statistics from ALL stored metadata (last 6 months)...")
-            all_metadata = load_review_metadata()
-
-            }
-
-                continue
-
-    return cache_dict
+    print(f"\n{'='*80}")
+    print(f"🔄 Daily Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
+    print(f"{'='*80}")
+
+    try:
+        # Fetch and update reviews
+        fetch_and_update_daily_reviews()
+
+        # Reload statistics from updated metadata
+        print(f"\n📊 Reloading statistics from updated review metadata...")
+        construct_leaderboard_from_metadata()
+
+        print(f"\n{'='*80}")
+        print(f"📊 Update Summary:")
+        print(f"   ✓ Updated existing review statuses")
+        print(f"   ✓ Fetched yesterday's new reviews")
+        print(f"   ✓ Statistics reloaded")
+        print(f"{'='*80}")
+
+        print(f"\n✅ Daily Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
+
+    except Exception as e:
+        print(f"❌ Daily update failed: {str(e)}")
+        import traceback
+        traceback.print_exc()
 
 
 def construct_leaderboard_from_metadata():
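Both the old and the new version of `update_all_agents_incremental` rely on `update_pr_status` to refresh stale merged/closed state, which the diff does not show. A hedged sketch of the general idea, refreshing a single stored record from the GitHub pulls endpoint; the field names used here (`pr_api_url`, `pr_status`) are assumptions about the metadata shape, not the app's schema:

```python
# Sketch: refresh one stored review record's PR state from the GitHub REST API.
# Illustrates the idea behind update_pr_status; not the app's implementation.
import requests

def refresh_pr_state(record, headers):
    api_url = record.get("pr_api_url")  # e.g. https://api.github.com/repos/{owner}/{repo}/pulls/{number}
    if not api_url:
        return record
    resp = requests.get(api_url, headers=headers)
    if resp.status_code != 200:
        return record  # keep the previously stored state if the lookup fails
    pr = resp.json()
    if pr.get("merged_at"):
        record["pr_status"] = "merged"
    elif pr.get("state") == "closed":
        record["pr_status"] = "closed"
    else:
        record["pr_status"] = "open"
    record["closed_at"] = pr.get("closed_at")
    return record
```

Re-checking every record within the time frame, rather than only newly mined ones, is what the commit's "no stale data" comments are about: a PR reviewed weeks ago can be merged or closed long after its metadata was first written.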
@@ -1687,57 +1449,6 @@ def construct_leaderboard_from_metadata():
     return cache_dict
 
 
-def initialize_data():
-    """
-    Initialize data on application startup.
-    Constructs leaderboard from review metadata.
-
-    In DEBUG MODE:
-    - If no data available, automatically mine up to 10 reviews per query per agent
-    - Does NOT save to HuggingFace datasets
-    """
-    print("🚀 Initializing leaderboard data...")
-
-    # Try constructing from review metadata (fast, memory-efficient)
-    print(f"📂 Checking {REVIEW_METADATA_REPO} for existing data...")
-    try:
-        cache_dict = construct_leaderboard_from_metadata()
-        # Check if there's actually meaningful data (at least one agent with reviews)
-        has_data = any(entry.get('total_reviews', 0) > 0 for entry in cache_dict.values())
-        if cache_dict and has_data:
-            print(f"✅ Found existing review metadata. Leaderboard constructed from {REVIEW_METADATA_REPO}")
-            return
-        else:
-            print(f"   No meaningful data found in {REVIEW_METADATA_REPO}")
-    except Exception as e:
-        print(f"   Could not construct from metadata: {e}")
-
-    # If in debug mode and no data available, mine immediately
-    if DEBUG_MODE:
-        print("\n🐛 DEBUG MODE: No data available, mining immediately (up to 10 reviews per query per agent)...")
-        agents = load_agents_from_hf()
-        if agents:
-            print(f"✅ Loaded {len(agents)} agents from HuggingFace")
-            print("⛏️ Mining GitHub data in debug mode (limited to 10 reviews per query)...")
-            cache_dict = update_all_agents_incremental()
-            print("✅ Debug mining complete (data NOT saved to HuggingFace)")
-            return
-        else:
-            print("⚠️ No agents found. Waiting for first submission...")
-            return
-
-    # Production mode: Fallback to full incremental mining from GitHub
-    agents = load_agents_from_hf()
-    if agents:
-        print(f"✅ Loaded {len(agents)} agents from HuggingFace")
-        print("⛏️ Mining GitHub data (this may take a while)...")
-        cache_dict = update_all_agents_incremental()
-        return
-
-    # No data available
-    print("⚠️ No data sources available. Waiting for first submission...")
-
-
 # =============================================================================
 # UI FUNCTIONS
 # =============================================================================
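The removed `initialize_data` path built the leaderboard from metadata already stored in the `REVIEW_METADATA_REPO` dataset. A minimal sketch of what reading those per-day JSONL files back from a Hugging Face dataset can look like, assuming the `<agent_identifier>/YYYY.MM.DD.jsonl` layout described elsewhere in this diff; the loader name and return shape are illustrative, not the app's `load_review_metadata`:

```python
# Sketch: read every JSONL file in a Hugging Face dataset repo into a list of dicts.
import json
from huggingface_hub import HfApi, hf_hub_download

def load_jsonl_records(repo_id: str) -> list:
    api = HfApi()
    records = []
    for path in api.list_repo_files(repo_id=repo_id, repo_type="dataset"):
        if not path.endswith(".jsonl"):
            continue
        local = hf_hub_download(repo_id=repo_id, filename=path, repo_type="dataset")
        with open(local, "r", encoding="utf-8") as fh:
            for line in fh:
                line = line.strip()
                if line:
                    records.append(json.loads(line))
    return records
```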
@@ -1953,72 +1664,131 @@ def submit_agent(identifier, agent_name, organization, description, website):
     if not save_agent_to_hf(submission):
         return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot()
 
-    #
-
-    try:
-        print(f"Fetching review metadata for {agent_name}...")
-
-        # Fetch lightweight metadata
-        metadata_list = fetch_all_reviews_metadata(identifier, agent_name, token)
-
-        if metadata_list:
-            # Save metadata to HuggingFace
-            save_review_metadata_to_hf(metadata_list, identifier)
-
-            # Calculate stats from metadata
-            stats = calculate_review_stats_from_metadata(metadata_list)
-
-            return f"✅ Successfully submitted {agent_name}! Stats: {stats['total_reviews']} reviews, {stats['acceptance_rate']}% acceptance rate", get_leaderboard_dataframe(), create_monthly_metrics_plot()
-
-    except Exception as e:
-        error_msg = f"⚠️ Submitted {agent_name}, but failed to fetch review data: {str(e)}"
-        print(error_msg)
-        import traceback
-        traceback.print_exc()
-        return error_msg, get_leaderboard_dataframe(), create_monthly_metrics_plot()
+    # Return success message - data will be populated by daily incremental updates
+    return f"✅ Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe(), create_monthly_metrics_plot()
 
 
 # =============================================================================
 # BACKGROUND TASKS
 # =============================================================================
 
-def
+def fetch_and_update_daily_reviews():
     """
-
+    Fetch and update reviews with comprehensive status checking.
 
     Strategy:
-    1.
-
-       -
-       -
-       -
-    3. Updates ALL day files within LEADERBOARD_TIME_FRAME_DAYS
-
-    Unlike the old selective refresh approach, this guarantees no stale data.
+    1. For each agent:
+       - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - 1 for their closed_at status
+       - Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 1)
+       - Fetch new reviews from yesterday 12am to today 12am
+       - Save all updated/new metadata back to HuggingFace
     """
-
-
-    print(f"{'='*80}")
-
-
-
-    print(
-
+    token = get_github_token()
+    headers = {'Authorization': f'token {token}'} if token else {}
+
+    # Load all agents
+    agents = load_agents_from_hf()
+    if not agents:
+        print("No agents found in HuggingFace dataset")
+        return
+
+    # Calculate date range
+    today_utc = datetime.now(timezone.utc)
+    today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
+    yesterday_midnight = today_midnight - timedelta(days=1)
+    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - 1)
+
+    print(f"📅 Time Range Configuration:")
+    print(f"   Yesterday 12am UTC: {yesterday_midnight.isoformat()}")
+    print(f"   Today 12am UTC: {today_midnight.isoformat()}")
+    print(f"   Cutoff for existing reviews: {cutoff_date.isoformat()}")
+    print(f"   Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
+
+    for agent in agents:
+        identifier = agent.get('github_identifier')
+        agent_name = agent.get('agent_name', 'Unknown')
+
+        if not identifier:
+            print(f"Warning: Skipping agent without identifier: {agent}")
+            continue
+
+        try:
+            print(f"\n{'='*60}")
+            print(f"Processing: {agent_name} ({identifier})")
+            print(f"{'='*60}")
+
+            # Step 1: Load all existing metadata within timeframe
+            print(f"📂 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - 1} days...")
+            all_metadata = load_review_metadata()
+            agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
+
+            # Filter to last LEADERBOARD_TIME_FRAME_DAYS - 1 days (from cutoff to today)
+            recent_metadata = []
+            for review in agent_metadata:
+                reviewed_at = review.get('reviewed_at', '')
+                if reviewed_at:
+                    try:
+                        review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
+                        if cutoff_date <= review_date < today_midnight:
+                            recent_metadata.append(review)
+                    except Exception as e:
+                        print(f"   Warning: Could not parse date '{reviewed_at}': {e}")
+                        continue
+
+            print(f"   ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
+
+            # Step 2: Examine ALL open reviews for their closed_at status
+            # This ensures we capture any reviews that may have been closed/merged since last check
+            if recent_metadata:
+                print(f"🔍 Examining {len(recent_metadata)} open reviews for status updates (checking closed_at)...")
+                recent_metadata = update_pr_status(recent_metadata, headers, token)
+                print(f"   ✓ Updated PR status for existing reviews")
+
+            # Step 3: Fetch NEW reviews from yesterday 12am to today 12am
+            print(f"🔍 Fetching new reviews from {yesterday_midnight.isoformat()} to {today_midnight.isoformat()}...")
+
+            base_query = f'is:pr review:approved author:{identifier} -is:draft'
+            prs_by_url = {}
+
+            fetch_reviews_with_time_partition(
+                base_query,
+                yesterday_midnight,
+                today_midnight,
+                headers,
+                prs_by_url,
+                debug_limit=None
+            )
+
+            # Extract metadata for new reviews
+            yesterday_metadata = []
+            for pr_url, pr in prs_by_url.items():
+                metadata = extract_review_metadata(pr)
+                if metadata:
+                    metadata['agent_identifier'] = identifier
+                    yesterday_metadata.append(metadata)
+
+            print(f"   ✓ Found {len(yesterday_metadata)} new reviews in 24-hour window")
+
+            # Step 4: Update PR status for new reviews
+            if yesterday_metadata:
+                print(f"   Updating PR status for {len(yesterday_metadata)} new reviews...")
+                yesterday_metadata = update_pr_status(yesterday_metadata, headers, token)
+
+            # Step 5: Combine and save all metadata
+            all_updated_metadata = recent_metadata + yesterday_metadata
+
+            if all_updated_metadata:
+                print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
+                save_review_metadata_to_hf(all_updated_metadata, identifier)
+                print(f"✅ Updated {identifier}: {len(recent_metadata)} existing (status checked) + {len(yesterday_metadata)} new = {len(all_updated_metadata)} total")
+            else:
+                print(f"   No reviews to save for {identifier}")
+
+        except Exception as e:
+            print(f"❌ Error processing {identifier}: {str(e)}")
+            import traceback
+            traceback.print_exc()
+            continue
 
 
 # =============================================================================
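The new `fetch_and_update_daily_reviews` anchors its windows to UTC midnights. A small worked example of that arithmetic, assuming `LEADERBOARD_TIME_FRAME_DAYS = 180` (the constant's actual value is not shown in this diff):

```python
# Worked example of the UTC day-window arithmetic used above: the job mines new reviews
# from [yesterday 00:00 UTC, today 00:00 UTC) and keeps a longer window for status refreshes.
from datetime import datetime, timedelta, timezone

LEADERBOARD_TIME_FRAME_DAYS = 180  # assumption for this example

now = datetime(2024, 6, 15, 9, 30, tzinfo=timezone.utc)
today_midnight = datetime.combine(now.date(), datetime.min.time(), tzinfo=timezone.utc)
yesterday_midnight = today_midnight - timedelta(days=1)
cutoff = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - 1)

print(yesterday_midnight.isoformat())  # 2024-06-14T00:00:00+00:00
print(today_midnight.isoformat())      # 2024-06-15T00:00:00+00:00
print(cutoff.isoformat())              # 2023-12-19T00:00:00+00:00
```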
@@ -2047,19 +1817,17 @@ else:
     print("   (Explicitly set via '--no-debug' flag)")
     print()
 
-initialize_data()
-
 # Start APScheduler for daily updates at 12:00 AM UTC
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
-
+    update_all_agents_incremental,
     trigger=CronTrigger(hour=0, minute=0),  # 12:00 AM UTC daily
     id='daily_review_mining',
     name='Daily Regular Review Mining',
     replace_existing=True
 )
 scheduler.start()
-print("✅ Scheduler started: Daily
+print("✅ Scheduler started: Daily Incremental Update at 12:00 AM UTC (updates existing metadata + mines yesterday's reviews)")
 
 # Create Gradio interface
 with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
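For reference, the scheduling pattern wired up above, shown in isolation with a stand-in job; this mirrors the APScheduler calls in the diff rather than adding anything new:

```python
# Minimal, self-contained sketch of the scheduling pattern: APScheduler's BackgroundScheduler
# running a job at 00:00 UTC every day. The job body is a stand-in for the real task.
from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def nightly_job():
    print("running daily incremental update...")  # stand-in for update_all_agents_incremental

scheduler = BackgroundScheduler(timezone="UTC")
scheduler.add_job(
    nightly_job,
    trigger=CronTrigger(hour=0, minute=0),  # 12:00 AM UTC daily
    id="daily_review_mining",
    name="Daily Regular Review Mining",
    replace_existing=True,
)
scheduler.start()
# BackgroundScheduler runs in a daemon thread, so the main process (here, the Gradio app)
# must stay alive for the job to fire.
```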