|
|
import gradio as gr |
|
|
from gradio_leaderboard import Leaderboard, ColumnFilter |
|
|
import json |
|
|
import os |
|
|
import time |
|
|
import tempfile |
|
|
import requests |
|
|
from datetime import datetime, timezone, timedelta |
|
|
from collections import defaultdict |
|
|
from huggingface_hub import HfApi, hf_hub_download |
|
|
from huggingface_hub.errors import HfHubHTTPError |
|
|
from dotenv import load_dotenv |
|
|
import pandas as pd |
|
|
import random |
|
|
import plotly.graph_objects as go |
|
|
from plotly.subplots import make_subplots |
|
|
from apscheduler.schedulers.background import BackgroundScheduler |
|
|
from apscheduler.triggers.cron import CronTrigger |
|
|
from google.cloud import bigquery |
|
|
import backoff |
|
|
|
|
|
|
|
|
load_dotenv() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
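# Configuration: HuggingFace dataset repos and time frames (in days) used by the leaderboard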
|
|
AGENTS_REPO = "SWE-Arena/bot_metadata" |
|
|
ISSUE_METADATA_REPO = "SWE-Arena/issue_metadata" |
|
|
LEADERBOARD_REPO = "SWE-Arena/leaderboard_metadata" |
|
|
LEADERBOARD_TIME_FRAME_DAYS = 180 |
|
|
UPDATE_TIME_FRAME_DAYS = 30 |
|
|
|
|
|
LEADERBOARD_COLUMNS = [ |
|
|
("Agent Name", "string"), |
|
|
("Website", "string"), |
|
|
("Total Issues", "number"), |
|
|
("Resolved Issues", "number"), |
|
|
("Resolved Rate (%)", "number"), |
|
|
] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
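# HuggingFace Hub wrappers that retry with exponential backoff when rate limited (HTTP 429)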
|
|
def is_rate_limit_error(e): |
|
|
"""Check if the exception is a rate limit error (429).""" |
|
|
    return isinstance(e, HfHubHTTPError) and e.response is not None and e.response.status_code == 429
|
|
|
|
|
|
|
|
@backoff.on_exception( |
|
|
backoff.expo, |
|
|
HfHubHTTPError, |
|
|
giveup=lambda e: not is_rate_limit_error(e), |
|
|
max_tries=8, |
|
|
base=300, |
|
|
max_value=3600, |
|
|
jitter=backoff.full_jitter, |
|
|
on_backoff=lambda details: print(f" ⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/{8}...") |
|
|
) |
|
|
def list_repo_files_with_backoff(api, **kwargs): |
|
|
"""List repo files with exponential backoff on rate limit errors.""" |
|
|
return api.list_repo_files(**kwargs) |
|
|
|
|
|
@backoff.on_exception( |
|
|
backoff.expo, |
|
|
HfHubHTTPError, |
|
|
giveup=lambda e: not is_rate_limit_error(e), |
|
|
max_tries=8, |
|
|
base=300, |
|
|
max_value=3600, |
|
|
jitter=backoff.full_jitter, |
|
|
on_backoff=lambda details: print(f" ⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/{8}...") |
|
|
) |
|
|
def hf_hub_download_with_backoff(**kwargs): |
|
|
"""Download from HF Hub with exponential backoff on rate limit errors.""" |
|
|
return hf_hub_download(**kwargs) |
|
|
|
|
|
@backoff.on_exception( |
|
|
backoff.expo, |
|
|
HfHubHTTPError, |
|
|
giveup=lambda e: not is_rate_limit_error(e), |
|
|
max_tries=8, |
|
|
base=300, |
|
|
max_value=3600, |
|
|
jitter=backoff.full_jitter, |
|
|
on_backoff=lambda details: print(f" ⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/{8}...") |
|
|
) |
|
|
def upload_file_with_backoff(api, **kwargs): |
|
|
"""Upload file with exponential backoff on rate limit errors.""" |
|
|
return api.upload_file(**kwargs) |
|
|
|
|
|
@backoff.on_exception( |
|
|
backoff.expo, |
|
|
HfHubHTTPError, |
|
|
giveup=lambda e: not is_rate_limit_error(e), |
|
|
max_tries=8, |
|
|
base=300, |
|
|
max_value=3600, |
|
|
jitter=backoff.full_jitter, |
|
|
on_backoff=lambda details: print(f" ⏳ Rate limited. Retrying in {details['wait']/60:.1f} minutes ({details['wait']:.0f}s) - attempt {details['tries']}/{8}...") |
|
|
) |
|
|
def upload_folder_with_backoff(api, **kwargs): |
|
|
"""Upload folder with exponential backoff on rate limit errors.""" |
|
|
return api.upload_folder(**kwargs) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_jsonl(filename): |
|
|
"""Load JSONL file and return list of dictionaries.""" |
|
|
if not os.path.exists(filename): |
|
|
return [] |
|
|
|
|
|
data = [] |
|
|
with open(filename, 'r', encoding='utf-8') as f: |
|
|
for line in f: |
|
|
line = line.strip() |
|
|
if line: |
|
|
try: |
|
|
entry = json.loads(line) |
|
|
data.append(entry) |
|
|
except json.JSONDecodeError as e: |
|
|
print(f"Warning: Skipping invalid JSON line: {e}") |
|
|
return data |
|
|
|
|
|
|
|
|
def save_jsonl(filename, data): |
|
|
"""Save list of dictionaries to JSONL file.""" |
|
|
with open(filename, 'w', encoding='utf-8') as f: |
|
|
for item in data: |
|
|
f.write(json.dumps(item) + '\n') |
|
|
|
|
|
|
|
|
def cache_to_dict(cache_list): |
|
|
"""Convert list of cache entries to dictionary by identifier.""" |
|
|
return {entry['github_identifier']: entry for entry in cache_list} |
|
|
|
|
|
|
|
|
def dict_to_cache(cache_dict): |
|
|
"""Convert dictionary back to list of values.""" |
|
|
return list(cache_dict.values()) |
|
|
|
|
|
|
|
|
def normalize_date_format(date_string): |
|
|
""" |
|
|
Convert date strings to standardized ISO 8601 format with Z suffix. |
|
|
Handles both old format (2025-10-15T23:23:47.983068) and new format (2025-10-15T23:23:47Z). |
|
|
Also handles space separator (2025-06-23 07:18:28) and incomplete timezone offsets (+00). |
|
|
""" |
|
|
if not date_string or date_string == 'N/A': |
|
|
return 'N/A' |
|
|
|
|
|
try: |
|
|
|
|
|
date_string = date_string.replace(' ', 'T') |
|
|
|
|
|
|
|
|
if date_string[-3:-2] in ('+', '-') and ':' not in date_string[-3:]: |
|
|
date_string = date_string + ':00' |
|
|
|
|
|
|
|
|
dt = datetime.fromisoformat(date_string.replace('Z', '+00:00')) |
|
|
|
|
|
|
|
|
return dt.strftime('%Y-%m-%dT%H:%M:%SZ') |
|
|
except Exception as e: |
|
|
print(f"Warning: Could not parse date '{date_string}': {e}") |
|
|
return date_string |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_bigquery_client(): |
|
|
""" |
|
|
Initialize BigQuery client using credentials from environment variable. |
|
|
|
|
|
Expects GOOGLE_APPLICATION_CREDENTIALS_JSON environment variable containing |
|
|
the service account JSON credentials as a string. |
|
|
""" |
|
|
|
|
|
creds_json = os.environ.get('GOOGLE_APPLICATION_CREDENTIALS_JSON') |
|
|
|
|
|
if creds_json: |
|
|
|
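        # Write the JSON credentials to a temporary file so the BigQuery SDK can
        # pick them up via GOOGLE_APPLICATION_CREDENTIALS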
|
|
with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.json') as temp_file: |
|
|
temp_file.write(creds_json) |
|
|
temp_path = temp_file.name |
|
|
|
|
|
|
|
|
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = temp_path |
|
|
|
|
|
|
|
|
client = bigquery.Client() |
|
|
|
|
|
|
|
|
os.unlink(temp_path) |
|
|
|
|
|
return client |
|
|
else: |
|
|
raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment") |
|
|
|
|
|
|
|
|
def generate_table_union_statements(start_date, end_date): |
|
|
""" |
|
|
Generate UNION ALL statements for githubarchive.month tables in date range. |
|
|
|
|
|
Args: |
|
|
start_date: Start datetime |
|
|
end_date: End datetime |
|
|
|
|
|
Returns: |
|
|
String with UNION ALL SELECT statements for all monthly tables in range |
|
|
""" |
|
|
table_names = [] |
|
|
|
|
|
|
|
|
current_date = start_date.replace(day=1) |
|
|
end_month = end_date.replace(day=1) |
|
|
|
|
|
while current_date <= end_month: |
|
|
table_name = f"`githubarchive.month.{current_date.strftime('%Y%m')}`" |
|
|
table_names.append(table_name) |
|
|
|
|
|
|
|
|
if current_date.month == 12: |
|
|
current_date = current_date.replace(year=current_date.year + 1, month=1) |
|
|
else: |
|
|
current_date = current_date.replace(month=current_date.month + 1) |
|
|
|
|
|
|
|
|
union_parts = [f"SELECT * FROM {table}" for table in table_names] |
|
|
return " UNION ALL ".join(union_parts) |
|
|
|
|
|
|
|
|
def fetch_issue_metadata_batched(client, identifiers, start_date, end_date, batch_size=100, upload_immediately=True): |
|
|
""" |
|
|
Fetch issue metadata for ALL agents using BATCHED BigQuery queries. |
|
|
|
|
|
Splits agents into smaller batches to avoid performance issues with large UNNEST arrays |
|
|
and correlated subqueries. Each batch query runs much faster than one massive query. |
|
|
|
|
|
Args: |
|
|
client: BigQuery client instance |
|
|
identifiers: List of GitHub usernames/bot identifiers |
|
|
start_date: Start datetime (timezone-aware) |
|
|
end_date: End datetime (timezone-aware) |
|
|
batch_size: Number of agents per batch (default: 100) |
|
|
upload_immediately: Upload results to HuggingFace immediately after each batch (default: True) |
|
|
|
|
|
Returns: |
|
|
Dictionary mapping agent identifier to list of issue metadata |
|
|
""" |
|
|
print(f"\n🔍 Querying BigQuery for {len(identifiers)} agents using BATCHED approach") |
|
|
print(f" Batch size: {batch_size} agents per query") |
|
|
print(f" Upload mode: {'Immediate (after each batch)' if upload_immediately else 'Deferred (after all batches)'}") |
|
|
|
|
|
|
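    # Split identifiers into fixed-size batches; each batch is sent as one BigQuery query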
|
|
batches = [identifiers[i:i + batch_size] for i in range(0, len(identifiers), batch_size)] |
|
|
print(f" Total batches: {len(batches)}") |
|
|
|
|
|
|
|
|
all_metadata = {} |
|
|
|
|
|
for batch_num, batch_identifiers in enumerate(batches, 1): |
|
|
print(f"\n{'─'*80}") |
|
|
print(f"📦 Processing Batch {batch_num}/{len(batches)} ({len(batch_identifiers)} agents)") |
|
|
print(f"{'─'*80}") |
|
|
|
|
|
try: |
|
|
batch_results = fetch_all_issue_metadata_single_query( |
|
|
client, batch_identifiers, start_date, end_date |
|
|
) |
|
|
|
|
|
|
|
|
for identifier, metadata_list in batch_results.items(): |
|
|
if identifier in all_metadata: |
|
|
all_metadata[identifier].extend(metadata_list) |
|
|
else: |
|
|
all_metadata[identifier] = metadata_list |
|
|
|
|
|
print(f" ✓ Batch {batch_num} completed: {len(batch_results)} agents with data") |
|
|
|
|
|
|
|
|
if upload_immediately and batch_results: |
|
|
print(f"\n 🤗 Uploading batch {batch_num}/{len(batches)} results to HuggingFace...") |
|
|
upload_success = 0 |
|
|
upload_errors = 0 |
|
|
|
|
|
for identifier, metadata_list in batch_results.items(): |
|
|
if metadata_list: |
|
|
if save_issue_metadata_to_hf(metadata_list, identifier): |
|
|
upload_success += 1 |
|
|
else: |
|
|
upload_errors += 1 |
|
|
|
|
|
print(f" ✓ Batch {batch_num}/{len(batches)} upload complete ({upload_success} agents uploaded, {upload_errors} errors)") |
|
|
|
|
|
except Exception as e: |
|
|
print(f" ✗ Batch {batch_num} failed: {str(e)}") |
|
|
print(f" Continuing with remaining batches...") |
|
|
import traceback |
|
|
traceback.print_exc() |
|
|
continue |
|
|
|
|
|
print(f"\n{'='*80}") |
|
|
print(f"✅ All batches completed!") |
|
|
print(f" Total agents with data: {len(all_metadata)}") |
|
|
total_issues = sum(len(issues) for issues in all_metadata.values()) |
|
|
print(f" Total issues found: {total_issues}") |
|
|
print(f"{'='*80}\n") |
|
|
|
|
|
return all_metadata |
|
|
|
|
|
|
|
|
def fetch_all_issue_metadata_single_query(client, identifiers, start_date, end_date): |
|
|
""" |
|
|
Fetch issue metadata for a batch of agents using ONE comprehensive BigQuery query. |
|
|
|
|
|
This query fetches IssuesEvent and IssueCommentEvent from GitHub Archive and |
|
|
deduplicates to get the latest state of each issue. Filters by issue author, |
|
|
commenter, or assignee. |
|
|
|
|
|
NOTE: This function is designed for smaller batches (~100 agents). For large |
|
|
numbers of agents, use fetch_issue_metadata_batched() instead. |
|
|
|
|
|
Args: |
|
|
client: BigQuery client instance |
|
|
identifiers: List of GitHub usernames/bot identifiers (recommended: <100) |
|
|
start_date: Start datetime (timezone-aware) |
|
|
end_date: End datetime (timezone-aware) |
|
|
|
|
|
Returns: |
|
|
Dictionary mapping agent identifier to list of issue metadata: |
|
|
{ |
|
|
'agent-identifier': [ |
|
|
{ |
|
|
'url': Issue URL, |
|
|
'created_at': Issue creation timestamp, |
|
|
'closed_at': Close timestamp (if closed, else None), |
|
|
'state_reason': Reason for closure (completed/not_planned/etc.) |
|
|
}, |
|
|
... |
|
|
], |
|
|
... |
|
|
} |
|
|
""" |
|
|
print(f"\n🔍 Querying BigQuery for {len(identifiers)} agents in SINGLE QUERY") |
|
|
print(f" Time range: {start_date.strftime('%Y-%m-%d')} to {end_date.strftime('%Y-%m-%d')}") |
|
|
|
|
|
|
|
|
issue_tables = generate_table_union_statements(start_date, end_date) |
|
|
|
|
|
|
|
|
identifier_set = set() |
|
|
    for identifier in identifiers:
        identifier_set.add(identifier)

        # Also match the bare login for '[bot]' accounts (e.g. 'foo[bot]' and 'foo')
        stripped = identifier.replace('[bot]', '')
        if stripped != identifier:
            identifier_set.add(stripped)
|
|
|
|
|
|
|
|
identifier_array = '[' + ', '.join([f'"{id}"' for id in identifier_set]) + ']' |
|
|
|
|
|
print(f" Total identifiers (including bot/non-bot variants): {len(identifier_set)}") |
|
|
|
|
|
|
|
|
query = f""" |
|
|
WITH agent_identifiers AS ( |
|
|
-- Create a table of all agent identifiers using UNNEST |
|
|
-- This avoids hitting BigQuery's 256KB query size limit with large IN clauses |
|
|
SELECT identifier |
|
|
FROM UNNEST({identifier_array}) AS identifier |
|
|
), |
|
|
|
|
|
issue_events AS ( |
|
|
-- Get all issue events and comment events for ALL agents |
|
|
SELECT |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') as url, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.created_at') as created_at, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.closed_at') as closed_at, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.state_reason') as state_reason, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.user.login') as author, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.assignee.login') as assignee, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.comment.user.login') as commenter, |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.number') as issue_number, |
|
|
repo.name as repo_name, |
|
|
created_at as event_time |
|
|
FROM ( |
|
|
{issue_tables} |
|
|
) |
|
|
WHERE |
|
|
type IN ('IssuesEvent', 'IssueCommentEvent') |
|
|
-- Exclude pull requests (they have pull_request field) |
|
|
AND JSON_EXTRACT(payload, '$.issue.pull_request') IS NULL |
|
|
AND JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') IS NOT NULL |
|
|
-- Filter by author OR commenter OR assignee |
|
|
AND ( |
|
|
JSON_EXTRACT_SCALAR(payload, '$.issue.user.login') IN (SELECT identifier FROM agent_identifiers) |
|
|
OR JSON_EXTRACT_SCALAR(payload, '$.comment.user.login') IN (SELECT identifier FROM agent_identifiers) |
|
|
OR JSON_EXTRACT_SCALAR(payload, '$.issue.assignee.login') IN (SELECT identifier FROM agent_identifiers) |
|
|
) |
|
|
), |
|
|
|
|
|
latest_states AS ( |
|
|
-- Deduplicate to get latest state for each issue |
|
|
SELECT |
|
|
url, |
|
|
created_at, |
|
|
closed_at, |
|
|
state_reason, |
|
|
author, |
|
|
assignee, |
|
|
commenter |
|
|
FROM issue_events |
|
|
QUALIFY ROW_NUMBER() OVER ( |
|
|
PARTITION BY repo_name, issue_number |
|
|
ORDER BY event_time DESC |
|
|
) = 1 |
|
|
), |
|
|
|
|
|
agent_issues AS ( |
|
|
-- Map each issue to its relevant agent(s) |
|
|
SELECT DISTINCT |
|
|
CASE |
|
|
WHEN author IN (SELECT identifier FROM agent_identifiers) THEN author |
|
|
WHEN commenter IN (SELECT identifier FROM agent_identifiers) THEN commenter |
|
|
WHEN assignee IN (SELECT identifier FROM agent_identifiers) THEN assignee |
|
|
ELSE NULL |
|
|
END as agent_identifier, |
|
|
url, |
|
|
created_at, |
|
|
closed_at, |
|
|
state_reason |
|
|
FROM latest_states |
|
|
WHERE |
|
|
author IN (SELECT identifier FROM agent_identifiers) |
|
|
OR commenter IN (SELECT identifier FROM agent_identifiers) |
|
|
OR assignee IN (SELECT identifier FROM agent_identifiers) |
|
|
) |
|
|
|
|
|
SELECT |
|
|
agent_identifier, |
|
|
url, |
|
|
created_at, |
|
|
closed_at, |
|
|
state_reason |
|
|
FROM agent_issues |
|
|
WHERE agent_identifier IS NOT NULL |
|
|
ORDER BY agent_identifier, created_at DESC |
|
|
""" |
|
|
|
|
|
|
|
|
query_days = (end_date - start_date).days |
|
|
|
|
|
print(f" Querying {query_days} days for issue and comment events...") |
|
|
print(f" Agents: {', '.join(identifiers[:5])}{'...' if len(identifiers) > 5 else ''}") |
|
|
|
|
|
try: |
|
|
query_job = client.query(query) |
|
|
results = list(query_job.result()) |
|
|
|
|
|
print(f" ✓ Found {len(results)} total issue records across all agents") |
|
|
|
|
|
|
|
|
metadata_by_agent = defaultdict(list) |
|
|
|
|
|
for row in results: |
|
|
agent_id = row.agent_identifier |
|
|
|
|
|
|
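            # BigQuery may return datetime objects for timestamps; normalize them to ISO strings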
|
|
created_at = row.created_at |
|
|
if hasattr(created_at, 'isoformat'): |
|
|
created_at = created_at.isoformat() |
|
|
|
|
|
closed_at = row.closed_at |
|
|
if hasattr(closed_at, 'isoformat'): |
|
|
closed_at = closed_at.isoformat() |
|
|
|
|
|
metadata_by_agent[agent_id].append({ |
|
|
'url': row.url, |
|
|
'created_at': created_at, |
|
|
'closed_at': closed_at, |
|
|
'state_reason': row.state_reason, |
|
|
}) |
|
|
|
|
|
|
|
|
print(f"\n 📊 Results breakdown by agent:") |
|
|
for identifier in identifiers: |
|
|
|
|
|
count = len(metadata_by_agent.get(identifier, [])) |
|
|
stripped = identifier.replace('[bot]', '') |
|
|
if stripped != identifier: |
|
|
count += len(metadata_by_agent.get(stripped, [])) |
|
|
|
|
|
if count > 0: |
|
|
|
|
|
all_metadata = metadata_by_agent.get(identifier, []) + metadata_by_agent.get(stripped, []) |
|
|
completed_count = sum(1 for m in all_metadata if m['state_reason'] == 'completed') |
|
|
closed_count = sum(1 for m in all_metadata if m['closed_at'] is not None) |
|
|
open_count = count - closed_count |
|
|
print(f" {identifier}: {count} issues ({completed_count} completed, {closed_count} closed, {open_count} open)") |
|
|
|
|
|
|
|
|
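        # Merge results for the '[bot]' and bare identifier variants under the submitted identifier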
final_metadata = {} |
|
|
for identifier in identifiers: |
|
|
combined = metadata_by_agent.get(identifier, []) |
|
|
stripped = identifier.replace('[bot]', '') |
|
|
if stripped != identifier and stripped in metadata_by_agent: |
|
|
combined.extend(metadata_by_agent[stripped]) |
|
|
|
|
|
if combined: |
|
|
final_metadata[identifier] = combined |
|
|
|
|
|
return final_metadata |
|
|
|
|
|
except Exception as e: |
|
|
print(f" ✗ BigQuery error: {str(e)}") |
|
|
import traceback |
|
|
traceback.print_exc() |
|
|
return {} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_github_token(): |
|
|
"""Get GitHub token from environment variables for validation purposes.""" |
|
|
token = os.getenv('GITHUB_TOKEN') |
|
|
if not token: |
|
|
print("Warning: GITHUB_TOKEN not found for validation") |
|
|
return token |
|
|
|
|
|
|
|
|
def validate_github_username(identifier): |
|
|
"""Verify that a GitHub identifier exists (simple validation for submission).""" |
|
|
try: |
|
|
token = get_github_token() |
|
|
headers = {'Authorization': f'token {token}'} if token else {} |
|
|
url = f'https://api.github.com/users/{identifier}' |
|
|
response = requests.get(url, headers=headers, timeout=10) |
|
|
|
|
|
if response.status_code == 200: |
|
|
return True, "Username is valid" |
|
|
elif response.status_code == 404: |
|
|
return False, "GitHub identifier not found" |
|
|
else: |
|
|
return False, f"Validation error: HTTP {response.status_code}" |
|
|
except Exception as e: |
|
|
return False, f"Validation error: {str(e)}" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def extract_issue_metadata(issue): |
|
|
""" |
|
|
Extract minimal issue metadata for efficient storage. |
|
|
    Only keeps essential fields: url, created_at, closed_at, state, state_reason.
|
|
Note: agent_name is not stored as it's inferred from the folder structure. |
|
|
|
|
|
Issue states: |
|
|
- state: "open" or "closed" |
|
|
- state_reason: "completed" (resolved), "not_planned" (closed as not planned), or None (still open) |
|
|
""" |
|
|
|
|
|
created_at = issue.get('created_at') |
|
|
closed_at = issue.get('closed_at') |
|
|
state = issue.get('state') |
|
|
state_reason = issue.get('state_reason') |
|
|
|
|
|
return { |
|
|
'url': issue.get('url'), |
|
|
'created_at': created_at, |
|
|
'closed_at': closed_at, |
|
|
'state': state, |
|
|
'state_reason': state_reason |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def calculate_issue_stats_from_metadata(metadata_list): |
|
|
""" |
|
|
Calculate statistics from a list of issue metadata (lightweight objects). |
|
|
Works with minimal metadata: url, created_at, closed_at, state, state_reason. |
|
|
|
|
|
Returns a dictionary with comprehensive issue metrics. |
|
|
|
|
|
Resolved Rate is calculated as: |
|
|
completed issues / closed issues * 100 |
|
|
|
|
|
Completed Issues = issues closed as completed (state_reason="completed") |
|
|
Closed Issues = all issues that have been closed (closed_at is not None) |
|
|
We do NOT count issues closed as not planned (state_reason="not_planned") as resolved, |
|
|
but they ARE counted in the denominator as closed issues. |
|
|
""" |
|
|
total_issues = len(metadata_list) |
|
|
|
|
|
|
|
|
closed_issues = sum(1 for issue_meta in metadata_list |
|
|
if issue_meta.get('closed_at') is not None) |
|
|
|
|
|
|
|
|
completed = sum(1 for issue_meta in metadata_list |
|
|
if issue_meta.get('state_reason') == 'completed') |
|
|
|
|
|
|
|
|
resolved_rate = (completed / closed_issues * 100) if closed_issues > 0 else 0 |
|
|
|
|
|
return { |
|
|
'total_issues': total_issues, |
|
|
'closed_issues': closed_issues, |
|
|
'resolved_issues': completed, |
|
|
'resolved_rate': round(resolved_rate, 2), |
|
|
} |
|
|
|
|
|
|
|
|
def calculate_monthly_metrics_by_agent(top_n=None): |
|
|
""" |
|
|
Calculate monthly metrics for all agents (or top N agents) for visualization. |
|
|
Loads data directly from SWE-Arena/issue_metadata dataset. |
|
|
|
|
|
Args: |
|
|
top_n: If specified, only return metrics for the top N agents by total issues. |
|
|
Agents are ranked by their total issue count across all months. |
|
|
|
|
|
Returns: |
|
|
dict: { |
|
|
'agents': list of agent names, |
|
|
'months': list of month labels (e.g., '2025-01'), |
|
|
'data': { |
|
|
agent_name: { |
|
|
'resolved_rates': list of resolved rates by month, |
|
|
'total_issues': list of issue counts by month, |
|
|
'resolved_issues': list of resolved issue counts by month |
|
|
} |
|
|
} |
|
|
} |
|
|
""" |
|
|
|
|
|
agents = load_agents_from_hf() |
|
|
|
|
|
|
|
|
identifier_to_name = {agent.get('github_identifier'): agent.get('name') for agent in agents if agent.get('github_identifier')} |
|
|
|
|
|
|
|
|
all_metadata = load_issue_metadata() |
|
|
|
|
|
if not all_metadata: |
|
|
return {'agents': [], 'months': [], 'data': {}} |
|
|
|
|
|
|
|
|
agent_month_data = defaultdict(lambda: defaultdict(list)) |
|
|
|
|
|
for issue_meta in all_metadata: |
|
|
agent_identifier = issue_meta.get('agent_identifier') |
|
|
created_at = issue_meta.get('created_at') |
|
|
|
|
|
if not agent_identifier or not created_at: |
|
|
continue |
|
|
|
|
|
|
|
|
agent_name = identifier_to_name.get(agent_identifier, agent_identifier) |
|
|
|
|
|
try: |
|
|
dt = datetime.fromisoformat(created_at.replace('Z', '+00:00')) |
|
|
month_key = f"{dt.year}-{dt.month:02d}" |
|
|
agent_month_data[agent_name][month_key].append(issue_meta) |
|
|
except Exception as e: |
|
|
print(f"Warning: Could not parse date '{created_at}': {e}") |
|
|
continue |
|
|
|
|
|
|
|
|
all_months = set() |
|
|
for agent_data in agent_month_data.values(): |
|
|
all_months.update(agent_data.keys()) |
|
|
months = sorted(list(all_months)) |
|
|
|
|
|
|
|
|
result_data = {} |
|
|
for agent_name, month_dict in agent_month_data.items(): |
|
|
resolved_rates = [] |
|
|
total_issues_list = [] |
|
|
resolved_issues_list = [] |
|
|
|
|
|
for month in months: |
|
|
issues_in_month = month_dict.get(month, []) |
|
|
|
|
|
|
|
|
completed_count = sum(1 for issue in issues_in_month if issue.get('state_reason') == 'completed') |
|
|
|
|
|
|
|
|
closed_count = sum(1 for issue in issues_in_month if issue.get('closed_at') is not None) |
|
|
|
|
|
|
|
|
total_count = len(issues_in_month) |
|
|
|
|
|
|
|
|
resolved_rate = (completed_count / closed_count * 100) if closed_count > 0 else None |
|
|
|
|
|
resolved_rates.append(resolved_rate) |
|
|
total_issues_list.append(total_count) |
|
|
resolved_issues_list.append(completed_count) |
|
|
|
|
|
result_data[agent_name] = { |
|
|
'resolved_rates': resolved_rates, |
|
|
'total_issues': total_issues_list, |
|
|
'resolved_issues': resolved_issues_list |
|
|
} |
|
|
|
|
|
|
|
|
agents_list = sorted(list(agent_month_data.keys())) |
|
|
if top_n is not None and top_n > 0: |
|
|
|
|
|
agent_totals = [] |
|
|
for agent_name in agents_list: |
|
|
total_issues = sum(result_data[agent_name]['total_issues']) |
|
|
agent_totals.append((agent_name, total_issues)) |
|
|
|
|
|
|
|
|
agent_totals.sort(key=lambda x: x[1], reverse=True) |
|
|
top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]] |
|
|
|
|
|
|
|
|
result_data = {agent: result_data[agent] for agent in top_agents if agent in result_data} |
|
|
agents_list = top_agents |
|
|
|
|
|
return { |
|
|
'agents': agents_list, |
|
|
'months': months, |
|
|
'data': result_data |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def group_metadata_by_date(metadata_list): |
|
|
""" |
|
|
Group issue metadata by exact date (year.month.day) for efficient daily storage. |
|
|
Returns dict: {(year, month, day): [metadata_list]} |
|
|
""" |
|
|
grouped = defaultdict(list) |
|
|
|
|
|
for issue_meta in metadata_list: |
|
|
created_at = issue_meta.get('created_at') |
|
|
if not created_at: |
|
|
continue |
|
|
|
|
|
try: |
|
|
dt = datetime.fromisoformat(created_at.replace('Z', '+00:00')) |
|
|
key = (dt.year, dt.month, dt.day) |
|
|
grouped[key].append(issue_meta) |
|
|
except Exception as e: |
|
|
print(f"Warning: Could not parse date '{created_at}': {e}") |
|
|
|
|
|
return dict(grouped) |
|
|
|
|
|
|
|
|
def save_issue_metadata_to_hf(metadata_list, agent_identifier): |
|
|
""" |
|
|
Save issue metadata to HuggingFace dataset, organized by [agent_identifier]/YYYY.MM.DD.jsonl. |
|
|
Each file is stored in the agent's folder and named YYYY.MM.DD.jsonl for that day's issues. |
|
|
|
|
|
This function uses COMPLETE OVERWRITE strategy (not append/deduplicate). |
|
|
Uses upload_folder for single-commit batch uploads (avoids rate limit issues). |
|
|
|
|
|
Args: |
|
|
metadata_list: List of issue metadata dictionaries |
|
|
agent_identifier: GitHub identifier of the agent (used as folder name) |
|
|
""" |
|
|
    import shutil
|
|
|
|
|
temp_dir = None |
|
|
try: |
|
|
token = get_hf_token() |
|
|
if not token: |
|
|
raise Exception("No HuggingFace token found") |
|
|
|
|
|
api = HfApi(token=token) |
|
|
|
|
|
|
|
|
grouped = group_metadata_by_date(metadata_list) |
|
|
|
|
|
if not grouped: |
|
|
print(f" No valid metadata to save for {agent_identifier}") |
|
|
return False |
|
|
|
|
|
|
|
|
temp_dir = tempfile.mkdtemp() |
|
|
agent_folder = os.path.join(temp_dir, agent_identifier) |
|
|
os.makedirs(agent_folder, exist_ok=True) |
|
|
|
|
|
print(f"📦 Preparing batch upload for {agent_identifier} ({len(grouped)} daily files)...") |
|
|
|
|
|
|
|
|
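        # Write one JSONL file per day: <agent_identifier>/YYYY.MM.DD.jsonl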
for (issue_year, month, day), day_metadata in grouped.items(): |
|
|
filename = f"{agent_identifier}/{issue_year}.{month:02d}.{day:02d}.jsonl" |
|
|
local_filename = os.path.join(agent_folder, f"{issue_year}.{month:02d}.{day:02d}.jsonl") |
|
|
|
|
|
|
|
|
day_metadata.sort(key=lambda x: x.get('created_at', ''), reverse=True) |
|
|
|
|
|
|
|
|
save_jsonl(local_filename, day_metadata) |
|
|
print(f" Prepared {len(day_metadata)} issues for {filename}") |
|
|
|
|
|
|
|
|
print(f"🤗 Uploading {len(grouped)} files ({len(metadata_list)} total issues)...") |
|
|
upload_folder_with_backoff( |
|
|
api, |
|
|
folder_path=temp_dir, |
|
|
repo_id=ISSUE_METADATA_REPO, |
|
|
repo_type="dataset", |
|
|
commit_message=f"Update issue metadata for {agent_identifier} - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC" |
|
|
) |
|
|
print(f" ✓ Batch upload complete for {agent_identifier}") |
|
|
|
|
|
return True |
|
|
|
|
|
except Exception as e: |
|
|
print(f"✗ Error saving issue metadata: {str(e)}") |
|
|
return False |
|
|
finally: |
|
|
|
|
|
if temp_dir and os.path.exists(temp_dir): |
|
|
shutil.rmtree(temp_dir) |
|
|
|
|
|
|
|
|
def load_issue_metadata(): |
|
|
""" |
|
|
Load issue metadata from the last LEADERBOARD_TIME_FRAME_DAYS only. |
|
|
|
|
|
Structure: [agent_identifier]/YYYY.MM.DD.jsonl |
|
|
|
|
|
Returns: |
|
|
List of dictionaries with 'agent_identifier' added to each issue metadata. |
|
|
Only includes issues within the last LEADERBOARD_TIME_FRAME_DAYS. |
|
|
""" |
|
|
|
|
|
current_time = datetime.now(timezone.utc) |
|
|
cutoff_date = current_time - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS) |
|
|
|
|
|
try: |
|
|
api = HfApi() |
|
|
token = get_hf_token() |
|
|
|
|
|
|
|
|
files = list_repo_files_with_backoff(api, repo_id=ISSUE_METADATA_REPO, repo_type="dataset") |
|
|
|
|
|
|
|
|
|
|
|
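        # Keep only daily files whose filename date (YYYY.MM.DD) falls within the time frame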
time_frame_files = [] |
|
|
for f in files: |
|
|
if f.endswith('.jsonl'): |
|
|
parts = f.split('/') |
|
|
if len(parts) == 2: |
|
|
filename = parts[1] |
|
|
try: |
|
|
|
|
|
date_part = filename.replace('.jsonl', '') |
|
|
date_components = date_part.split('.') |
|
|
if len(date_components) == 3: |
|
|
file_year, file_month, file_day = map(int, date_components) |
|
|
file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc) |
|
|
|
|
|
|
|
|
if file_date >= cutoff_date: |
|
|
time_frame_files.append(f) |
|
|
except Exception: |
|
|
|
|
|
continue |
|
|
|
|
|
print(f"📥 [LOAD] Reading cached issue metadata from HuggingFace ({len(time_frame_files)} files, last {LEADERBOARD_TIME_FRAME_DAYS} days)...") |
|
|
|
|
|
all_metadata = [] |
|
|
for filename in time_frame_files: |
|
|
try: |
|
|
|
|
|
|
|
|
parts = filename.split('/') |
|
|
if len(parts) != 2: |
|
|
print(f" Warning: Unexpected filename format: {filename}") |
|
|
continue |
|
|
|
|
|
agent_identifier = parts[0] |
|
|
|
|
|
file_path = hf_hub_download_with_backoff( |
|
|
repo_id=ISSUE_METADATA_REPO, |
|
|
filename=filename, |
|
|
repo_type="dataset", |
|
|
token=token |
|
|
) |
|
|
day_metadata = load_jsonl(file_path) |
|
|
|
|
|
|
|
|
for issue_meta in day_metadata: |
|
|
|
|
|
created_at = issue_meta.get('created_at') |
|
|
if created_at: |
|
|
try: |
|
|
dt = datetime.fromisoformat(created_at.replace('Z', '+00:00')) |
|
|
if dt < cutoff_date: |
|
|
continue |
|
|
except Exception: |
|
|
pass |
|
|
|
|
|
issue_meta['agent_identifier'] = agent_identifier |
|
|
all_metadata.append(issue_meta) |
|
|
|
|
|
print(f" ✓ Loaded {len(day_metadata)} issues from {filename}") |
|
|
except Exception as e: |
|
|
print(f" Warning: Could not load {filename}: {str(e)}") |
|
|
|
|
|
print(f"✓ Loaded {len(all_metadata)} total issues from last {LEADERBOARD_TIME_FRAME_DAYS} days") |
|
|
return all_metadata |
|
|
|
|
|
except Exception as e: |
|
|
print(f"✗ Error loading issue metadata from last {LEADERBOARD_TIME_FRAME_DAYS} days: {str(e)}") |
|
|
return [] |
|
|
|
|
|
|
|
|
def get_latest_issue_date_for_agent(agent_identifier): |
|
|
""" |
|
|
Get the latest issue creation date for an agent from stored metadata. |
|
|
Used for incremental updates - only fetch issues newer than this date. |
|
|
|
|
|
Structure: [agent_identifier]/YYYY.MM.DD.jsonl |
|
|
|
|
|
Args: |
|
|
agent_identifier: GitHub identifier of the agent |
|
|
|
|
|
Returns: |
|
|
datetime or None if no existing issues found. |
|
|
""" |
|
|
try: |
|
|
api = HfApi() |
|
|
token = get_hf_token() |
|
|
|
|
|
|
|
|
files = list_repo_files_with_backoff(api, repo_id=ISSUE_METADATA_REPO, repo_type="dataset") |
|
|
|
|
|
|
|
|
|
|
|
agent_pattern = f"{agent_identifier}/" |
|
|
agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')] |
|
|
|
|
|
if not agent_files: |
|
|
return None |
|
|
|
|
|
|
|
|
latest_date = None |
|
|
for filename in agent_files: |
|
|
try: |
|
|
file_path = hf_hub_download_with_backoff( |
|
|
repo_id=ISSUE_METADATA_REPO, |
|
|
filename=filename, |
|
|
repo_type="dataset", |
|
|
token=token |
|
|
) |
|
|
metadata = load_jsonl(file_path) |
|
|
|
|
|
for issue in metadata: |
|
|
created_at = issue.get('created_at') |
|
|
if created_at: |
|
|
try: |
|
|
dt = datetime.fromisoformat(created_at.replace('Z', '+00:00')) |
|
|
if latest_date is None or dt > latest_date: |
|
|
latest_date = dt |
|
|
except Exception: |
|
|
continue |
|
|
except Exception: |
|
|
continue |
|
|
|
|
|
return latest_date |
|
|
|
|
|
except Exception: |
|
|
return None |
|
|
|
|
|
|
|
|
def get_daily_files_last_time_frame(agent_identifier): |
|
|
""" |
|
|
Get list of daily file paths for an agent from the configured time frame. |
|
|
|
|
|
Args: |
|
|
agent_identifier: GitHub identifier of the agent |
|
|
|
|
|
Returns: |
|
|
List of file paths in format: [agent_identifier]/YYYY.MM.DD.jsonl |
|
|
""" |
|
|
try: |
|
|
api = HfApi() |
|
|
token = get_hf_token() |
|
|
|
|
|
|
|
|
today = datetime.now(timezone.utc) |
|
|
cutoff_date = today - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS) |
|
|
|
|
|
|
|
|
files = list_repo_files_with_backoff(api, repo_id=ISSUE_METADATA_REPO, repo_type="dataset") |
|
|
|
|
|
|
|
|
agent_pattern = f"{agent_identifier}/" |
|
|
agent_files = [f for f in files if f.startswith(agent_pattern) and f.endswith('.jsonl')] |
|
|
|
|
|
|
|
|
recent_files = [] |
|
|
for filename in agent_files: |
|
|
try: |
|
|
|
|
|
parts = filename.split('/') |
|
|
if len(parts) != 2: |
|
|
continue |
|
|
|
|
|
date_part = parts[1].replace('.jsonl', '') |
|
|
date_components = date_part.split('.') |
|
|
if len(date_components) != 3: |
|
|
continue |
|
|
|
|
|
file_year, file_month, file_day = map(int, date_components) |
|
|
file_date = datetime(file_year, file_month, file_day, tzinfo=timezone.utc) |
|
|
|
|
|
|
|
|
if cutoff_date <= file_date <= today: |
|
|
recent_files.append(filename) |
|
|
except Exception: |
|
|
continue |
|
|
|
|
|
return recent_files |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Error getting daily files: {str(e)}") |
|
|
return [] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def load_agents_from_hf(): |
|
|
"""Load all agent metadata JSON files from HuggingFace dataset.""" |
|
|
try: |
|
|
api = HfApi() |
|
|
agents = [] |
|
|
|
|
|
|
|
|
files = list_repo_files_with_backoff(api, repo_id=AGENTS_REPO, repo_type="dataset") |
|
|
|
|
|
|
|
|
json_files = [f for f in files if f.endswith('.json')] |
|
|
|
|
|
|
|
|
for json_file in json_files: |
|
|
try: |
|
|
file_path = hf_hub_download_with_backoff( |
|
|
repo_id=AGENTS_REPO, |
|
|
filename=json_file, |
|
|
repo_type="dataset" |
|
|
) |
|
|
|
|
|
with open(file_path, 'r') as f: |
|
|
agent_data = json.load(f) |
|
|
|
|
|
|
|
|
if agent_data.get('status') != 'public': |
|
|
continue |
|
|
|
|
|
|
|
|
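                    # The filename (without .json) is the agent's GitHub identifier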
filename_identifier = json_file.replace('.json', '') |
|
|
|
|
|
|
|
|
agent_data['github_identifier'] = filename_identifier |
|
|
|
|
|
|
|
|
                    # Fall back to the filename-derived identifier when no display name is provided
                    if 'name' not in agent_data:
                        agent_data['name'] = filename_identifier
|
|
|
|
|
agents.append(agent_data) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Warning: Could not load {json_file}: {str(e)}") |
|
|
continue |
|
|
|
|
|
print(f"✓ Loaded {len(agents)} agents from HuggingFace") |
|
|
return agents |
|
|
|
|
|
except Exception as e: |
|
|
print(f"Could not load agents from HuggingFace: {str(e)}") |
|
|
return None |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def get_hf_token(): |
|
|
"""Get HuggingFace token from environment variables.""" |
|
|
token = os.getenv('HF_TOKEN') |
|
|
if not token: |
|
|
print("Warning: HF_TOKEN not found in environment variables") |
|
|
return token |
|
|
|
|
|
|
|
|
def load_cached_leaderboard_and_metrics(): |
|
|
""" |
|
|
Load cached leaderboard and monthly metrics data from HuggingFace. |
|
|
This is much faster than constructing from scratch on every app launch. |
|
|
|
|
|
Returns: |
|
|
dict: { |
|
|
'leaderboard': dict of agent stats, |
|
|
'monthly_metrics': dict with agents, months, and data, |
|
|
'metadata': dict with last_updated, time_frame_days, total_agents |
|
|
} |
|
|
Returns None if cache doesn't exist or fails to load. |
|
|
""" |
|
|
try: |
|
|
token = get_hf_token() |
|
|
|
|
|
print("📥 Loading cached leaderboard and metrics from HuggingFace...") |
|
|
|
|
|
|
|
|
cached_path = hf_hub_download_with_backoff( |
|
|
repo_id=LEADERBOARD_REPO, |
|
|
filename="swe-issue.json", |
|
|
repo_type="dataset", |
|
|
token=token |
|
|
) |
|
|
|
|
|
|
|
|
with open(cached_path, 'r', encoding='utf-8') as f: |
|
|
data = json.load(f) |
|
|
|
|
|
print(f" ✓ Loaded cached data (last updated: {data.get('metadata', {}).get('last_updated', 'Unknown')})") |
|
|
print(f" ✓ Leaderboard entries: {len(data.get('leaderboard', {}))}") |
|
|
print(f" ✓ Monthly metrics for: {len(data.get('monthly_metrics', {}).get('agents', []))} agents") |
|
|
|
|
|
return data |
|
|
|
|
|
except Exception as e: |
|
|
print(f"⚠️ Could not load cached data: {str(e)}") |
|
|
print(f" Falling back to constructing from issue metadata...") |
|
|
return None |
|
|
|
|
|
|
|
|
def upload_with_retry(api, path_or_fileobj, path_in_repo, repo_id, repo_type, token, max_retries=5): |
|
|
""" |
|
|
Upload file to HuggingFace with exponential backoff retry logic. |
|
|
|
|
|
Args: |
|
|
api: HfApi instance |
|
|
path_or_fileobj: Local file path to upload |
|
|
path_in_repo: Target path in the repository |
|
|
repo_id: Repository ID |
|
|
repo_type: Type of repository (e.g., "dataset") |
|
|
token: HuggingFace token |
|
|
max_retries: Maximum number of retry attempts |
|
|
|
|
|
Returns: |
|
|
True if upload succeeded, raises exception if all retries failed |
|
|
""" |
|
|
delay = 2.0 |
|
|
|
|
|
for attempt in range(max_retries): |
|
|
try: |
|
|
api.upload_file( |
|
|
path_or_fileobj=path_or_fileobj, |
|
|
path_in_repo=path_in_repo, |
|
|
repo_id=repo_id, |
|
|
repo_type=repo_type, |
|
|
token=token |
|
|
) |
|
|
if attempt > 0: |
|
|
print(f" ✓ Upload succeeded on attempt {attempt + 1}/{max_retries}") |
|
|
return True |
|
|
|
|
|
except Exception as e: |
|
|
if attempt < max_retries - 1: |
|
|
wait_time = delay + random.uniform(0, 1.0) |
|
|
print(f" ⚠️ Upload failed (attempt {attempt + 1}/{max_retries}): {str(e)}") |
|
|
print(f" ⏳ Retrying in {wait_time:.1f} seconds...") |
|
|
time.sleep(wait_time) |
|
|
delay = min(delay * 2, 60.0) |
|
|
else: |
|
|
print(f" ✗ Upload failed after {max_retries} attempts: {str(e)}") |
|
|
raise |
|
|
|
|
|
|
|
|
def save_agent_to_hf(data): |
|
|
"""Save a new agent to HuggingFace dataset as {identifier}.json in root.""" |
|
|
try: |
|
|
api = HfApi() |
|
|
token = get_hf_token() |
|
|
|
|
|
if not token: |
|
|
raise Exception("No HuggingFace token found. Please set HF_TOKEN in your Space settings.") |
|
|
|
|
|
identifier = data['github_identifier'] |
|
|
filename = f"{identifier}.json" |
|
|
|
|
|
|
|
|
with open(filename, 'w') as f: |
|
|
json.dump(data, f, indent=2) |
|
|
|
|
|
try: |
|
|
|
|
|
upload_with_retry( |
|
|
api=api, |
|
|
path_or_fileobj=filename, |
|
|
path_in_repo=filename, |
|
|
repo_id=AGENTS_REPO, |
|
|
repo_type="dataset", |
|
|
token=token |
|
|
) |
|
|
print(f"✓ Saved agent to HuggingFace: {filename}") |
|
|
return True |
|
|
finally: |
|
|
|
|
|
if os.path.exists(filename): |
|
|
os.remove(filename) |
|
|
|
|
|
except Exception as e: |
|
|
print(f"✗ Error saving agent: {str(e)}") |
|
|
return False |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def save_leaderboard_and_metrics_to_hf(): |
|
|
""" |
|
|
    Build and upload a comprehensive JSON file (swe-issue.json) with both leaderboard stats and monthly metrics.
|
|
If the file exists, it will be overwritten. |
|
|
|
|
|
Returns: |
|
|
bool: True if successful, False otherwise |
|
|
""" |
|
|
import io |
|
|
|
|
|
try: |
|
|
token = get_hf_token() |
|
|
if not token: |
|
|
raise Exception("No HuggingFace token found") |
|
|
|
|
|
api = HfApi(token=token) |
|
|
|
|
|
print(f"\n{'='*80}") |
|
|
print(f"📊 Preparing leaderboard and metrics data for upload...") |
|
|
print(f"{'='*80}\n") |
|
|
|
|
|
|
|
|
print(" Constructing leaderboard data...") |
|
|
leaderboard_data = construct_leaderboard_from_metadata() |
|
|
|
|
|
|
|
|
print(" Calculating monthly metrics...") |
|
|
monthly_metrics = calculate_monthly_metrics_by_agent(top_n=None) |
|
|
|
|
|
|
|
|
combined_data = { |
|
|
"leaderboard": leaderboard_data, |
|
|
"monthly_metrics": monthly_metrics, |
|
|
"metadata": { |
|
|
"last_updated": datetime.now(timezone.utc).isoformat(), |
|
|
"time_frame_days": LEADERBOARD_TIME_FRAME_DAYS, |
|
|
"total_agents": len(leaderboard_data) |
|
|
} |
|
|
} |
|
|
|
|
|
print(f" Leaderboard entries: {len(leaderboard_data)}") |
|
|
print(f" Monthly metrics for: {len(monthly_metrics['agents'])} agents") |
|
|
print(f" Time frame: {LEADERBOARD_TIME_FRAME_DAYS} days") |
|
|
|
|
|
|
|
|
json_content = json.dumps(combined_data, indent=2) |
|
|
file_like_object = io.BytesIO(json_content.encode('utf-8')) |
|
|
|
|
|
|
|
|
print(f"\n🤗 Uploading to {LEADERBOARD_REPO}...") |
|
|
upload_file_with_backoff( |
|
|
api, |
|
|
path_or_fileobj=file_like_object, |
|
|
path_in_repo="swe-issue.json", |
|
|
repo_id=LEADERBOARD_REPO, |
|
|
repo_type="dataset", |
|
|
token=token, |
|
|
commit_message=f"Update leaderboard data - {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC" |
|
|
) |
|
|
|
|
|
print(f" ✓ Successfully uploaded swe-issue.json") |
|
|
print(f"{'='*80}\n") |
|
|
|
|
|
return True |
|
|
|
|
|
except Exception as e: |
|
|
print(f"✗ Error saving leaderboard and metrics: {str(e)}") |
|
|
import traceback |
|
|
traceback.print_exc() |
|
|
return False |
|
|
|
|
|
|
|
|
def mine_all_agents(): |
|
|
""" |
|
|
    Mine issue metadata for all agents within LEADERBOARD_TIME_FRAME_DAYS and save to HuggingFace.
|
|
Uses BATCHED BigQuery queries for all agents (efficient approach). |
|
|
""" |
|
|
|
|
|
agents = load_agents_from_hf() |
|
|
if not agents: |
|
|
print("No agents found in HuggingFace dataset") |
|
|
return |
|
|
|
|
|
|
|
|
identifiers = [agent['github_identifier'] for agent in agents if agent.get('github_identifier')] |
|
|
if not identifiers: |
|
|
print("No valid agent identifiers found") |
|
|
return |
|
|
|
|
|
print(f"\n{'='*80}") |
|
|
print(f"⛏️ [MINE] Starting BigQuery data mining for {len(identifiers)} agents") |
|
|
print(f"Time frame: Last {LEADERBOARD_TIME_FRAME_DAYS} days") |
|
|
print(f"Data source: BigQuery + GitHub Archive (BATCHED QUERIES)") |
|
|
print(f"⚠️ This will query BigQuery and may take several minutes") |
|
|
print(f"{'='*80}\n") |
|
|
|
|
|
|
|
|
try: |
|
|
client = get_bigquery_client() |
|
|
except Exception as e: |
|
|
print(f"✗ Failed to initialize BigQuery client: {str(e)}") |
|
|
return |
|
|
|
|
|
|
|
|
current_time = datetime.now(timezone.utc) |
|
|
end_date = current_time.replace(hour=0, minute=0, second=0, microsecond=0) |
|
|
start_date = end_date - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS) |
|
|
|
|
|
try: |
|
|
|
|
|
|
|
|
all_metadata = fetch_issue_metadata_batched( |
|
|
client, identifiers, start_date, end_date, batch_size=100, upload_immediately=True |
|
|
) |
|
|
|
|
|
|
|
|
        total_issues = sum(len(metadata_list) for metadata_list in all_metadata.values())
|
|
agents_with_data = sum(1 for metadata_list in all_metadata.values() if metadata_list) |
|
|
|
|
|
print(f"\n{'='*80}") |
|
|
print(f"✅ BigQuery mining and upload complete!") |
|
|
print(f" Total agents: {len(agents)}") |
|
|
print(f" Agents with data: {agents_with_data}") |
|
|
print(f" Total PRs found: {total_prs}") |
|
|
print(f"{'='*80}\n") |
|
|
|
|
|
except Exception as e: |
|
|
print(f"✗ Error during BigQuery fetch: {str(e)}") |
|
|
import traceback |
|
|
traceback.print_exc() |
|
|
return |
|
|
|
|
|
|
|
|
print(f"📤 Uploading leaderboard and metrics data...") |
|
|
if save_leaderboard_and_metrics_to_hf(): |
|
|
print(f"✓ Leaderboard and metrics successfully uploaded to {LEADERBOARD_REPO}") |
|
|
else: |
|
|
print(f"⚠️ Failed to upload leaderboard and metrics data") |
|
|
|
|
|
|
|
|
def construct_leaderboard_from_metadata(): |
|
|
""" |
|
|
Construct leaderboard from stored issue metadata instead of fetching all issues. |
|
|
Much more memory-efficient and faster. |
|
|
|
|
|
Returns dictionary of agent stats. |
|
|
""" |
|
|
print("📊 Constructing leaderboard from issue metadata...") |
|
|
|
|
|
agents = load_agents_from_hf() |
|
|
if not agents: |
|
|
print("No agents found") |
|
|
return {} |
|
|
|
|
|
|
|
|
all_metadata = load_issue_metadata() |
|
|
|
|
|
cache_dict = {} |
|
|
|
|
|
for agent in agents: |
|
|
identifier = agent.get('github_identifier') |
|
|
agent_name = agent.get('name', 'Unknown') |
|
|
|
|
|
|
|
|
bot_metadata = [issue for issue in all_metadata if issue.get('agent_identifier') == identifier] |
|
|
|
|
|
|
|
|
stats = calculate_issue_stats_from_metadata(bot_metadata) |
|
|
|
|
|
cache_dict[identifier] = { |
|
|
'name': agent_name, |
|
|
'website': agent.get('website', 'N/A'), |
|
|
'github_identifier': identifier, |
|
|
**stats |
|
|
} |
|
|
|
|
|
return cache_dict |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def generate_color(index, total): |
|
|
"""Generate distinct colors using HSL color space for better distribution""" |
|
|
hue = (index * 360 / total) % 360 |
|
|
saturation = 70 + (index % 3) * 10 |
|
|
lightness = 45 + (index % 2) * 10 |
|
|
return f'hsl({hue}, {saturation}%, {lightness}%)' |
|
|
|
|
|
|
|
|
def create_monthly_metrics_plot(top_n=5): |
|
|
""" |
|
|
Create a Plotly figure with dual y-axes showing: |
|
|
- Left y-axis: Resolved Rate (%) as line curves |
|
|
- Right y-axis: Total Issues created as bar charts |
|
|
|
|
|
Each agent gets a unique color for both their line and bars. |
|
|
|
|
|
Args: |
|
|
top_n: Number of top agents to show (default: 5) |
|
|
""" |
|
|
|
|
|
cached_data = load_cached_leaderboard_and_metrics() |
|
|
|
|
|
if cached_data and 'monthly_metrics' in cached_data: |
|
|
|
|
|
all_metrics = cached_data['monthly_metrics'] |
|
|
|
|
|
|
|
|
if all_metrics.get('agents') and all_metrics.get('data'): |
|
|
|
|
|
agent_totals = [] |
|
|
for agent_name in all_metrics['agents']: |
|
|
total_issues = sum(all_metrics['data'][agent_name]['total_issues']) |
|
|
agent_totals.append((agent_name, total_issues)) |
|
|
|
|
|
|
|
|
agent_totals.sort(key=lambda x: x[1], reverse=True) |
|
|
top_agents = [agent_name for agent_name, _ in agent_totals[:top_n]] |
|
|
|
|
|
|
|
|
metrics = { |
|
|
'agents': top_agents, |
|
|
'months': all_metrics['months'], |
|
|
'data': {agent: all_metrics['data'][agent] for agent in top_agents if agent in all_metrics['data']} |
|
|
} |
|
|
else: |
|
|
metrics = all_metrics |
|
|
else: |
|
|
|
|
|
print(" Calculating monthly metrics from issue metadata...") |
|
|
metrics = calculate_monthly_metrics_by_agent(top_n=top_n) |
|
|
|
|
|
if not metrics['agents'] or not metrics['months']: |
|
|
|
|
|
fig = go.Figure() |
|
|
fig.add_annotation( |
|
|
text="No data available for visualization", |
|
|
xref="paper", yref="paper", |
|
|
x=0.5, y=0.5, showarrow=False, |
|
|
font=dict(size=16) |
|
|
) |
|
|
fig.update_layout( |
|
|
title=None, |
|
|
xaxis_title=None, |
|
|
height=500 |
|
|
) |
|
|
return fig |
|
|
|
|
|
|
|
|
fig = make_subplots(specs=[[{"secondary_y": True}]]) |
|
|
|
|
|
agents = metrics['agents'] |
|
|
months = metrics['months'] |
|
|
data = metrics['data'] |
|
|
|
|
|
|
|
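    # Assign each agent a distinct color, shared by its line (resolved rate) and bars (issue counts)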
|
agent_colors = {agent: generate_color(idx, len(agents)) for idx, agent in enumerate(agents)} |
|
|
|
|
|
|
|
|
for agent_name in agents: |
|
|
color = agent_colors[agent_name] |
|
|
agent_data = data[agent_name] |
|
|
|
|
|
|
|
|
resolved_rates = agent_data['resolved_rates'] |
|
|
|
|
|
x_resolved = [month for month, rate in zip(months, resolved_rates) if rate is not None] |
|
|
y_resolved = [rate for rate in resolved_rates if rate is not None] |
|
|
|
|
|
if x_resolved and y_resolved: |
|
|
fig.add_trace( |
|
|
go.Scatter( |
|
|
x=x_resolved, |
|
|
y=y_resolved, |
|
|
name=agent_name, |
|
|
mode='lines+markers', |
|
|
line=dict(color=color, width=2), |
|
|
marker=dict(size=6), |
|
|
legendgroup=agent_name, |
|
|
showlegend=True, |
|
|
hovertemplate='<b>%{fullData.name}</b><br>' + |
|
|
'Resolved Rate: %{y:.2f}%<br>' + |
|
|
'<extra></extra>' |
|
|
), |
|
|
secondary_y=False |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
x_bars = [] |
|
|
y_bars = [] |
|
|
for month, count in zip(months, agent_data['total_issues']): |
|
|
if count > 0: |
|
|
x_bars.append(month) |
|
|
y_bars.append(count) |
|
|
|
|
|
if x_bars and y_bars: |
|
|
fig.add_trace( |
|
|
go.Bar( |
|
|
x=x_bars, |
|
|
y=y_bars, |
|
|
name=agent_name, |
|
|
marker=dict(color=color, opacity=0.6), |
|
|
legendgroup=agent_name, |
|
|
showlegend=False, |
|
|
hovertemplate='<b>%{fullData.name}</b><br>' + |
|
|
'Total Issues: %{y}<br>' + |
|
|
'<extra></extra>', |
|
|
offsetgroup=agent_name |
|
|
), |
|
|
secondary_y=True |
|
|
) |
|
|
|
|
|
|
|
|
fig.update_xaxes(title_text=None) |
|
|
fig.update_yaxes(title_text="<b>Resolved Rate (%)</b>", secondary_y=False) |
|
|
fig.update_yaxes(title_text="<b>Total Issues</b>", secondary_y=True) |
|
|
|
|
|
|
|
|
fig.update_layout( |
|
|
title=None, |
|
|
hovermode='closest', |
|
|
barmode='group', |
|
|
height=600, |
|
|
legend=dict( |
|
|
orientation="h", |
|
|
yanchor="bottom", |
|
|
y=1.02, |
|
|
xanchor="right", |
|
|
x=1 |
|
|
), |
|
|
margin=dict(l=50, r=50, t=100, b=50) |
|
|
) |
|
|
|
|
|
return fig |
|
|
|
|
|
|
|
|
def get_leaderboard_dataframe(): |
|
|
""" |
|
|
Load leaderboard from cached data and convert to pandas DataFrame for display. |
|
|
Falls back to constructing from issue metadata if cache is unavailable. |
|
|
Returns formatted DataFrame sorted by total issues. |
|
|
""" |
|
|
|
|
|
cached_data = load_cached_leaderboard_and_metrics() |
|
|
|
|
|
if cached_data and 'leaderboard' in cached_data: |
|
|
cache_dict = cached_data['leaderboard'] |
|
|
else: |
|
|
|
|
|
print(" Constructing leaderboard from issue metadata...") |
|
|
cache_dict = construct_leaderboard_from_metadata() |
|
|
|
|
|
if not cache_dict: |
|
|
|
|
|
column_names = [col[0] for col in LEADERBOARD_COLUMNS] |
|
|
return pd.DataFrame(columns=column_names) |
|
|
|
|
|
rows = [] |
|
|
for data in cache_dict.values(): |
|
|
|
|
|
if data.get('total_issues', 0) == 0: |
|
|
continue |
|
|
|
|
|
rows.append([ |
|
|
data.get('name', 'Unknown'), |
|
|
data.get('website', 'N/A'), |
|
|
data.get('total_issues', 0), |
|
|
data.get('resolved_issues', 0), |
|
|
data.get('resolved_rate', 0.0), |
|
|
]) |
|
|
|
|
|
|
|
|
column_names = [col[0] for col in LEADERBOARD_COLUMNS] |
|
|
df = pd.DataFrame(rows, columns=column_names) |
|
|
|
|
|
|
|
|
numeric_cols = ["Total Issues", "Resolved Issues", "Resolved Rate (%)"] |
|
|
for col in numeric_cols: |
|
|
if col in df.columns: |
|
|
df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0) |
|
|
|
|
|
|
|
|
if "Total Issues" in df.columns and not df.empty: |
|
|
df = df.sort_values(by="Total Issues", ascending=False).reset_index(drop=True) |
|
|
|
|
|
return df |
|
|
|
|
|
|
|
|
def submit_agent(identifier, agent_name, developer, website): |
|
|
""" |
|
|
Submit a new agent to the leaderboard. |
|
|
Validates input and saves submission. |
|
|
Issue data will be populated by the monthly mining task. |
|
|
""" |
|
|
|
|
|
if not identifier or not identifier.strip(): |
|
|
return "❌ GitHub identifier is required", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
if not agent_name or not agent_name.strip(): |
|
|
return "❌ Agent name is required", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
if not developer or not developer.strip(): |
|
|
return "❌ Developer name is required", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
if not website or not website.strip(): |
|
|
return "❌ Website URL is required", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
|
|
|
|
|
|
identifier = identifier.strip() |
|
|
agent_name = agent_name.strip() |
|
|
developer = developer.strip() |
|
|
website = website.strip() |
|
|
|
|
|
|
|
|
is_valid, message = validate_github_username(identifier) |
|
|
if not is_valid: |
|
|
return f"❌ {message}", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
|
|
|
|
|
|
agents = load_agents_from_hf() |
|
|
if agents: |
|
|
existing_names = {agent['github_identifier'] for agent in agents} |
|
|
if identifier in existing_names: |
|
|
return f"⚠️ Agent with identifier '{identifier}' already exists", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
|
|
|
|
|
|
submission = { |
|
|
'name': agent_name, |
|
|
'developer': developer, |
|
|
'github_identifier': identifier, |
|
|
'website': website, |
|
|
} |
|
|
|
|
|
|
|
|
if not save_agent_to_hf(submission): |
|
|
return "❌ Failed to save submission", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
|
|
|
return f"✅ Successfully submitted {agent_name}! Issue data will be populated by daily incremental updates.", get_leaderboard_dataframe(), create_monthly_metrics_plot() |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print(f"\n🚀 Starting SWE Agent PR Leaderboard") |
|
|
print(f" Leaderboard time frame: {LEADERBOARD_TIME_FRAME_DAYS} days ({LEADERBOARD_TIME_FRAME_DAYS // 30} months)") |
|
|
print(f" Mining update frequency: Every {UPDATE_TIME_FRAME_DAYS} days\n") |
|
|
|
|
|
|
|
|
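# Schedule mining on the 1st of each month; startup only loads cached data from HuggingFace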
scheduler = BackgroundScheduler(timezone="UTC") |
|
|
scheduler.add_job( |
|
|
mine_all_agents, |
|
|
trigger=CronTrigger(day=1, hour=0, minute=0), |
|
|
id='monthly_issue_mining', |
|
|
name='Monthly Issue Mining', |
|
|
replace_existing=True |
|
|
) |
|
|
scheduler.start() |
|
|
print(f"\n{'='*80}") |
|
|
print(f"✓ Scheduler initialized successfully") |
|
|
print(f"⛏️ Mining schedule: Every 1st of the month at 12:00 AM UTC") |
|
|
print(f"📥 On startup: Only loads cached data from HuggingFace (no mining)") |
|
|
print(f"{'='*80}\n") |
|
|
|
|
|
|
|
|
with gr.Blocks(title="SWE Agent Issue Leaderboard", theme=gr.themes.Soft()) as app: |
|
|
|
|
|
gr.Markdown("# 🏆 SWE Agent Issue Leaderboard") |
|
|
gr.Markdown(f"Track and compare GitHub issue resolution statistics for SWE agents") |
|
|
|
|
|
with gr.Tabs(): |
|
|
|
|
|
|
|
|
with gr.Tab("📊 Leaderboard"): |
|
|
gr.Markdown(f"*All statistics are based on issues from the last {LEADERBOARD_TIME_FRAME_DAYS // 30} months*") |
|
|
leaderboard_table = Leaderboard( |
|
|
value=get_leaderboard_dataframe(), |
|
|
datatype=LEADERBOARD_COLUMNS, |
|
|
search_columns=["Agent Name", "Website"], |
|
|
filter_columns=[ |
|
|
ColumnFilter( |
|
|
"Resolved Rate (%)", |
|
|
min=0, |
|
|
max=100, |
|
|
default=[0, 100], |
|
|
type="slider", |
|
|
label="Resolved Rate (%)" |
|
|
) |
|
|
] |
|
|
) |
|
|
|
|
|
gr.Markdown("### Monthly Metrics") |
|
|
gr.Markdown("Track resolution rates and issue activity over time") |
|
|
|
|
|
monthly_plot = gr.Plot( |
|
|
value=create_monthly_metrics_plot(), |
|
|
label="Monthly Issue Metrics" |
|
|
) |
|
|
|
|
|
|
|
|
with gr.Tab("➕ Submit Agent"): |
|
|
|
|
|
gr.Markdown("### Submit Your Agent") |
|
|
gr.Markdown("Fill in the details below to add your agent to the leaderboard. Make sure you're logged in to HuggingFace CLI on your machine.") |
|
|
|
|
|
with gr.Row(): |
|
|
with gr.Column(): |
|
|
github_input = gr.Textbox( |
|
|
label="GitHub Identifier*", |
|
|
placeholder="Your agent username (e.g., my-agent-bot)" |
|
|
) |
|
|
name_input = gr.Textbox( |
|
|
label="Agent Name*", |
|
|
placeholder="Your agent's display name" |
|
|
) |
|
|
|
|
|
with gr.Column(): |
|
|
developer_input = gr.Textbox( |
|
|
label="Developer*", |
|
|
placeholder="Your developer or team name" |
|
|
) |
|
|
website_input = gr.Textbox( |
|
|
label="Website", |
|
|
placeholder="https://your-agent-website.com" |
|
|
) |
|
|
|
|
|
submit_button = gr.Button( |
|
|
"Submit Agent", |
|
|
variant="primary" |
|
|
) |
|
|
submission_status = gr.Textbox( |
|
|
label="Submission Status", |
|
|
interactive=False |
|
|
) |
|
|
|
|
|
|
|
|
submit_button.click( |
|
|
fn=submit_agent, |
|
|
inputs=[github_input, name_input, developer_input, website_input], |
|
|
outputs=[submission_status, leaderboard_table, monthly_plot] |
|
|
) |
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__": |
|
|
app.launch() |