app.py CHANGED
@@ -2065,8 +2065,98 @@ def mine_all_agents():
     print(f"{'='*80}")
 
     try:
-
-
+        client = get_bigquery_client()
+
+        # Load all agents
+        agents = load_agents_from_hf()
+        if not agents:
+            print("No agents found in HuggingFace dataset")
+            return
+
+        # Calculate date range
+        today_utc = datetime.now(timezone.utc)
+        today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
+        update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
+        cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
+
+        print(f"📅 Time Range Configuration:")
+        print(f"   Update period start (12am UTC): {update_start_midnight.isoformat()}")
+        print(f"   Today 12am UTC: {today_midnight.isoformat()}")
+        print(f"   Cutoff for existing reviews: {cutoff_date.isoformat()}")
+        print(f"   Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
+
+        for agent in agents:
+            identifier = agent.get('github_identifier')
+            agent_name = agent.get('name', 'Unknown')
+
+            if not identifier:
+                print(f"Warning: Skipping agent without identifier: {agent}")
+                continue
+
+            try:
+                print(f"\n{'='*60}")
+                print(f"Processing: {agent_name} ({identifier})")
+                print(f"{'='*60}")
+
+                # Step 1: Load all existing metadata within timeframe
+                print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS} days...")
+                all_metadata = load_review_metadata()
+                agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
+
+                # Filter to last (LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS) days (from cutoff to today)
+                recent_metadata = []
+                for review in agent_metadata:
+                    reviewed_at = review.get('reviewed_at', '')
+                    if reviewed_at:
+                        try:
+                            review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
+                            if cutoff_date <= review_date < today_midnight:
+                                recent_metadata.append(review)
+                        except Exception as e:
+                            print(f"   Warning: Could not parse date '{reviewed_at}': {e}")
+                            continue
+
+                print(f"   ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
+
+                # Step 2: Fetch NEW reviews from last UPDATE_TIME_FRAME_DAYS to today using BigQuery
+                print(f"🔍 Fetching new reviews from {update_start_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
+
+                review_rows = fetch_reviews_from_bigquery(client, identifier, update_start_midnight, today_midnight)
+
+                # Extract unique PRs
+                urls = list(set([row.url for row in review_rows if row.url]))
+                print(f"   Found {len(review_rows)} review events across {len(urls)} unique PRs")
+
+                # Extract metadata for new reviews
+                review_metadata = []
+                seen_prs = set()
+                for row in review_rows:
+                    url = row.url
+                    if url in seen_prs:
+                        continue
+                    seen_prs.add(url)
+
+                    metadata = extract_review_metadata_from_bigquery(row)
+                    metadata['agent_identifier'] = identifier
+                    review_metadata.append(metadata)
+
+                print(f"   ✓ Found {len(review_metadata)} unique PRs in {UPDATE_TIME_FRAME_DAYS}-day window")
+
+                # Step 3: Combine and save all metadata
+                all_updated_metadata = recent_metadata + review_metadata
+
+                if all_updated_metadata:
+                    print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
+                    save_review_metadata_to_hf(all_updated_metadata, identifier)
+                    print(f"✓ Updated {identifier}: {len(recent_metadata)} existing + {len(review_metadata)} new = {len(all_updated_metadata)} total")
+                else:
+                    print(f"   No reviews to save for {identifier}")
+
+            except Exception as e:
+                print(f"✗ Error processing {identifier}: {str(e)}")
+                import traceback
+                traceback.print_exc()
+                continue
 
         # After mining is complete, save leaderboard and metrics to HuggingFace
        print(f"\n📤 Uploading leaderboard and metrics data...")
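The windowing added above splits the leaderboard period in two: reviews in `[cutoff_date, today_midnight)` are kept from existing metadata, while reviews in `[update_start_midnight, today_midnight)` are re-fetched from BigQuery. A minimal sketch of the boundary arithmetic, assuming hypothetical 90-day leaderboard and 7-day update constants (the real values are defined elsewhere in app.py):

```python
from datetime import datetime, timedelta, timezone

# Hypothetical values; the real constants live elsewhere in app.py.
LEADERBOARD_TIME_FRAME_DAYS = 90
UPDATE_TIME_FRAME_DAYS = 7

today_utc = datetime.now(timezone.utc)
# Anchor every boundary at 12am UTC so repeated runs agree on the windows.
today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)

# Re-fetched from BigQuery: the most recent UPDATE_TIME_FRAME_DAYS days.
update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)

# Kept from existing metadata: everything at or after this cutoff.
cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)

print(f"fetch window: {update_start_midnight.date()} to {today_midnight.date()}")
print(f"keep window:  {cutoff_date.date()} to {today_midnight.date()}")
```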
@@ -2093,7 +2183,7 @@ def mine_all_agents():
         print(f"\n✅ Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
 
     except Exception as e:
-        print(f"✗
+        print(f"✗ Monthly update failed: {str(e)}")
         import traceback
         traceback.print_exc()
 
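The new handler pairs a one-line status message with the full stack trace. The same pattern in isolation, with a simulated failure standing in for a real mining error:

```python
import traceback

try:
    raise RuntimeError("simulated failure")  # hypothetical stand-in for a mining error
except Exception as e:
    # Short status line for the log stream, then the full traceback for debugging.
    print(f"✗ Monthly update failed: {str(e)}")
    traceback.print_exc()
```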
@@ -2435,177 +2525,28 @@ def submit_agent(identifier, agent_name, developer, website):
     return f"✅ Successfully submitted {agent_name}! Review data will be populated by the next daily incremental update.", get_leaderboard_dataframe()
 
 
-# =============================================================================
-# BACKGROUND TASKS
-# =============================================================================
-
-def fetch_and_update_weekly_reviews():
-    """
-    Fetch and update reviews with comprehensive status checking using BigQuery.
-
-    Strategy:
-    1. For each agent:
-       - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS for their closed_at status
-       - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
-       - Fetch new reviews from last UPDATE_TIME_FRAME_DAYS days using BigQuery
-       - Save all updated/new metadata back to HuggingFace
-    """
-    # Initialize BigQuery client
-    try:
-        client = get_bigquery_client()
-    except Exception as e:
-        print(f"✗ Failed to initialize BigQuery client: {str(e)}")
-        return
-
-    # Load all agents
-    agents = load_agents_from_hf()
-    if not agents:
-        print("No agents found in HuggingFace dataset")
-        return
-
-    # Calculate date range
-    today_utc = datetime.now(timezone.utc)
-    today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
-    update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
-    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
-
-    print(f"📅 Time Range Configuration:")
-    print(f"   Update period start (12am UTC): {update_start_midnight.isoformat()}")
-    print(f"   Today 12am UTC: {today_midnight.isoformat()}")
-    print(f"   Cutoff for existing reviews: {cutoff_date.isoformat()}")
-    print(f"   Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
-
-    for agent in agents:
-        identifier = agent.get('github_identifier')
-        agent_name = agent.get('name', 'Unknown')
-
-        if not identifier:
-            print(f"Warning: Skipping agent without identifier: {agent}")
-            continue
-
-        try:
-            print(f"\n{'='*60}")
-            print(f"Processing: {agent_name} ({identifier})")
-            print(f"{'='*60}")
-
-            # Step 1: Load all existing metadata within timeframe
-            print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS} days...")
-            all_metadata = load_review_metadata()
-            agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
-
-            # Filter to last (LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS) days (from cutoff to today)
-            recent_metadata = []
-            for review in agent_metadata:
-                reviewed_at = review.get('reviewed_at', '')
-                if reviewed_at:
-                    try:
-                        review_date = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
-                        if cutoff_date <= review_date < today_midnight:
-                            recent_metadata.append(review)
-                    except Exception as e:
-                        print(f"   Warning: Could not parse date '{reviewed_at}': {e}")
-                        continue
-
-            print(f"   ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
-
-            # Step 2: Fetch NEW reviews from last UPDATE_TIME_FRAME_DAYS to today using BigQuery
-            print(f"🔍 Fetching new reviews from {update_start_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
-
-            review_rows = fetch_reviews_from_bigquery(client, identifier, update_start_midnight, today_midnight)
-
-            # Extract unique PRs
-            urls = list(set([row.url for row in review_rows if row.url]))
-            print(f"   Found {len(review_rows)} review events across {len(urls)} unique PRs")
-
-            # Extract metadata for new reviews
-            weekly_metadata = []
-            seen_prs = set()
-            for row in review_rows:
-                url = row.url
-                if url in seen_prs:
-                    continue
-                seen_prs.add(url)
-
-                metadata = extract_review_metadata_from_bigquery(row)
-                metadata['agent_identifier'] = identifier
-                weekly_metadata.append(metadata)
-
-            print(f"   ✓ Found {len(weekly_metadata)} unique PRs in {UPDATE_TIME_FRAME_DAYS}-day window")
-
-            # Step 3: Combine and save all metadata
-            all_updated_metadata = recent_metadata + weekly_metadata
-
-            if all_updated_metadata:
-                print(f"💾 Saving {len(all_updated_metadata)} total reviews to HuggingFace...")
-                save_review_metadata_to_hf(all_updated_metadata, identifier)
-                print(f"✓ Updated {identifier}: {len(recent_metadata)} existing + {len(weekly_metadata)} new = {len(all_updated_metadata)} total")
-            else:
-                print(f"   No reviews to save for {identifier}")
-
-        except Exception as e:
-            print(f"✗ Error processing {identifier}: {str(e)}")
-            import traceback
-            traceback.print_exc()
-            continue
-
-
-# =============================================================================
-# STARTUP & INITIALIZATION
-# =============================================================================
-
-def initialize_leaderboard_data():
-    """
-    Initialize leaderboard data on startup.
-    If saved data doesn't exist, construct from metadata and save.
-    """
-    print(f"\n{'='*80}")
-    print(f"🚀 Initializing leaderboard data...")
-    print(f"{'='*80}\n")
-
-    # Try loading from saved dataset
-    saved_data = load_leaderboard_data_from_hf()
-
-    if saved_data:
-        print(f"✓ Leaderboard data already exists (last updated: {saved_data.get('last_updated', 'Unknown')})")
-    else:
-        print(f"⚠️ No saved leaderboard data found. Constructing from metadata...")
-        try:
-            # Save leaderboard and metrics to HuggingFace
-            if save_leaderboard_and_metrics_to_hf():
-                print(f"✓ Initial leaderboard data created and saved")
-            else:
-                print(f"⚠️ Failed to save initial leaderboard data")
-        except Exception as e:
-            print(f"✗ Failed to initialize leaderboard data: {str(e)}")
-            import traceback
-            traceback.print_exc()
-
-    print(f"\n{'='*80}")
-    print(f"✓ Leaderboard initialization complete")
-    print(f"{'='*80}\n")
-
-
 # =============================================================================
 # GRADIO APPLICATION
 # =============================================================================
 
-
-
+print(f"\n🚀 Starting SWE Agent PR Leaderboard")
+print(f"   Leaderboard time frame: {LEADERBOARD_TIME_FRAME_DAYS} days ({LEADERBOARD_TIME_FRAME_DAYS // 30} months)")
+print(f"   Mining update frequency: Every {UPDATE_TIME_FRAME_DAYS} days\n")
 
-# Start APScheduler for
+# Start APScheduler for monthly PR mining at 12:00 AM UTC every 1st of the month
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
     mine_all_agents,
-    trigger=CronTrigger(
-    id='
-    name='
+    trigger=CronTrigger(day=1, hour=0, minute=0),  # 12:00 AM UTC every 1st of the month
+    id='monthly_review_mining',
+    name='Monthly Review Mining',
     replace_existing=True
 )
 scheduler.start()
 print(f"\n{'='*80}")
 print(f"✓ Scheduler initialized successfully")
-print(f"⛏️ Mining schedule: Every
-print(f"📥 On startup:
+print(f"⛏️ Mining schedule: Every 1st of the month at 12:00 AM UTC")
+print(f"📥 On startup: Only loads cached data from HuggingFace (no mining)")
 print(f"{'='*80}\n")
 
 # Create Gradio interface
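The hunk above pins mining to a monthly cron. A standalone sketch of the same wiring, assuming APScheduler 3.x and stubbing `mine_all_agents`:

```python
import time

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def mine_all_agents():
    # Stub standing in for the real miner defined in app.py.
    print("mining all agents...")

scheduler = BackgroundScheduler(timezone="UTC")
scheduler.add_job(
    mine_all_agents,
    trigger=CronTrigger(day=1, hour=0, minute=0),  # 1st of the month, 00:00 UTC
    id='monthly_review_mining',
    name='Monthly Review Mining',
    replace_existing=True,
)
scheduler.start()

# BackgroundScheduler runs in a daemon thread; keep the process alive so it can fire.
try:
    while True:
        time.sleep(60)
except (KeyboardInterrupt, SystemExit):
    scheduler.shutdown()
```

With the default in-memory job store, `replace_existing=True` only guards against registering the same job id twice within one process; persistence across restarts would require configuring a durable job store.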