fix remine
app.py CHANGED
@@ -1554,16 +1554,19 @@ def save_agent_to_hf(data):
 
 def update_all_agents_incremental():
     """
-
+    Comprehensive update of review statistics for all agents.
 
     Strategy:
-    1. For each agent,
-    2.
-    3.
-    4.
+    1. For each agent, re-mine ALL reviews within the 6-month window
+    2. This ensures review metadata is always fresh and up-to-date
+    3. Critical for catching status changes (closed/reverted PRs)
+    4. Overwrites existing day files with current data from GitHub API
     5. Store minimal metadata (not full review objects) to avoid storage limits
     6. Construct leaderboard from ALL stored metadata (last 6 months)
 
+    Note: Unlike the old approach, this does NOT skip already-mined dates.
+    This is essential to prevent stale metadata (e.g., reviews closed after initial mining).
+
     Returns dictionary of all agent data with current stats.
     """
     token = get_github_token()
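The new docstring describes a rolling 6-month window of per-day files. As a rough sketch of how such a window can be enumerated, assuming a helper name and a 183-day approximation of six months that are mine, not from app.py:

from datetime import datetime, timedelta, timezone

# Hypothetical helper: list the "YYYY.MM.DD" day keys covered by the window.
# Each key corresponds to one <agent_identifier>/YYYY.MM.DD.jsonl file that a
# full re-mine overwrites with fresh metadata.
def window_day_keys(n_days: int = 183) -> list[str]:
    today = datetime.now(timezone.utc).date()
    return [(today - timedelta(days=i)).strftime("%Y.%m.%d") for i in range(n_days)]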
@@ -1593,26 +1596,21 @@ def update_all_agents_incremental():
         # Get already-mined dates for this agent (last 6 months)
         already_mined_dates = get_already_mined_dates(identifier, n_months=6)
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-        #
-        new_metadata = fetch_all_reviews_metadata(
-            identifier,
-            agent_name,
-            token,
-            start_from_date=None
-        )
+        # Always re-mine ALL dates within 6-month window to ensure fresh data
+        # This is critical because review metadata can become stale:
+        #   - PRs can be closed/reverted after initial mining
+        #   - Status changes need to be captured in daily files
+        print(f"π Re-mining ALL dates within 6-month window (including {len(already_mined_dates)} existing dates)")
+        print(f"   This ensures all review metadata is up-to-date...")
+
+        # Fetch ALL reviews (no exclusions) to refresh metadata
+        new_metadata = fetch_all_reviews_metadata(
+            identifier,
+            agent_name,
+            token,
+            start_from_date=None,  # Use full 6-month range
+            exclude_dates=None  # DO NOT exclude - always refresh everything
+        )
 
         if new_metadata:
             # Save new metadata to HuggingFace (organized by agent_identifier/YYYY.MM.DD.jsonl)
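The behavioural change in this hunk comes down to how exclude_dates is treated. A minimal, self-contained sketch of that filtering logic; filter_days is a hypothetical stand-in, since the real internals of fetch_all_reviews_metadata are not shown in this diff:

from typing import Iterable, Optional, Set

def filter_days(all_days: Iterable[str], exclude_dates: Optional[Set[str]] = None) -> list:
    # exclude_dates=None reproduces the new behaviour: nothing is skipped, so
    # every day file in the window is rewritten with fresh GitHub API data.
    exclude = exclude_dates or set()
    return [day for day in all_days if day not in exclude]

days = ["2024.05.01", "2024.05.02", "2024.05.03"]  # illustrative day keys
assert filter_days(days, {"2024.05.02"}) == ["2024.05.01", "2024.05.03"]  # old, selective
assert filter_days(days, None) == days                                    # new, full refresh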
@@ -1986,58 +1984,36 @@ def submit_agent(identifier, agent_name, organization, description, website):
 
 def daily_update_task():
     """
-    Daily scheduled task (runs at 12:00 AM UTC) for
+    Daily scheduled task (runs at 12:00 AM UTC) for comprehensive review mining.
 
     Strategy:
-    1.
-    2.
-
-
-
-
+    1. Re-mine ALL reviews within the 6-month window for all agents
+    2. This ensures review metadata is always fresh, catching:
+       - PRs that were closed/reverted since last mining
+       - Status changes (is_closed, state_reason, closed_at)
+       - Any other metadata updates
+    3. Updates ALL day files within LEADERBOARD_TIME_FRAME_DAYS
+
+    Unlike the old selective refresh approach, this guarantees no stale data.
     """
     print(f"\n{'='*80}")
-    print(f"π Daily
+    print(f"π Daily Regular Mining started at {datetime.now(timezone.utc).isoformat()}")
     print(f"{'='*80}")
 
     try:
-
-
-
-        agents = load_agents_from_hf()
-        if not agents:
-            print("No agents found")
-            return
-
-        print(f"π Processing {len(agents)} agents...")
-
-        total_checked = 0
-        total_updated = 0
-
-        # Refresh open reviews for each agent (last 6 months)
-        for agent in agents:
-            identifier = agent.get('github_identifier')
-            agent_name = agent.get('agent_name', 'Unknown')
-
-            if not identifier:
-                continue
-
-            print(f"\n{'='*60}")
-            print(f"Processing: {agent_name} ({identifier})")
-            print(f"{'='*60}")
+        # Run full incremental update for all agents
+        # This will re-mine everything in the 6-month window
+        print(f"π Starting comprehensive re-mining of all agents (6-month window)...")
 
-
-            checked, updated = refresh_review_status_for_agent(identifier, token)
-            total_checked += checked
-            total_updated += updated
+        update_all_agents_incremental()
 
         print(f"\n{'='*80}")
-        print(f"π
-        print(f"
-        print(f"
+        print(f"π Mining Summary:")
+        print(f"   All agents re-mined successfully within 6-month window")
+        print(f"   All review metadata refreshed and up-to-date")
         print(f"{'='*80}")
 
-        print(f"\nβ Daily
+        print(f"\nβ Daily Regular Mining completed at {datetime.now(timezone.utc).isoformat()}")
 
     except Exception as e:
         print(f"β Daily update failed: {str(e)}")
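The status fields named in the new docstring (is_closed, state_reason, closed_at) map onto fields returned by GitHub's REST API issues endpoint, which covers pull requests as well. A hedged sketch of re-checking a single item; the function name and return shape are assumptions, the endpoint and response fields are from the public API:

import requests

def fetch_item_status(owner: str, repo: str, number: int, token: str) -> dict:
    # Re-fetch current state so stale local metadata can be overwritten.
    resp = requests.get(
        f"https://api.github.com/repos/{owner}/{repo}/issues/{number}",
        headers={
            "Authorization": f"Bearer {token}",
            "Accept": "application/vnd.github+json",
        },
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()
    return {
        "is_closed": data["state"] == "closed",
        "state_reason": data.get("state_reason"),
        "closed_at": data.get("closed_at"),
    }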
@@ -2078,12 +2054,12 @@ scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
     daily_update_task,
     trigger=CronTrigger(hour=0, minute=0),  # 12:00 AM UTC daily
-    id='
-    name='Daily Review
+    id='daily_review_mining',
+    name='Daily Regular Review Mining',
     replace_existing=True
 )
 scheduler.start()
-print("β Scheduler started: Daily
+print("β Scheduler started: Daily Regular Mining at 12:00 AM UTC (re-mines all reviews within 6-month window)")
 
 # Create Gradio interface
 with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
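The scheduling itself is plain APScheduler. A minimal standalone sketch of the same pattern; the no-op job body is a placeholder:

from apscheduler.schedulers.background import BackgroundScheduler
from apscheduler.triggers.cron import CronTrigger

def job():
    print("tick")  # placeholder body

scheduler = BackgroundScheduler(timezone="UTC")
# replace_existing=True lets the job be re-registered under the same id on
# restart instead of raising ConflictingIdError.
scheduler.add_job(job, trigger=CronTrigger(hour=0, minute=0),
                  id="daily_review_mining", replace_existing=True)
scheduler.start()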