refine mining mechanism

app.py CHANGED

@@ -33,7 +33,8 @@ args = parser.parse_args()
 
 AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
 REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
-LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
+LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
+UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new reviews
 
 LEADERBOARD_COLUMNS = [
     ("Agent Name", "string"),
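With the two constants above, each scheduled run works over two windows: the trailing UPDATE_TIME_FRAME_DAYS days are re-mined for new reviews, while the older remainder of the leaderboard window only has its PR statuses refreshed. A minimal sketch of that split, using the configured values:

    LEADERBOARD_TIME_FRAME_DAYS = 180  # full leaderboard window
    UPDATE_TIME_FRAME_DAYS = 30        # freshly mined window

    refresh_window_days = LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS
    print(refresh_window_days)  # 150 -> days of existing metadata whose PR status is re-checked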
@@ -1711,16 +1712,16 @@ def save_agent_to_hf(data):
 
 def update_all_agents_incremental():
     """
-
+    Scheduled task for incremental review mining and statistics update.
 
     Strategy:
-    1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS -
-    2. Fetch last
+    1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
+    2. Fetch new reviews from the last UPDATE_TIME_FRAME_DAYS days
     3. Save all updated/new metadata back to HuggingFace
     4. Reload statistics from updated metadata
     """
     print(f"\n{'='*80}")
-    print(f"🔄
+    print(f"🔄 Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
     print(f"{'='*80}")
 
     try:
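A hedged sketch of the four-step flow the docstring describes; the helper names (update_existing_statuses, fetch_new_reviews, save_metadata, reload_statistics) are placeholders for illustration, not the Space's actual functions:

    from datetime import datetime, timedelta, timezone

    def incremental_update_sketch(update_existing_statuses, fetch_new_reviews,
                                  save_metadata, reload_statistics):
        """Illustrative orchestration only; update_all_agents_incremental() wires in real helpers."""
        now = datetime.now(timezone.utc)
        # 1. Refresh PR status of metadata older than the mining window but still on the leaderboard
        refreshed = update_existing_statuses(since=now - timedelta(days=180 - 30))
        # 2. Mine brand-new reviews from the trailing UPDATE_TIME_FRAME_DAYS days
        new_reviews = fetch_new_reviews(since=now - timedelta(days=30))
        # 3. Persist the merged metadata back to the HuggingFace dataset
        save_metadata(refreshed + new_reviews)
        # 4. Rebuild leaderboard statistics from the saved metadata
        reload_statistics()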
@@ -1734,11 +1735,11 @@ def update_all_agents_incremental():
         print(f"\n{'='*80}")
         print(f"📊 Update Summary:")
         print(f"  ✅ Updated existing review statuses")
-        print(f"  ✅ Fetched last
+        print(f"  ✅ Fetched new reviews from last {UPDATE_TIME_FRAME_DAYS} days")
         print(f"  ✅ Statistics reloaded")
         print(f"{'='*80}")
 
-        print(f"\n✅
+        print(f"\n✅ Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
 
     except Exception as e:
         print(f"❌ Weekly update failed: {str(e)}")
@@ -2061,9 +2062,9 @@ def fetch_and_update_weekly_reviews():
 
     Strategy:
     1. For each agent:
-       - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS -
-       - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS -
-       - Fetch new reviews from last
+       - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS for their closed_at status
+       - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
+       - Fetch new reviews from last UPDATE_TIME_FRAME_DAYS days using BigQuery
        - Save all updated/new metadata back to HuggingFace
     """
     # Initialize BigQuery client
@@ -2082,11 +2083,11 @@ def fetch_and_update_weekly_reviews():
     # Calculate date range
     today_utc = datetime.now(timezone.utc)
     today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
-
-    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS -
+    update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
+    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
 
     print(f"📅 Time Range Configuration:")
-    print(f"
+    print(f"  Update period start (12am UTC): {update_start_midnight.isoformat()}")
     print(f"  Today 12am UTC: {today_midnight.isoformat()}")
     print(f"  Cutoff for existing reviews: {cutoff_date.isoformat()}")
     print(f"  Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
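A worked example of the date arithmetic above for a fixed "today" (the date is arbitrary, chosen only to make the two boundaries concrete):

    from datetime import datetime, timedelta, timezone

    LEADERBOARD_TIME_FRAME_DAYS = 180
    UPDATE_TIME_FRAME_DAYS = 30

    today_midnight = datetime(2025, 1, 6, tzinfo=timezone.utc)  # a Monday, 12am UTC
    update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)

    print(update_start_midnight.date())  # 2024-12-07 -> new reviews are mined from here forward
    print(cutoff_date.date())            # 2024-08-09 -> existing reviews are kept and refreshed from here forward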
@@ -2105,11 +2106,11 @@ def fetch_and_update_weekly_reviews():
         print(f"{'='*60}")
 
         # Step 1: Load all existing metadata within timeframe
-        print(f"📂 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS -
+        print(f"📂 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS} days...")
         all_metadata = load_review_metadata()
         agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
 
-        # Filter to last LEADERBOARD_TIME_FRAME_DAYS -
+        # Filter to last (LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS) days (from cutoff to today)
         recent_metadata = []
         for review in agent_metadata:
             reviewed_at = review.get('reviewed_at', '')
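The hunk cuts off just before the body of the filter loop; a sketch of how such a timestamp filter can be completed (the ISO-parsing details are assumptions, not necessarily the Space's exact code):

    from datetime import datetime, timezone

    def filter_recent(agent_metadata, cutoff_date, today_midnight):
        """Keep reviews whose reviewed_at timestamp falls inside [cutoff_date, today_midnight)."""
        recent_metadata = []
        for review in agent_metadata:
            reviewed_at = review.get('reviewed_at', '')
            if not reviewed_at:
                continue
            try:
                ts = datetime.fromisoformat(reviewed_at.replace('Z', '+00:00'))
            except ValueError:
                continue
            if ts.tzinfo is None:
                ts = ts.replace(tzinfo=timezone.utc)
            if cutoff_date <= ts < today_midnight:
                recent_metadata.append(review)
        return recent_metadata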
@@ -2124,10 +2125,10 @@ def fetch_and_update_weekly_reviews():
 
         print(f"  ✅ Loaded {len(recent_metadata)} existing reviews from timeframe")
 
-        # Step 2: Fetch NEW reviews from last
-        print(f"🔍 Fetching new reviews from {
+        # Step 2: Fetch NEW reviews from last UPDATE_TIME_FRAME_DAYS to today using BigQuery
+        print(f"🔍 Fetching new reviews from {update_start_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
 
-        review_rows = fetch_reviews_from_bigquery(client, identifier,
+        review_rows = fetch_reviews_from_bigquery(client, identifier, update_start_midnight, today_midnight)
 
         # Extract unique PRs
         urls = list(set([row.url for row in review_rows if row.url]))
@@ -2146,7 +2147,7 @@ def fetch_and_update_weekly_reviews():
             metadata['agent_identifier'] = identifier
             weekly_metadata.append(metadata)
 
-        print(f"  ✅ Found {len(weekly_metadata)} unique PRs in
+        print(f"  ✅ Found {len(weekly_metadata)} unique PRs in {UPDATE_TIME_FRAME_DAYS}-day window")
 
         # Step 3: Combine and save all metadata
         all_updated_metadata = recent_metadata + weekly_metadata
@@ -2169,17 +2170,17 @@ def fetch_and_update_weekly_reviews():
 # GRADIO APPLICATION
 # =============================================================================
 
-# Start APScheduler for
+# Start APScheduler for incremental updates at 12:00 AM UTC every Monday
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
     update_all_agents_incremental,
     trigger=CronTrigger(day_of_week='mon', hour=0, minute=0), # 12:00 AM UTC every Monday
-    id='
-    name='
+    id='incremental_review_mining',
+    name='Incremental Review Mining',
     replace_existing=True
 )
 scheduler.start()
-print("✅ Scheduler started:
+print(f"✅ Scheduler started: Incremental Update at 12:00 AM UTC every Monday (updates existing metadata + mines last {UPDATE_TIME_FRAME_DAYS} days of reviews)")
 
 # Create Gradio interface
 with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
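A standalone way to sanity-check the cron schedule used above, without starting the Space's scheduler (APScheduler 3.x trigger API):

    from datetime import datetime, timezone
    from apscheduler.triggers.cron import CronTrigger

    trigger = CronTrigger(day_of_week='mon', hour=0, minute=0, timezone='UTC')
    now = datetime.now(timezone.utc)
    # Next fire time after "now" should be the upcoming Monday at 00:00 UTC
    print(trigger.get_next_fire_time(None, now))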

msr.py CHANGED

@@ -21,7 +21,7 @@ load_dotenv()
 
 AGENTS_REPO = "SWE-Arena/swe_agents"
 REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
-LEADERBOARD_TIME_FRAME_DAYS = 180 #
+LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
 
 # =============================================================================
 # UTILITY FUNCTIONS
@@ -175,9 +175,42 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
         FROM (
             {review_tables}
         )
-        WHERE
+        WHERE
             type = 'PullRequestReviewEvent'
             AND actor.login IN ({identifier_list})
+
+        UNION ALL
+
+        -- Get PR comments (IssueCommentEvent on PRs)
+        SELECT
+            JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') as url,
+            CAST(created_at AS STRING) as reviewed_at,
+            actor.login as reviewer,
+            repo.name as repo_name,
+            CAST(JSON_EXTRACT_SCALAR(payload, '$.issue.number') AS INT64) as pr_number
+        FROM (
+            {review_tables}
+        )
+        WHERE
+            type = 'IssueCommentEvent'
+            AND actor.login IN ({identifier_list})
+            AND JSON_EXTRACT_SCALAR(payload, '$.issue.pull_request.url') IS NOT NULL
+
+        UNION ALL
+
+        -- Get review comments (PullRequestReviewCommentEvent)
+        SELECT
+            JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
+            CAST(created_at AS STRING) as reviewed_at,
+            actor.login as reviewer,
+            repo.name as repo_name,
+            CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number
+        FROM (
+            {review_tables}
+        )
+        WHERE
+            type = 'PullRequestReviewCommentEvent'
+            AND actor.login IN ({identifier_list})
     ),
 
     pr_status AS (
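A hedged sketch of how a query assembled from the three UNION ALL branches above might be executed and deduplicated with google-cloud-bigquery; the sql argument and the dedup step are illustrative, since the real assembly happens inside fetch_all_pr_metadata_single_query:

    from google.cloud import bigquery

    def run_review_query(client: bigquery.Client, sql: str):
        """Execute an assembled GH Archive query and keep one row per PR URL."""
        rows = client.query(sql).result()
        seen, unique_rows = set(), []
        for row in rows:
            # each branch selects url / reviewed_at / reviewer / repo_name / pr_number
            if row.url and row.url not in seen:
                seen.add(row.url)
                unique_rows.append(row)
        return unique_rows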
@@ -317,7 +350,7 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
     if not token:
         raise Exception("No HuggingFace token found")
 
-    api = HfApi()
+    api = HfApi(token=token)
 
     # Group by date (year, month, day)
     grouped = group_metadata_by_date(metadata_list)
@@ -352,7 +385,6 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
         folder_path=temp_dir,
         repo_id=REVIEW_METADATA_REPO,
         repo_type="dataset",
-        token=token,
         commit_message=f"Update: {agent_identifier} ({len(grouped)} daily files, {len(metadata_list)} total reviews)"
     )
     print(f"  ✅ Batch upload complete for {agent_identifier}")
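The last two hunks move the HuggingFace token from the upload call to the HfApi constructor; a minimal sketch of the resulting pattern (the HF_TOKEN env var name and the temp-dir contents are assumptions for illustration):

    import os
    import tempfile
    from huggingface_hub import HfApi

    token = os.environ.get("HF_TOKEN")  # assumed env var name
    api = HfApi(token=token)            # token supplied once at construction

    with tempfile.TemporaryDirectory() as temp_dir:
        # stand-in for the grouped daily JSON files the Space writes here
        with open(os.path.join(temp_dir, "example.jsonl"), "w") as f:
            f.write('{"url": "https://github.com/org/repo/pull/1"}\n')
        api.upload_folder(
            folder_path=temp_dir,
            repo_id="SWE-Arena/review_metadata",
            repo_type="dataset",
            commit_message="Update: example agent",  # no per-call token= needed anymore
        )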