zhiminy committed
Commit 94818e2 · 1 Parent(s): 5750bb3

refine mining mechanism

Files changed (2):
  1. app.py +24 -23
  2. msr.py +36 -4
app.py CHANGED
@@ -33,7 +33,8 @@ args = parser.parse_args()
 
 AGENTS_REPO = "SWE-Arena/swe_agents" # HuggingFace dataset for agent metadata
 REVIEW_METADATA_REPO = "SWE-Arena/review_metadata" # HuggingFace dataset for review metadata
-LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
+LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for constructing leaderboard
+UPDATE_TIME_FRAME_DAYS = 30 # Time frame for mining new reviews
 
 LEADERBOARD_COLUMNS = [
     ("Agent Name", "string"),
@@ -1711,16 +1712,16 @@ def save_agent_to_hf(data):
 
 def update_all_agents_incremental():
     """
-    Weekly scheduled task for incremental review mining and statistics update.
+    Scheduled task for incremental review mining and statistics update.
 
     Strategy:
-    1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - 7)
-    2. Fetch last week's new reviews
+    1. Update PR status for all existing metadata (last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
+    2. Fetch new reviews from the last UPDATE_TIME_FRAME_DAYS days
     3. Save all updated/new metadata back to HuggingFace
     4. Reload statistics from updated metadata
     """
     print(f"\n{'='*80}")
-    print(f"🕛 Weekly Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
+    print(f"🕛 Incremental Update started at {datetime.now(timezone.utc).isoformat()}")
     print(f"{'='*80}")
 
     try:
@@ -1734,11 +1735,11 @@ def update_all_agents_incremental():
         print(f"\n{'='*80}")
         print(f"📊 Update Summary:")
         print(f" ✓ Updated existing review statuses")
-        print(f" ✓ Fetched last week's new reviews")
+        print(f" ✓ Fetched new reviews from last {UPDATE_TIME_FRAME_DAYS} days")
         print(f" ✓ Statistics reloaded")
         print(f"{'='*80}")
 
-        print(f"\n✅ Weekly Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
+        print(f"\n✅ Incremental Update completed at {datetime.now(timezone.utc).isoformat()}")
 
     except Exception as e:
         print(f"✗ Weekly update failed: {str(e)}")
@@ -2061,9 +2062,9 @@ def fetch_and_update_weekly_reviews():
 
     Strategy:
     1. For each agent:
-        - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - 7 for their closed_at status
-        - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - 7)
-        - Fetch new reviews from last week using BigQuery
+        - Examine ALL open reviews from last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS for their closed_at status
+        - Update PR status for all existing metadata using BigQuery (last LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
+        - Fetch new reviews from last UPDATE_TIME_FRAME_DAYS days using BigQuery
         - Save all updated/new metadata back to HuggingFace
     """
     # Initialize BigQuery client
@@ -2082,11 +2083,11 @@ def fetch_and_update_weekly_reviews():
     # Calculate date range
     today_utc = datetime.now(timezone.utc)
     today_midnight = datetime.combine(today_utc.date(), datetime.min.time(), tzinfo=timezone.utc)
-    last_week_midnight = today_midnight - timedelta(days=7)
-    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - 7)
+    update_start_midnight = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
+    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
 
     print(f"📅 Time Range Configuration:")
-    print(f" Last week 12am UTC: {last_week_midnight.isoformat()}")
+    print(f" Update period start (12am UTC): {update_start_midnight.isoformat()}")
     print(f" Today 12am UTC: {today_midnight.isoformat()}")
     print(f" Cutoff for existing reviews: {cutoff_date.isoformat()}")
     print(f" Examining reviews from: {cutoff_date.date()} to {today_midnight.date()}")
@@ -2105,11 +2106,11 @@ def fetch_and_update_weekly_reviews():
     print(f"{'='*60}")
 
     # Step 1: Load all existing metadata within timeframe
-    print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - 1} days...")
+    print(f"📊 Loading existing metadata from last {LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS} days...")
     all_metadata = load_review_metadata()
     agent_metadata = [r for r in all_metadata if r.get("agent_identifier") == identifier]
 
-    # Filter to last LEADERBOARD_TIME_FRAME_DAYS - 1 days (from cutoff to today)
+    # Filter to last (LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS) days (from cutoff to today)
     recent_metadata = []
     for review in agent_metadata:
         reviewed_at = review.get('reviewed_at', '')
@@ -2124,10 +2125,10 @@ def fetch_and_update_weekly_reviews():
 
     print(f" ✓ Loaded {len(recent_metadata)} existing reviews from timeframe")
 
-    # Step 2: Fetch NEW reviews from last week to today using BigQuery
-    print(f"🔍 Fetching new reviews from {last_week_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
+    # Step 2: Fetch NEW reviews from last UPDATE_TIME_FRAME_DAYS to today using BigQuery
+    print(f"🔍 Fetching new reviews from {update_start_midnight.isoformat()} to {today_midnight.isoformat()} using BigQuery...")
 
-    review_rows = fetch_reviews_from_bigquery(client, identifier, last_week_midnight, today_midnight)
+    review_rows = fetch_reviews_from_bigquery(client, identifier, update_start_midnight, today_midnight)
 
     # Extract unique PRs
     urls = list(set([row.url for row in review_rows if row.url]))
@@ -2146,7 +2147,7 @@ def fetch_and_update_weekly_reviews():
         metadata['agent_identifier'] = identifier
         weekly_metadata.append(metadata)
 
-    print(f" ✓ Found {len(weekly_metadata)} unique PRs in 7-day window")
+    print(f" ✓ Found {len(weekly_metadata)} unique PRs in {UPDATE_TIME_FRAME_DAYS}-day window")
 
     # Step 3: Combine and save all metadata
     all_updated_metadata = recent_metadata + weekly_metadata
@@ -2169,17 +2170,17 @@ def fetch_and_update_weekly_reviews():
 # GRADIO APPLICATION
 # =============================================================================
 
-# Start APScheduler for weekly updates at 12:00 AM UTC every Monday
+# Start APScheduler for incremental updates at 12:00 AM UTC every Monday
 scheduler = BackgroundScheduler(timezone="UTC")
 scheduler.add_job(
     update_all_agents_incremental,
     trigger=CronTrigger(day_of_week='mon', hour=0, minute=0), # 12:00 AM UTC every Monday
-    id='weekly_review_mining',
-    name='Weekly Regular Review Mining',
+    id='incremental_review_mining',
+    name='Incremental Review Mining',
    replace_existing=True
 )
 scheduler.start()
-print("✓ Scheduler started: Weekly Incremental Update at 12:00 AM UTC every Monday (updates existing metadata + mines last week's reviews)")
+print(f"✓ Scheduler started: Incremental Update at 12:00 AM UTC every Monday (updates existing metadata + mines last {UPDATE_TIME_FRAME_DAYS} days of reviews)")
 
 # Create Gradio interface
 with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as app:
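
For orientation, the refined mechanism in app.py splits the 180-day leaderboard frame into two windows: previously saved metadata is kept (and its PR status re-checked) back to LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS days ago, while the most recent UPDATE_TIME_FRAME_DAYS days are re-mined from BigQuery. A minimal standalone sketch of that date arithmetic follows; the compute_windows helper is illustrative and does not exist in the repository.

# Illustrative sketch only: compute_windows is not part of app.py.
from datetime import datetime, timedelta, timezone

LEADERBOARD_TIME_FRAME_DAYS = 180  # leaderboard covers the last 180 days
UPDATE_TIME_FRAME_DAYS = 30        # each scheduled run re-mines the last 30 days

def compute_windows(now=None):
    """Return (cutoff_date, update_start, today_midnight) for one update run."""
    now = now or datetime.now(timezone.utc)
    today_midnight = datetime.combine(now.date(), datetime.min.time(), tzinfo=timezone.utc)
    # New reviews are fetched from BigQuery for this window ...
    update_start = today_midnight - timedelta(days=UPDATE_TIME_FRAME_DAYS)
    # ... while previously saved metadata is retained back to this cutoff,
    # so the two windows together cover the full 180-day frame.
    cutoff_date = today_midnight - timedelta(days=LEADERBOARD_TIME_FRAME_DAYS - UPDATE_TIME_FRAME_DAYS)
    return cutoff_date, update_start, today_midnight

if __name__ == "__main__":
    cutoff, start, today = compute_windows()
    print(f"keep existing metadata since: {cutoff.date()}")  # today - 150 days
    print(f"re-mine new reviews since:    {start.date()}")   # today - 30 days
    print(f"window end (today 12am UTC):  {today.date()}")

With the defaults above, 180 - 30 = 150, so the retained-metadata window is 150 days and the freshly mined window is 30 days; the scheduler still fires every Monday at 00:00 UTC, so consecutive 30-day mining windows overlap.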
msr.py CHANGED
@@ -21,7 +21,7 @@ load_dotenv()
 
 AGENTS_REPO = "SWE-Arena/swe_agents"
 REVIEW_METADATA_REPO = "SWE-Arena/review_metadata"
-LEADERBOARD_TIME_FRAME_DAYS = 180 # 6 months
+LEADERBOARD_TIME_FRAME_DAYS = 180 # Time frame for leaderboard
 
 # =============================================================================
 # UTILITY FUNCTIONS
@@ -175,9 +175,42 @@ def fetch_all_pr_metadata_single_query(client, identifiers, start_date, end_date
         FROM (
             {review_tables}
         )
-        WHERE
+        WHERE
             type = 'PullRequestReviewEvent'
             AND actor.login IN ({identifier_list})
+
+        UNION ALL
+
+        -- Get PR comments (IssueCommentEvent on PRs)
+        SELECT
+            JSON_EXTRACT_SCALAR(payload, '$.issue.html_url') as url,
+            CAST(created_at AS STRING) as reviewed_at,
+            actor.login as reviewer,
+            repo.name as repo_name,
+            CAST(JSON_EXTRACT_SCALAR(payload, '$.issue.number') AS INT64) as pr_number
+        FROM (
+            {review_tables}
+        )
+        WHERE
+            type = 'IssueCommentEvent'
+            AND actor.login IN ({identifier_list})
+            AND JSON_EXTRACT_SCALAR(payload, '$.issue.pull_request.url') IS NOT NULL
+
+        UNION ALL
+
+        -- Get review comments (PullRequestReviewCommentEvent)
+        SELECT
+            JSON_EXTRACT_SCALAR(payload, '$.pull_request.html_url') as url,
+            CAST(created_at AS STRING) as reviewed_at,
+            actor.login as reviewer,
+            repo.name as repo_name,
+            CAST(JSON_EXTRACT_SCALAR(payload, '$.pull_request.number') AS INT64) as pr_number
+        FROM (
+            {review_tables}
+        )
+        WHERE
+            type = 'PullRequestReviewCommentEvent'
+            AND actor.login IN ({identifier_list})
     ),
 
     pr_status AS (
@@ -317,7 +350,7 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
     if not token:
         raise Exception("No HuggingFace token found")
 
-    api = HfApi()
+    api = HfApi(token=token)
 
     # Group by date (year, month, day)
     grouped = group_metadata_by_date(metadata_list)
@@ -352,7 +385,6 @@ def save_review_metadata_to_hf(metadata_list, agent_identifier):
         folder_path=temp_dir,
         repo_id=REVIEW_METADATA_REPO,
         repo_type="dataset",
-        token=token,
         commit_message=f"Update: {agent_identifier} ({len(grouped)} daily files, {len(metadata_list)} total reviews)"
     )
     print(f" ✓ Batch upload complete for {agent_identifier}")