zhiminy committed
Commit 3bf98ae · 1 Parent(s): 83f71b6

refine msr

Files changed (2)
  1. app.py +59 -23
  2. msr.py +55 -25
app.py CHANGED
@@ -198,12 +198,48 @@ def request_with_backoff(method, url, *, headers=None, params=None, json_body=No
     print(f"Exceeded max retries for {url}")
     return None
 
+def get_github_tokens():
+    """Get all GitHub tokens from environment variables (all vars starting with GITHUB_TOKEN)."""
+    tokens = []
+    for key, value in os.environ.items():
+        if key.startswith('GITHUB_TOKEN') and value:
+            tokens.append(value)
+
+    if not tokens:
+        print("Warning: No GITHUB_TOKEN found. API rate limits: 60/hour (authenticated: 5000/hour)")
+    else:
+        print(f"✓ Loaded {len(tokens)} GitHub token(s) for rotation")
+
+    return tokens
+
+
 def get_github_token():
-    """Get GitHub token from environment variables."""
-    token = os.getenv('GITHUB_TOKEN')
-    if not token:
-        print("Warning: GITHUB_TOKEN not found. API rate limits: 60/hour (authenticated: 5000/hour)")
-    return token
+    """Get first GitHub token from environment variables (backward compatibility)."""
+    tokens = get_github_tokens()
+    return tokens[0] if tokens else None
+
+
+class TokenPool:
+    """
+    Manages a pool of GitHub tokens for load balancing across rate limits.
+    Rotates through tokens in round-robin fashion to distribute API calls.
+    """
+    def __init__(self, tokens):
+        self.tokens = tokens if tokens else [None]
+        self.current_index = 0
+
+    def get_next_token(self):
+        """Get the next token in round-robin order."""
+        if not self.tokens:
+            return None
+        token = self.tokens[self.current_index]
+        self.current_index = (self.current_index + 1) % len(self.tokens)
+        return token
+
+    def get_headers(self):
+        """Get headers with the next token in rotation."""
+        token = self.get_next_token()
+        return {'Authorization': f'token {token}'} if token else {}
 
 
 def validate_github_username(identifier):
@@ -225,7 +261,7 @@ def validate_github_username(identifier):
         return False, f"Validation error: {str(e)}"
 
 
-def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers, prs_by_url, debug_limit=None, depth=0):
+def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_pool, prs_by_url, debug_limit=None, depth=0):
     """
     Fetch reviews within a specific time range using time-based partitioning.
     Recursively splits the time range if hitting the 1000-result limit.
@@ -282,10 +318,10 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
             'sort': 'created',
             'order': 'asc'
         }
-        headers_with_accept = headers.copy() if headers else {}
+        headers = token_pool.get_headers()
 
         try:
-            response = request_with_backoff('GET', url, headers=headers_with_accept, params=params)
+            response = request_with_backoff('GET', url, headers=headers, params=params)
             if response is None:
                 print(f"{indent} Error: retries exhausted for range {start_str} to {end_str}")
                 return total_in_partition
@@ -334,7 +370,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(seconds=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, debug_limit, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
                 )
                 total_from_splits += count
 
@@ -355,7 +391,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(minutes=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, debug_limit, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
                 )
                 total_from_splits += count
 
@@ -376,7 +412,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(hours=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, debug_limit, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
                 )
                 total_from_splits += count
 
@@ -407,7 +443,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(days=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, debug_limit, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, debug_limit, depth + 1
                 )
                 total_from_splits += count
 
@@ -418,10 +454,10 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
 
             # Recursively fetch both halves
             count1 = fetch_reviews_with_time_partition(
-                base_query, start_date, mid_date, headers, prs_by_url, debug_limit, depth + 1
+                base_query, start_date, mid_date, token_pool, prs_by_url, debug_limit, depth + 1
             )
             count2 = fetch_reviews_with_time_partition(
-                base_query, mid_date + timedelta(days=1), end_date, headers, prs_by_url, debug_limit, depth + 1
+                base_query, mid_date + timedelta(days=1), end_date, token_pool, prs_by_url, debug_limit, depth + 1
             )
 
             return count1 + count2
@@ -491,7 +527,7 @@ def extract_review_metadata(pr):
     }
 
 
-def update_pr_status(metadata_list, headers, token):
+def update_pr_status(metadata_list, token_pool):
     """
     Update PR status for reviews to get current merged/closed state.
 
@@ -502,8 +538,7 @@ def update_pr_status(metadata_list, headers, token):
 
     Args:
         metadata_list: List of review metadata dictionaries
-        headers: HTTP headers for GitHub API
-        token: GitHub API token
+        token_pool: TokenPool instance for rotating tokens
 
     Returns:
         Updated metadata_list with current PR status
@@ -541,6 +576,7 @@ def update_pr_status(metadata_list, headers, token):
            owner, repo, pull_word, pr_number = parts[0], parts[1], parts[2], parts[3]
            api_url = f'https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}'
 
+           headers = token_pool.get_headers()
            response = request_with_backoff('GET', api_url, headers=headers, max_retries=3)
 
            if response and response.status_code == 200:
@@ -1683,8 +1719,8 @@ def fetch_and_update_daily_reviews():
    - Fetch new reviews from yesterday 12am to today 12am
    - Save all updated/new metadata back to HuggingFace
    """
-   token = get_github_token()
-   headers = {'Authorization': f'token {token}'} if token else {}
+   tokens = get_github_tokens()
+   token_pool = TokenPool(tokens)
 
    # Load all agents
    agents = load_agents_from_hf()
@@ -1741,12 +1777,12 @@ def fetch_and_update_daily_reviews():
        # This ensures we capture any reviews that may have been closed/merged since last check
        if recent_metadata:
            print(f"🔍 Examining {len(recent_metadata)} open reviews for status updates (checking closed_at)...")
-           recent_metadata = update_pr_status(recent_metadata, headers, token)
+           recent_metadata = update_pr_status(recent_metadata, token_pool)
            print(f" ✓ Updated PR status for existing reviews")
 
        # Step 3: Fetch NEW reviews from yesterday 12am to today 12am
        print(f"🔍 Fetching new reviews from {yesterday_midnight.isoformat()} to {today_midnight.isoformat()}...")
-
+
        base_query = f'is:pr review:approved author:{identifier} -is:draft'
        prs_by_url = {}
 
@@ -1754,7 +1790,7 @@ def fetch_and_update_daily_reviews():
            base_query,
            yesterday_midnight,
            today_midnight,
-           headers,
+           token_pool,
            prs_by_url,
            debug_limit=None
        )
@@ -1772,7 +1808,7 @@ def fetch_and_update_daily_reviews():
        # Step 4: Update PR status for new reviews
        if yesterday_metadata:
            print(f" Updating PR status for {len(yesterday_metadata)} new reviews...")
-           yesterday_metadata = update_pr_status(yesterday_metadata, headers, token)
+           yesterday_metadata = update_pr_status(yesterday_metadata, token_pool)
 
        # Step 5: Combine and save all metadata
        all_updated_metadata = recent_metadata + yesterday_metadata
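
The TokenPool class added above is a plain round-robin rotator: every call to get_headers() consumes the next token, so successive search and REST calls are spread evenly across all configured tokens, and with N tokens the effective rate-limit budget grows roughly N-fold. A minimal usage sketch follows; the import path and token strings are hypothetical (identical definitions exist in app.py and msr.py):

# Sketch only: demonstrates the round-robin behaviour of the TokenPool added in this commit.
# Assumes msr.py is importable as a module; the token values are placeholders.
from msr import TokenPool

pool = TokenPool(['ghp_exampleTokenA', 'ghp_exampleTokenB'])  # hypothetical tokens

for i in range(4):
    headers = pool.get_headers()   # each call advances the rotation by one token
    print(i, headers['Authorization'])
# 0 token ghp_exampleTokenA
# 1 token ghp_exampleTokenB
# 2 token ghp_exampleTokenA
# 3 token ghp_exampleTokenB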
msr.py CHANGED
@@ -52,12 +52,42 @@ def save_jsonl(filename, data):
         f.write(json.dumps(item) + '\n')
 
 
-def get_github_token():
-    """Get GitHub token from environment variables."""
-    token = os.getenv('GITHUB_TOKEN')
-    if not token:
-        print("Warning: GITHUB_TOKEN not found. API rate limits: 60/hour (authenticated: 5000/hour)")
-    return token
+def get_github_tokens():
+    """Get all GitHub tokens from environment variables (all vars starting with GITHUB_TOKEN)."""
+    tokens = []
+    for key, value in os.environ.items():
+        if key.startswith('GITHUB_TOKEN') and value:
+            tokens.append(value)
+
+    if not tokens:
+        print("Warning: No GITHUB_TOKEN found. API rate limits: 60/hour (authenticated: 5000/hour)")
+    else:
+        print(f"✓ Loaded {len(tokens)} GitHub token(s) for rotation")
+
+    return tokens
+
+
+class TokenPool:
+    """
+    Manages a pool of GitHub tokens for load balancing across rate limits.
+    Rotates through tokens in round-robin fashion to distribute API calls.
+    """
+    def __init__(self, tokens):
+        self.tokens = tokens if tokens else [None]
+        self.current_index = 0
+
+    def get_next_token(self):
+        """Get the next token in round-robin order."""
+        if not self.tokens:
+            return None
+        token = self.tokens[self.current_index]
+        self.current_index = (self.current_index + 1) % len(self.tokens)
+        return token
+
+    def get_headers(self):
+        """Get headers with the next token in rotation."""
+        token = self.get_next_token()
+        return {'Authorization': f'token {token}'} if token else {}
 
 
 def get_hf_token():
@@ -144,7 +174,7 @@ def request_with_backoff(method, url, *, headers=None, params=None, json_body=No
     return None
 
 
-def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers, prs_by_url, depth=0):
+def fetch_reviews_with_time_partition(base_query, start_date, end_date, token_pool, prs_by_url, depth=0):
     """
     Fetch reviews within a specific time range using time-based partitioning.
     Recursively splits the time range if hitting the 1000-result limit.
@@ -193,10 +223,10 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
             'sort': 'created',
             'order': 'asc'
         }
-        headers_with_accept = headers.copy() if headers else {}
+        headers = token_pool.get_headers()
 
        try:
-            response = request_with_backoff('GET', url, headers=headers_with_accept, params=params)
+            response = request_with_backoff('GET', url, headers=headers, params=params)
             if response is None:
                 print(f"{indent} Error: retries exhausted for range {start_str} to {end_str}")
                 return total_in_partition
@@ -241,7 +271,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(seconds=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                 )
                 total_from_splits += count
 
@@ -260,7 +290,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(minutes=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                 )
                 total_from_splits += count
 
@@ -279,7 +309,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(hours=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                 )
                 total_from_splits += count
 
@@ -308,7 +338,7 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
                 split_start = split_start + timedelta(days=1)
 
                 count = fetch_reviews_with_time_partition(
-                    base_query, split_start, split_end, headers, prs_by_url, depth + 1
+                    base_query, split_start, split_end, token_pool, prs_by_url, depth + 1
                 )
                 total_from_splits += count
 
@@ -318,10 +348,10 @@ def fetch_reviews_with_time_partition(base_query, start_date, end_date, headers,
             mid_date = start_date + time_diff / 2
 
             count1 = fetch_reviews_with_time_partition(
-                base_query, start_date, mid_date, headers, prs_by_url, depth + 1
+                base_query, start_date, mid_date, token_pool, prs_by_url, depth + 1
             )
             count2 = fetch_reviews_with_time_partition(
-                base_query, mid_date + timedelta(days=1), end_date, headers, prs_by_url, depth + 1
+                base_query, mid_date + timedelta(days=1), end_date, token_pool, prs_by_url, depth + 1
             )
 
             return count1 + count2
@@ -382,7 +412,7 @@ def extract_review_metadata(pr):
     }
 
 
-def update_pr_status(metadata_list, headers, token):
+def update_pr_status(metadata_list, token_pool):
     """
     Update PR status for reviews to get current merged/closed state.
 
@@ -391,8 +421,7 @@ def update_pr_status(metadata_list, headers, token):
 
     Args:
        metadata_list: List of review metadata dictionaries
-       headers: HTTP headers for GitHub API
-       token: GitHub API token
+       token_pool: TokenPool instance for rotating tokens
 
     Returns:
        Updated metadata_list with current PR status
@@ -425,6 +454,7 @@ def update_pr_status(metadata_list, headers, token):
            owner, repo, pull_word, pr_number = parts[0], parts[1], parts[2], parts[3]
            api_url = f'https://api.github.com/repos/{owner}/{repo}/pulls/{pr_number}'
 
+           headers = token_pool.get_headers()
            response = request_with_backoff('GET', api_url, headers=headers, max_retries=3)
 
            if response and response.status_code == 200:
@@ -468,7 +498,7 @@ def update_pr_status(metadata_list, headers, token):
     return metadata_list
 
 
-def fetch_all_reviews_metadata(identifier, agent_name, token=None):
+def fetch_all_reviews_metadata(identifier, agent_name, token_pool):
     """
     Fetch PR reviews associated with a GitHub user or bot for the past LEADERBOARD_TIME_FRAME_DAYS.
     Returns lightweight metadata instead of full review objects.
@@ -482,12 +512,11 @@ def fetch_all_reviews_metadata(identifier, agent_name, token=None):
     Args:
        identifier: GitHub username or bot identifier
        agent_name: Human-readable name of the agent for metadata purposes
-       token: GitHub API token for authentication
+       token_pool: TokenPool instance for rotating tokens
 
     Returns:
        List of dictionaries containing minimal PR review metadata with PR status
     """
-    headers = {'Authorization': f'token {token}'} if token else {}
 
     # Define query pattern for PR reviews
     query_patterns = [f'is:pr reviewed-by:{identifier}']
@@ -512,7 +541,7 @@ def fetch_all_reviews_metadata(identifier, agent_name, token=None):
            query_pattern,
            start_date,
            end_date,
-           headers,
+           token_pool,
            prs_by_url
        )
 
@@ -534,7 +563,7 @@ def fetch_all_reviews_metadata(identifier, agent_name, token=None):
 
     # Update PR status to get current merged/closed state
     print(f"🔍 Updating PR status for reviewed PRs...")
-    metadata_list = update_pr_status(metadata_list, headers, token)
+    metadata_list = update_pr_status(metadata_list, token_pool)
 
     # Calculate memory savings
     import sys
@@ -725,7 +754,8 @@ def mine_all_agents():
     """
     Mine review metadata for all agents within LEADERBOARD_TIME_FRAME_DAYS and save to HuggingFace.
     """
-    token = get_github_token()
+    tokens = get_github_tokens()
+    token_pool = TokenPool(tokens)
 
     # Load agent metadata from HuggingFace
     agents = load_agents_from_hf()
@@ -753,7 +783,7 @@ def mine_all_agents():
        print(f"{'='*80}")
 
        # Fetch review metadata
-       metadata = fetch_all_reviews_metadata(identifier, agent_name, token)
+       metadata = fetch_all_reviews_metadata(identifier, agent_name, token_pool)
 
        if metadata:
            print(f"💾 Saving {len(metadata)} review records...")