zhiminy committed on
Commit
22ee755
·
1 Parent(s): f1f4f5c
Files changed (2) hide show
  1. app.py +5 -5
  2. msr.py +3 -3
app.py CHANGED
@@ -208,7 +208,7 @@ def get_bigquery_client():
208
  raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
209
 
210
 
211
- def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=100):
212
  """
213
  Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
214
  Splits agents into smaller batches to avoid performance issues with large queries.
@@ -218,7 +218,7 @@ def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, bat
218
  identifiers: List of GitHub usernames/bot identifiers
219
  start_date: Start datetime (timezone-aware)
220
  end_date: End datetime (timezone-aware)
221
- batch_size: Number of agents to process per batch (default: 100)
222
 
223
  Returns:
224
  Dictionary mapping agent identifier to list of PR metadata
@@ -2148,7 +2148,7 @@ def construct_leaderboard_from_metadata():
2148
  # UI FUNCTIONS
2149
  # =============================================================================
2150
 
2151
- def create_monthly_metrics_plot(top_n=None):
2152
  """
2153
  Create a Plotly figure with dual y-axes showing:
2154
  - Left y-axis: Acceptance Rate (%) as line curves
@@ -2157,7 +2157,7 @@ def create_monthly_metrics_plot(top_n=None):
2157
  Each agent gets a unique color for both their line and bars.
2158
 
2159
  Args:
2160
- top_n: If specified, only show metrics for the top N agents by total reviews.
2161
  """
2162
  # Try loading from saved dataset first
2163
  saved_data = load_leaderboard_data_from_hf()
@@ -2652,7 +2652,7 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
2652
 
2653
  # Load monthly metrics when app starts
2654
  app.load(
2655
- fn=lambda: create_monthly_metrics_plot(top_n=5),
2656
  inputs=[],
2657
  outputs=[monthly_metrics_plot]
2658
  )
 
208
  raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
209
 
210
 
211
+ def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=50):
212
  """
213
  Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
214
  Splits agents into smaller batches to avoid performance issues with large queries.
 
218
  identifiers: List of GitHub usernames/bot identifiers
219
  start_date: Start datetime (timezone-aware)
220
  end_date: End datetime (timezone-aware)
221
+ batch_size: Number of agents to process per batch (default: 50)
222
 
223
  Returns:
224
  Dictionary mapping agent identifier to list of PR metadata
 
2148
  # UI FUNCTIONS
2149
  # =============================================================================
2150
 
2151
+ def create_monthly_metrics_plot(top_n=5):
2152
  """
2153
  Create a Plotly figure with dual y-axes showing:
2154
  - Left y-axis: Acceptance Rate (%) as line curves
 
2157
  Each agent gets a unique color for both their line and bars.
2158
 
2159
  Args:
2160
+ top_n: Number of top agents to show (default: 5)
2161
  """
2162
  # Try loading from saved dataset first
2163
  saved_data = load_leaderboard_data_from_hf()
 
2652
 
2653
  # Load monthly metrics when app starts
2654
  app.load(
2655
+ fn=lambda: create_monthly_metrics_plot(),
2656
  inputs=[],
2657
  outputs=[monthly_metrics_plot]
2658
  )
msr.py CHANGED
@@ -222,7 +222,7 @@ def generate_table_union_statements(start_date, end_date):
222
  # BIGQUERY FUNCTIONS
223
  # =============================================================================
224
 
225
- def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=100):
226
  """
227
  Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
228
  Splits agents into smaller batches to avoid performance issues with large queries.
@@ -1005,7 +1005,7 @@ def mine_all_agents():
1005
  try:
1006
  # Use batched approach for better performance
1007
  all_metadata = fetch_all_pr_metadata_batched(
1008
- client, identifiers, start_date, end_date, batch_size=100
1009
  )
1010
  except Exception as e:
1011
  print(f"✗ Error during BigQuery fetch: {str(e)}")
@@ -1055,7 +1055,7 @@ def mine_all_agents():
1055
 
1056
  # Calculate number of batches
1057
  total_identifiers = len(identifiers)
1058
- batch_size = 100
1059
  num_batches = (total_identifiers + batch_size - 1) // batch_size # Ceiling division
1060
 
1061
  print(f"\n{'='*80}")
 
222
  # BIGQUERY FUNCTIONS
223
  # =============================================================================
224
 
225
+ def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=50):
226
  """
227
  Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
228
  Splits agents into smaller batches to avoid performance issues with large queries.
 
1005
  try:
1006
  # Use batched approach for better performance
1007
  all_metadata = fetch_all_pr_metadata_batched(
1008
+ client, identifiers, start_date, end_date, batch_size=50
1009
  )
1010
  except Exception as e:
1011
  print(f"✗ Error during BigQuery fetch: {str(e)}")
 
1055
 
1056
  # Calculate number of batches
1057
  total_identifiers = len(identifiers)
1058
+ batch_size = 50
1059
  num_batches = (total_identifiers + batch_size - 1) // batch_size # Ceiling division
1060
 
1061
  print(f"\n{'='*80}")