app.py CHANGED

@@ -208,7 +208,7 @@ def get_bigquery_client():
         raise ValueError("GOOGLE_APPLICATION_CREDENTIALS_JSON not found in environment")
 
 
-def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=
+def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=50):
     """
     Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
     Splits agents into smaller batches to avoid performance issues with large queries.
@@ -218,7 +218,7 @@ def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, bat
         identifiers: List of GitHub usernames/bot identifiers
         start_date: Start datetime (timezone-aware)
         end_date: End datetime (timezone-aware)
-        batch_size: Number of agents to process per batch (default:
+        batch_size: Number of agents to process per batch (default: 50)
 
     Returns:
         Dictionary mapping agent identifier to list of PR metadata
@@ -2148,7 +2148,7 @@ def construct_leaderboard_from_metadata():
 # UI FUNCTIONS
 # =============================================================================
 
-def create_monthly_metrics_plot(top_n=None):
+def create_monthly_metrics_plot(top_n=5):
     """
     Create a Plotly figure with dual y-axes showing:
     - Left y-axis: Acceptance Rate (%) as line curves
@@ -2157,7 +2157,7 @@ def create_monthly_metrics_plot(top_n=None):
     Each agent gets a unique color for both their line and bars.
 
     Args:
-        top_n:
+        top_n: Number of top agents to show (default: 5)
     """
     # Try loading from saved dataset first
     saved_data = load_leaderboard_data_from_hf()
@@ -2652,7 +2652,7 @@ with gr.Blocks(title="SWE Agent Review Leaderboard", theme=gr.themes.Soft()) as
 
     # Load monthly metrics when app starts
     app.load(
-        fn=lambda: create_monthly_metrics_plot(
+        fn=lambda: create_monthly_metrics_plot(),
         inputs=[],
         outputs=[monthly_metrics_plot]
     )
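For context, the batching pattern the diff standardizes on (batch_size=50) looks roughly like the sketch below. This is a minimal illustration, not the app's actual implementation: fetch_batch is a hypothetical placeholder for the real per-batch BigQuery query, and the identifiers are dummy values.

def fetch_batch(client, batch, start_date, end_date):
    # Placeholder: the real code runs one BigQuery query covering every agent in `batch`.
    return {identifier: [] for identifier in batch}

def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=50):
    results = {}
    num_batches = (len(identifiers) + batch_size - 1) // batch_size  # ceiling division
    for i in range(num_batches):
        batch = identifiers[i * batch_size:(i + 1) * batch_size]
        print(f"Batch {i + 1}/{num_batches}: {len(batch)} agents")
        results.update(fetch_batch(client, batch, start_date, end_date))
    return results

# 120 dummy identifiers -> 3 batches (50 + 50 + 20)
metadata = fetch_all_pr_metadata_batched(None, [f"bot-{n}" for n in range(120)], None, None)
print(len(metadata))  # 120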
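The create_monthly_metrics_plot change sets top_n=5 as the default. A minimal sketch of the dual-axis layout its docstring describes (acceptance-rate lines on the left axis, bars on the right) is shown here; the agent name, months, and values are made up for illustration.

from plotly.subplots import make_subplots
import plotly.graph_objects as go

months = ["2024-01", "2024-02", "2024-03"]
acceptance_rate = [62.0, 68.5, 71.2]   # percent, left y-axis
pr_counts = [120, 180, 210]            # counts, right y-axis

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(go.Scatter(x=months, y=acceptance_rate, mode="lines+markers",
                         name="example-agent acceptance rate"), secondary_y=False)
fig.add_trace(go.Bar(x=months, y=pr_counts, opacity=0.5,
                     name="example-agent PRs"), secondary_y=True)
fig.update_yaxes(title_text="Acceptance Rate (%)", secondary_y=False)
fig.update_yaxes(title_text="PR count", secondary_y=True)
fig.show()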
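The app.load hunk now calls create_monthly_metrics_plot() with no arguments, so the new top_n=5 default applies at startup. The wiring follows the standard Gradio pattern, roughly as sketched below; the placeholder figure stands in for the real monthly metrics chart.

import gradio as gr
import plotly.graph_objects as go

def make_plot():
    # Placeholder for create_monthly_metrics_plot()
    return go.Figure(go.Bar(x=["Jan", "Feb"], y=[10, 20]))

with gr.Blocks(title="SWE Agent Review Leaderboard") as app:
    monthly_metrics_plot = gr.Plot(label="Monthly Metrics")
    # Runs once when the page loads, populating the plot on startup
    app.load(fn=lambda: make_plot(), inputs=[], outputs=[monthly_metrics_plot])

if __name__ == "__main__":
    app.launch()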
msr.py CHANGED

@@ -222,7 +222,7 @@ def generate_table_union_statements(start_date, end_date):
 # BIGQUERY FUNCTIONS
 # =============================================================================
 
-def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=
+def fetch_all_pr_metadata_batched(client, identifiers, start_date, end_date, batch_size=50):
     """
     Fetch PR review metadata for ALL agents using BATCHED BigQuery queries.
     Splits agents into smaller batches to avoid performance issues with large queries.
@@ -1005,7 +1005,7 @@ def mine_all_agents():
     try:
         # Use batched approach for better performance
         all_metadata = fetch_all_pr_metadata_batched(
-            client, identifiers, start_date, end_date, batch_size=
+            client, identifiers, start_date, end_date, batch_size=50
        )
    except Exception as e:
        print(f"✗ Error during BigQuery fetch: {str(e)}")
@@ -1055,7 +1055,7 @@ def mine_all_agents():
 
     # Calculate number of batches
     total_identifiers = len(identifiers)
-    batch_size =
+    batch_size = 50
     num_batches = (total_identifiers + batch_size - 1) // batch_size  # Ceiling division
 
     print(f"\n{'='*80}")
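As a quick check on the ceiling division used to size the batch loop in mine_all_agents (the agent count here is illustrative only):

total_identifiers = 73          # e.g. 73 agents to mine
batch_size = 50
num_batches = (total_identifiers + batch_size - 1) // batch_size
print(num_batches)              # 2 -> one full batch of 50 and a final batch of 23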