Spaces:
Sleeping
Sleeping
app prototype
Browse files
app.py
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
import gradio as gr
|
| 3 |
+
|
| 4 |
+
# Load the per-model test counts. The code below reads the columns
# 'model', 'test_type' and 'number_of_tests' from this CSV.
model_test_results = pd.read_csv('test_results_by_type.csv')

# Map every model that has at least one failed test to its failure count.
# NOTE(review): .first() keeps only the first 'failed' row of each model,
# which presumes the CSV holds a single 'failed' row per model -- confirm.
_failed_rows = model_test_results[
    (model_test_results['test_type'] == 'failed')
    & (model_test_results['number_of_tests'] > 0)
]
failed_models_counts = (
    _failed_rows.groupby('model')['number_of_tests'].first().to_dict()
)


def _label_model(name):
    """Return *name* suffixed with ❌ and its failure count, or with ✅."""
    if name in failed_models_counts:
        return f"{name} ❌ ({failed_models_counts[name]})"
    return f"{name} ✅"


# Only the 'model' column is needed to build the label, so map over that
# column instead of running a row-wise DataFrame.apply(axis=1). This avoids
# materialising a Series per row and still yields a (empty) Series when the
# CSV contains a header but no data rows.
model_test_results['model'] = model_test_results['model'].map(_label_model)

# Put the 'failed' rows first, largest failure count on top, followed by
# every other row in its existing order.
failed_tests = model_test_results[
    model_test_results['test_type'] == 'failed'
].sort_values('number_of_tests', ascending=False)
other_tests = model_test_results[model_test_results['test_type'] != 'failed']

model_test_results = pd.concat([failed_tests, other_tests])
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
# Build the Gradio page: a heading plus one bar chart of test counts per
# model, coloured by test type.
with gr.Blocks() as test_results_viz:
    gr.Markdown("# Test Results by Model")

    # Desired left-to-right order of the bars: sort the rows by
    # 'conclusion' (ascending), then 'test_type' and 'number_of_tests'
    # (both descending), and keep each model label at its first occurrence.
    # NOTE(review): this requires a 'conclusion' column in the CSV -- confirm.
    model_order = model_test_results.sort_values(
        by=['conclusion', 'test_type', 'number_of_tests'],
        ascending=[True, False, False],
    )['model'].unique().tolist()

    test_results_plot = gr.BarPlot(
        model_test_results,
        x="model",
        y="number_of_tests",  # Bar height
        color="test_type",  # One colour per test type
        color_map={"passed": "#008550", "skipped": "#F0B702", "failed": "#8B1710"},
        title="Test Results by Model",
        x_title="Model",
        y_title="Number of Tests",
        height=600,
        # NOTE(review): confirm the installed Gradio version still honours
        # `width` on BarPlot; kept unchanged from the original call.
        width=1000,
        x_label_angle=45,  # Rotate x-axis labels by 45 degrees
        # The custom category order is passed through `sort`, which the
        # Gradio BarPlot documentation describes as accepting a list of
        # category names. The original used `x_order`, which is not a
        # documented BarPlot parameter, so the order computed above was
        # not being applied.
        sort=model_order,
    )

if __name__ == "__main__":
    test_results_viz.launch()
|