model_trace

Runtime error

App Files Files Community

Ahmed Ahmed committed on Jul 26

Commit

36b1a23

1 Parent(s): 4864926

try again

Browse files

Files changed (2) hide show

src/evaluation/initialize_models.py +47 -0
src/leaderboard/read_evals.py +18 -0

src/evaluation/initialize_models.py CHANGED Viewed

@@ -73,6 +73,50 @@ def create_model_result_file(model_name, precision="float16"):
         sys.stderr.flush()
         return None
 def initialize_allowed_models():
     """
     Initialize result files for all allowed models.
@@ -81,6 +125,9 @@ def initialize_allowed_models():
     sys.stderr.write(f"📋 Models to initialize: {ALLOWED_MODELS}\n")
     sys.stderr.flush()
     created_files = []
     for model_name in ALLOWED_MODELS:

         sys.stderr.flush()
         return None
+def clean_non_allowed_results():
+    """
+    Remove result files for models that are not in the allowed list.
+    """
+    sys.stderr.write(f"\n🧹 CLEANING NON-ALLOWED RESULT FILES\n")
+    sys.stderr.flush()
+    if not os.path.exists(EVAL_RESULTS_PATH):
+        sys.stderr.write("📁 Results directory doesn't exist, nothing to clean\n")
+        sys.stderr.flush()
+        return
+    removed_count = 0
+    # Walk the whole results tree (subdirectories included); only *.json files are inspected
+    for root, dirs, files in os.walk(EVAL_RESULTS_PATH):
+        for file in files:
+            if not file.endswith('.json'):
+                continue
+            file_path = os.path.join(root, file)
+            try:
+                # Read config.model_name from the JSON; any exception raised in this try block is logged below and the loop moves on
+                with open(file_path, 'r') as f:
+                    data = json.load(f)
+                config = data.get("config", {})
+                model_name = config.get("model_name", "")
+                if model_name and not is_model_allowed(model_name):  # delete only when a name is present and is_model_allowed rejects it
+                    sys.stderr.write(f"🗑️ Removing non-allowed model result: {file_path} (model: {model_name})\n")
+                    os.remove(file_path)
+                    removed_count += 1
+                elif not model_name:  # a missing/empty model_name means the file is kept, only reported
+                    sys.stderr.write(f"⚠️ Skipping file with no model_name: {file_path}\n")
+            except Exception as e:
+                sys.stderr.write(f"⚠️ Error processing file {file_path}: {e}\n")
+                continue
+    sys.stderr.write(f"✅ Removed {removed_count} non-allowed result files\n")
+    sys.stderr.flush()
 def initialize_allowed_models():
     """
     Initialize result files for all allowed models.
     sys.stderr.write(f"📋 Models to initialize: {ALLOWED_MODELS}\n")
     sys.stderr.flush()
+    # First, clean up any existing non-allowed results
+    clean_non_allowed_results()
     created_files = []
     for model_name in ALLOWED_MODELS:

src/leaderboard/read_evals.py CHANGED Viewed

@@ -207,6 +207,24 @@ def get_raw_eval_results(results_path: str) -> list[EvalResult]:
         try:
             sys.stderr.write(f"\nProcessing file: {model_result_filepath}\n")
             sys.stderr.flush()
             # Creation of result
             eval_result = EvalResult.init_from_json_file(model_result_filepath)
             sys.stderr.write(f"Created result object for: {eval_result.full_model}\n")

         try:
             sys.stderr.write(f"\nProcessing file: {model_result_filepath}\n")
             sys.stderr.flush()
+            # Quick pre-check: Try to extract model name from file before full processing
+            try:
+                with open(model_result_filepath, 'r') as f:
+                    data = json.load(f)
+                config = data.get("config", {})
+                model_name = config.get("model_name", "")
+                if model_name and not is_model_allowed(model_name):
+                    sys.stderr.write(f"⏭️ Skipping non-allowed model file: {model_result_filepath} (model: {model_name})\n")
+                    sys.stderr.flush()
+                    continue
+            except Exception as e:
+                sys.stderr.write(f"⚠️ Error pre-checking file {model_result_filepath}: {e}\n")
+                sys.stderr.flush()
+                continue
             # Creation of result
             eval_result = EvalResult.init_from_json_file(model_result_filepath)
             sys.stderr.write(f"Created result object for: {eval_result.full_model}\n")