check_chat_template #984
by alozowski (HF Staff) - opened
- src/submission/check_validity.py +22 -1
- src/submission/submit.py +10 -2
    	
src/submission/check_validity.py  CHANGED

@@ -6,7 +6,7 @@ from collections import defaultdict
 from datetime import datetime, timedelta, timezone
 
 import huggingface_hub
-from huggingface_hub import ModelCard
+from huggingface_hub import ModelCard, hf_hub_download
 from huggingface_hub.hf_api import ModelInfo, get_safetensors_metadata, parse_safetensors_file_metadata
 from transformers import AutoConfig, AutoTokenizer
 
@@ -179,7 +179,28 @@ def already_submitted_models(requested_models_dir: str) -> set[str]:
 
     return set(file_names), users_to_submission_dates
 
+def check_chat_template(model: str, revision: str) -> tuple[bool, str]:
+    try:
+        # Attempt to download only the tokenizer_config.json file
+        config_file = hf_hub_download(
+            repo_id=model,
+            filename="tokenizer_config.json",
+            revision=revision,
+            repo_type="model"
+        )
+
+        # Read and parse the tokenizer_config.json file
+        with open(config_file, 'r') as f:
+            tokenizer_config = json.load(f)
 
+        # Check if chat_template exists in the tokenizer configuration
+        if 'chat_template' not in tokenizer_config:
+            return False, f"The model {model} doesn't have a chat_template in its tokenizer_config.json. Please add a chat_template before submitting or submit without it."
+
+        return True, ""
+    except Exception as e:
+        return False, f"Error checking chat_template for model {model}: {str(e)}"
+
 def get_model_tags(model_card, model: str):
     is_merge_from_metadata = False
     is_moe_from_metadata = False
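For reviewers who want to exercise the new helper locally, a minimal sketch (it assumes the Space repository is checked out with its dependencies installed; the model ID is only illustrative):

# Minimal sketch: call the new helper directly from a Python shell at the
# repo root. The model ID is illustrative; any Hub model can be used.
from src.submission.check_validity import check_chat_template

ok, message = check_chat_template("HuggingFaceH4/zephyr-7b-beta", "main")
if ok:
    print("chat_template found; a 'use chat template' submission would pass this check")
else:
    print(f"submission would be rejected: {message}")

Because the helper fetches only tokenizer_config.json via hf_hub_download, the check stays cheap even for very large model repositories.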
    	
src/submission/submit.py  CHANGED

@@ -24,6 +24,7 @@ from src.submission.check_validity import (
     get_model_size,
     is_model_on_hub,
     user_submission_permission,
+    check_chat_template,
 )
 
 from src.voting.vote_system import VoteManager
@@ -114,6 +115,7 @@ def add_new_eval(
     except Exception as e:
         return styled_error("Could not get your model information. Please fill it up properly.")
 
+    # Has it been submitted already?
     model_key = f"{model}_{model_info.sha}_{precision}"
     if model_key in requested_models:
         return styled_error(f"The model '{model}' with revision '{model_info.sha}' and precision '{precision}' has already been submitted.")
@@ -123,12 +125,12 @@ def add_new_eval(
     if model_size is None:
         return styled_error(error_text)
 
-    #
+    # Absolute size limit for float16 and bfloat16
     if precision in ["float16", "bfloat16"] and model_size > 100:
         return styled_error(f"Sadly, models larger than 100B parameters cannot be submitted in {precision} precision at this time. "
                             f"Your model size: {model_size:.2f}B parameters.")
 
-    #
+    # Precision-adjusted size limit for 8bit, 4bit, and GPTQ
     if precision in ["8bit", "4bit", "GPTQ"]:
         size_checker = ModelSizeChecker(model=model, precision=precision, model_size_in_b=model_size)
 
@@ -163,6 +165,12 @@ def add_new_eval(
     modelcard_OK, error_msg, model_card = check_model_card(model)
     if not modelcard_OK:
         return styled_error(error_msg)
+
+    # Check the chat template submission
+    if use_chat_template:
+        chat_template_valid, chat_template_error = check_chat_template(model, revision)
+        if not chat_template_valid:
+            return styled_error(chat_template_error)
 
     # Seems good, creating the eval
     print("Adding new eval")
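A test sketch for the new check, mocking hf_hub_download so no network access is needed. The test module is hypothetical and not part of this PR; it also assumes json is importable inside check_validity.py, since the new function calls json.load without the diff adding that import.

# Hypothetical pytest sketch, not part of this PR: exercises check_chat_template
# against temporary tokenizer_config.json files instead of real Hub downloads.
import json
from unittest.mock import patch

from src.submission.check_validity import check_chat_template


def test_rejects_missing_chat_template(tmp_path):
    config = tmp_path / "tokenizer_config.json"
    config.write_text(json.dumps({"tokenizer_class": "LlamaTokenizer"}))
    # Patch the download so the helper reads our local file instead of the Hub
    with patch("src.submission.check_validity.hf_hub_download", return_value=str(config)):
        ok, msg = check_chat_template("org/model", "main")
    assert not ok
    assert "chat_template" in msg


def test_accepts_present_chat_template(tmp_path):
    config = tmp_path / "tokenizer_config.json"
    config.write_text(json.dumps({"chat_template": "{{ messages }}"}))
    with patch("src.submission.check_validity.hf_hub_download", return_value=str(config)):
        ok, msg = check_chat_template("org/model", "main")
    assert ok
    assert msg == ""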
