# TunisianEncodersArena
#
# Runtime error
#
# File size: 16,284 Bytes

import json
import os
import time
import traceback
from datetime import datetime, timezone

import torch
from datasets import load_dataset
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import Tasks
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
from src.evaluator.evaluate import evaluate_model, EvaluationStatus, EvaluationResult
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
    is_model_on_hub,
)

# Module-level submission cache. Both names start unset and are rebound inside
# add_new_eval() from the result of already_submitted_models(EVAL_REQUESTS_PATH).
REQUESTED_MODELS = USERS_TO_SUBMISSION_DATES = None

def create_eval_request(
    model: str,
    base_model: str,
    revision: str,
    precision: str,
    weight_type: str,
    model_type: str,
):
    """Write an evaluation request to disk and upload it to the queue repo.

    The request is serialized as JSON inside ``EVAL_REQUESTS_PATH`` and then
    pushed to the ``QUEUE_REPO`` dataset repository with ``API.upload_file``.
    The status is recorded as ``EvaluationStatus.PENDING`` and the submission
    time as the current UTC time in ISO-8601 form.

    Args:
        model: Model identifier; when it contains ``/`` the part before the
            first slash is treated as the user name.
        base_model: Stored verbatim in the request.
        revision: Stored verbatim in the request and used in the file name.
        precision: Stored verbatim in the request and used in the file name.
        weight_type: Stored verbatim in the request and used in the file name.
        model_type: Stored verbatim in the request.

    Returns:
        The result of ``styled_message(...)`` on success, or of
        ``styled_error(...)`` describing the failure. Exceptions raised while
        writing or uploading are caught and reported, never propagated.
    """
    try:
        request_data = {
            'model': model,
            'base_model': base_model,
            'revision': revision,
            'precision': precision,
            'weight_type': weight_type,
            'model_type': model_type,
            'status': EvaluationStatus.PENDING.value,
            'submitted_time': datetime.now(timezone.utc).isoformat(),
        }

        username = model.split('/')[0] if '/' in model else None

        # Flatten every "/" in the file name, not only the one in the model id:
        # a revision such as "refs/pr/1" would otherwise be interpreted as
        # nested directories and make the write below fail.
        request_filename = (
            f"{username or 'unknown'}_{model}_eval_request_"
            f"{revision}_{precision}_{weight_type}.json"
        ).replace('/', '_')
        request_path = os.path.join(EVAL_REQUESTS_PATH, request_filename)

        # The requests directory may not exist yet (e.g. on a fresh checkout).
        os.makedirs(EVAL_REQUESTS_PATH, exist_ok=True)
        with open(request_path, 'w', encoding='utf-8') as f:
            json.dump(request_data, f, indent=2)

        print(f"Created evaluation request: {request_filename}")

        # Repository paths always use forward slashes, so build the target
        # path explicitly instead of relying on the local OS separator.
        path_in_repo = f"{username}/{request_filename}" if username else request_filename
        API.upload_file(
            path_or_fileobj=request_path,
            path_in_repo=path_in_repo,
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add evaluation request for {model}",
            token=TOKEN,
        )

        print(f"Uploaded evaluation request to {QUEUE_REPO}")

        return styled_message(
            "Evaluation request created! Please wait for the evaluation to complete."
        )
    except Exception as e:
        # Boundary handler: report the failure to the caller instead of
        # crashing, but keep the full traceback in the logs for debugging.
        print(f"Error creating evaluation request: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return styled_error(f"Failed to create evaluation request: {e}")

def add_new_eval(
    model: str,
    base_model: str,
    revision: str,
    precision: str,
    weight_type: str,
    model_type: str,
):
    """Validate model and create evaluation request"""
    try:
        print("\n=== Starting evaluation submission ===")
        print(f"Submission time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
        print(f"Model: {model}")
        print(f"Base model: {base_model}")
        print(f"Revision: {revision}")
        print(f"Precision: {precision}")
        print(f"Weight type: {weight_type}")
        print(f"Model type: {model_type}")
        print(f"Evaluation requests path: {EVAL_REQUESTS_PATH}")
        print(f"Queue repo: {QUEUE_REPO}")

        # Always refresh the cache before checking for duplicates
        print("\n=== Checking for duplicate submissions ===")
        global REQUESTED_MODELS
        global USERS_TO_SUBMISSION_DATES
        start_time = time.time()
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
        print(f"Cache refresh completed in {time.time() - start_time:.2f} seconds")
        print(f"Found {len(REQUESTED_MODELS)} existing submissions")

        user_name = ""
        model_path = model
        if "/" in model:
            user_name = model.split("/")[0]
            model_path = model.split("/")[1]
        print(f"\nUser name: {user_name}")
        print(f"Model path: {model_path}")

        precision = precision.split(" ")[0]
        if revision == "":
            revision = "main"
            print("Using default revision: main")
        
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        
        # Check if model is already submitted
        print("\n=== Checking for existing submission ===")
        model_key = f"{model}_{revision}_{precision}"
        if model_key in REQUESTED_MODELS:
            print(f"Found existing submission with key: {model_key}")
            # Get the status from the queue file
            queue_file = REQUESTED_MODELS[model_key]
            try:
                with open(queue_file, 'r') as f:
                    queue_entry = json.load(f)
                status = queue_entry.get('status')
                print(f"Found existing submission with status: {status}")
                if status is None:
                    print(f"Warning: No status found in queue file {queue_file}")
                    return styled_warning("Error checking model status. Please try again later.")
                
                if status != EvaluationStatus.FAILED.value:
                    print(f"Model already submitted and in {status} status")
                    return styled_warning(f"This model has been already submitted and is in {status} status.")
            except Exception as e:
                print(f"Error reading queue file: {e}")
                print(f"Full traceback: {traceback.format_exc()}")
                return styled_warning("Error checking model status. Please try again later.")
    except Exception as e:
        print(f"Error during evaluation: {str(e)}")
        raise

        print("\n=== Validating model type ===")
        if model_type is None or model_type == "":
            print("Error: Model type is missing")
            return styled_error("Please select a model type.")

        print("\n=== Validating model existence ===")
        if revision == "":
            revision = "main"
            print("Using default revision: main")

        print("\n=== Validating model on Hugging Face ===")
        try:
            if weight_type in ["Delta", "Adapter"]:
                print(f"Checking base model {base_model} on Hugging Face...")
                base_model_on_hub, error, _ = is_model_on_hub(
                    model_name=base_model, 
                    revision=revision, 
                    token=TOKEN, 
                    test_tokenizer=True
                )
                print(f"Base model check result: {base_model_on_hub}")
                if not base_model_on_hub:
                    print(f"Error: Base model not found: {error}")
                    return styled_error(f'Base model "{base_model}" {error}')

            if not weight_type == "Adapter":
                print(f"Checking model {model} on Hugging Face...")
                model_on_hub, error, _ = is_model_on_hub(
                    model_name=model, 
                    revision=revision, 
                    token=TOKEN, 
                    test_tokenizer=True
                )
                print(f"Model check result: {model_on_hub}")
                if not model_on_hub:
                    print(f"Error: Model not found: {error}")
                    return styled_error(f'Model "{model}" {error}')
        except Exception as e:
            print(f"Error checking model on Hugging Face: {e}")
            print(f"Full traceback: {traceback.format_exc()}")
            return styled_error(f"Failed to validate model on Hugging Face: {str(e)}")

        print("\n=== Getting model info ===")
        try:
            model_info = API.model_info(repo_id=model, revision=revision)
            print(f"Successfully retrieved model info for {model}")
        except Exception as e:
            print(f"Error getting model info: {e}")
            print(f"Full traceback: {traceback.format_exc()}")
            return styled_error("Could not get your model information. Please fill it up properly.")

        print("\n=== Getting model size ===")
        try:
            model_size = get_model_size(model_info=model_info, precision=precision)
            print(f"Model size: {model_size}")
        except Exception as e:
            print(f"Error getting model size: {e}")
            print(f"Full traceback: {traceback.format_exc()}")
            model_size = "?"

        print("\n=== Validating model card and license ===")
        try:
            license = model_info.cardData["license"]
            print(f"Model license: {license}")
        except Exception as e:
            print(f"Error getting model license: {e}")
            print(f"Full traceback: {traceback.format_exc()}")
            return styled_error("Please select a license for your model")

        print("\n=== Checking model card ===")
        try:
            modelcard_OK, error_msg = check_model_card(model)
            print(f"Model card check result: {modelcard_OK}")
            if not modelcard_OK:
                print(f"Model card error: {error_msg}")
                return styled_error(error_msg)
        except Exception as e:
            print(f"Error checking model card: {e}")
            print(f"Full traceback: {traceback.format_exc()}")
            return styled_error("Failed to validate model card")

        print("\n=== Creating evaluation entry ===")
        eval_entry = {
            "model": model,
            "base_model": base_model,
            "revision": revision,
            "precision": precision,
            "weight_type": weight_type,
            "status": "PENDING",
            "submitted_time": current_time,
            "model_type": model_type,
            "likes": model_info.likes,
            "params": model_size,
            "license": license,
            "private": False,
        }
        print(f"\nEvaluation entry created: {json.dumps(eval_entry, indent=2)}")

        print("\n=== Checking for duplicate submission ===")
        model_key = f"{model}_{revision}_{precision}"
        if model_key in REQUESTED_MODELS:
            print(f"Found existing submission with key: {model_key}")
            # Get the status from the queue file
            queue_file = REQUESTED_MODELS[model_key]
            try:
                with open(queue_file, 'r') as f:
                    queue_entry = json.load(f)
                status = queue_entry.get('status')
                print(f"Found existing submission with status: {status}")
                if status is None:
                    print(f"Warning: No status found in queue file {queue_file}")
                    return styled_warning("Error checking model status. Please try again later.")
                
                if status != EvaluationStatus.FAILED.value:
                    print(f"Model already submitted and in {status} status")
                    return styled_warning(f"This model has been already submitted and is in {status} status.")
            except Exception as e:
                print(f"Error reading queue file: {e}")
                print(f"Full traceback: {traceback.format_exc()}")
                return styled_warning("Error checking model status. Please try again later.")

        print("\n=== Creating evaluation file ===")
        OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
        print(f"Creating output directory: {OUT_DIR}")
        os.makedirs(OUT_DIR, exist_ok=True)
        
        out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
        print(f"Output file path: {out_path}")

        # Write evaluation entry to file
        try:
            with open(out_path, "w") as f:
                f.write(json.dumps(eval_entry))
            print("\nEvaluation file created successfully")

            # Upload to Hugging Face
            print("\n=== Uploading evaluation file ===")
            API.upload_file(
                path_or_fileobj=out_path,
                path_in_repo=out_path.split("eval-queue/")[1],
                repo_id=QUEUE_REPO,
                repo_type="dataset",
                commit_message=f"Add evaluation request for {model}",
                token=TOKEN
            )
            print(f"\nEvaluation request uploaded successfully to {QUEUE_REPO}")
            
            # Clean up local file
            os.remove(out_path)
            print("\nLocal evaluation file removed")
            
            return styled_message(
                "Evaluation request created successfully! Please wait for the evaluation to complete."
            )
        except Exception as e:
            print(f"Error during file operations: {str(e)}")
            print(f"Full traceback: {traceback.format_exc()}")
            return styled_error(f"Failed to create evaluation request: {str(e)}")



        dataloader = DataLoader(tsac_dataset, batch_size=32, shuffle=False)
        
        model_obj.eval()
        with torch.no_grad():
            predictions = []
            targets = []
            
            for batch in dataloader:
                inputs = {k: v.to(device) for k, v in batch.items() if k != 'target'}
                target = batch['target'].to(device)
                
                # Log the first batch details
                if len(predictions) == 0:  # Only log for the first batch
                    print(f"\nFirst batch example:")
                    print(f"Input keys: {list(inputs.keys())}")
                    print(f"Target shape: {target.shape}")
                
                outputs = model_obj(**inputs)
                print(f"\nModel output type: {type(outputs)}")
                
                # Try to get logits from different possible formats
                if isinstance(outputs, dict):
                    print(f"Output keys: {list(outputs.keys())}")
                    # Try different common keys
                    if 'logits' in outputs:
                        logits = outputs['logits']
                    elif 'prediction_logits' in outputs:
                        logits = outputs['prediction_logits']
                    else:
                        raise ValueError(f"Unknown output format. Available keys: {list(outputs.keys())}")
                elif isinstance(outputs, tuple):
                    print(f"Output tuple length: {len(outputs)}")
                    # Try different positions in the tuple
                    if len(outputs) > 0:
                        logits = outputs[0]
                    else:
                        raise ValueError("Empty output tuple")
                else:
                    # If it's a single tensor, assume it's the logits
                    logits = outputs
                
                print(f"Logits shape: {logits.shape}")
                # For sequence classification, we typically use the [CLS] token's prediction
                # Get the first token's prediction (CLS token)
                cls_logits = logits[:, 0, :]  # Shape: [batch_size, num_classes]
                predictions.extend(cls_logits.argmax(dim=-1).cpu().tolist())
                targets.extend(target.cpu().tolist())
        
        accuracy = sum(p == t for p, t in zip(predictions, targets)) / len(predictions)
        
        eval_entry['results'] = {'accuracy': accuracy}
        
        # Update the queue file with results
        with open(out_path, "w") as f:
            f.write(json.dumps(eval_entry))

        # Evaluate on ArabML
        print("Evaluating on ArabML Tunisian Corpus...")
        arabml_dataset = load_dataset("arbml/Tunisian_Dialect_Corpus", split="train", trust_remote_code=True)
        
        def preprocess_arabml(examples):
            return tokenizer(examples['Tweet'], padding=True, truncation=True, max_length=512)
        
        arabml_dataset = arabml_dataset.map(preprocess_arabml, batched=True)
        
        total_tokens = 0
        covered_tokens = 0
        
        for example in arabml_dataset:
            tokens = tokenizer.tokenize(example['Tweet'])
            total_tokens += len(tokens)
            covered_tokens += len([t for t in tokens if t != tokenizer.unk_token])
        
        arabml_coverage = covered_tokens / total_tokens if total_tokens > 0 else 0
        
        # Store results