import json
import os
import time
import traceback
from datetime import datetime, timezone

import torch
from datasets import load_dataset
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer

from src.display.formatting import styled_error, styled_message, styled_warning
from src.display.utils import Tasks
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
from src.evaluator.evaluate import evaluate_model, EvaluationStatus, EvaluationResult
from src.submission.check_validity import (
    already_submitted_models,
    check_model_card,
    get_model_size,
    is_model_on_hub,
)

REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None


def create_eval_request(
    model: str,
    base_model: str,
    revision: str,
    precision: str,
    weight_type: str,
    model_type: str,
):
    """Create and upload an evaluation request."""
    try:
        # Build the evaluation request payload
        request_data = {
            'model': model,
            'base_model': base_model,
            'revision': revision,
            'precision': precision,
            'weight_type': weight_type,
            'model_type': model_type,
            'status': EvaluationStatus.PENDING.value,
            'submitted_time': datetime.now(timezone.utc).isoformat(),
        }

        # Build the request filename and local path
        username = model.split('/')[0] if '/' in model else None
        request_filename = f"{username or 'unknown'}_{model.replace('/', '_')}_eval_request_{revision}_{precision}_{weight_type}.json"
        request_path = os.path.join(EVAL_REQUESTS_PATH, request_filename)

        # Write the request file locally
        with open(request_path, 'w') as f:
            json.dump(request_data, f, indent=2)
        print(f"Created evaluation request: {request_filename}")

        # Upload it to the queue repository on the Hugging Face Hub
        API.upload_file(
            path_or_fileobj=request_path,
            path_in_repo=request_filename if not username else os.path.join(username, request_filename),
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add evaluation request for {model}",
            token=TOKEN,
        )
        print(f"Uploaded evaluation request to {QUEUE_REPO}")

        return styled_message(
            "Evaluation request created! Please wait for the evaluation to complete."
        )
    except Exception as e:
        print(f"Error creating evaluation request: {str(e)}")
        return styled_error(f"Failed to create evaluation request: {str(e)}")
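

# Illustrative call (commented out; the repository id and option strings below
# are hypothetical placeholders, not values defined by this module):
# create_eval_request(
#     model="some-user/some-model",
#     base_model="",
#     revision="main",
#     precision="float16",
#     weight_type="Original",
#     model_type="fine-tuned",
# )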


def add_new_eval(
    model: str,
    base_model: str,
    revision: str,
    precision: str,
    weight_type: str,
    model_type: str,
):
    """Validate the model and create an evaluation request."""
    try:
        print("\n=== Starting evaluation submission ===")
        print(f"Submission time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
        print(f"Model: {model}")
        print(f"Base model: {base_model}")
        print(f"Revision: {revision}")
        print(f"Precision: {precision}")
        print(f"Weight type: {weight_type}")
        print(f"Model type: {model_type}")
        print(f"Evaluation requests path: {EVAL_REQUESTS_PATH}")
        print(f"Queue repo: {QUEUE_REPO}")

        # Always refresh the cache before checking for duplicates
        print("\n=== Checking for duplicate submissions ===")
        global REQUESTED_MODELS
        global USERS_TO_SUBMISSION_DATES
        start_time = time.time()
        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
        print(f"Cache refresh completed in {time.time() - start_time:.2f} seconds")
        print(f"Found {len(REQUESTED_MODELS)} existing submissions")

        user_name = ""
        model_path = model
        if "/" in model:
            user_name = model.split("/")[0]
            model_path = model.split("/")[1]
        print(f"\nUser name: {user_name}")
        print(f"Model path: {model_path}")

        precision = precision.split(" ")[0]
        if revision == "":
            revision = "main"
            print("Using default revision: main")
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        # Check whether the model has already been submitted
        print("\n=== Checking for existing submission ===")
        model_key = f"{model}_{revision}_{precision}"
        if model_key in REQUESTED_MODELS:
            print(f"Found existing submission with key: {model_key}")
            # Read the status from the queue file
            queue_file = REQUESTED_MODELS[model_key]
            try:
                with open(queue_file, 'r') as f:
                    queue_entry = json.load(f)
                status = queue_entry.get('status')
                print(f"Found existing submission with status: {status}")
                if status is None:
                    print(f"Warning: No status found in queue file {queue_file}")
                    return styled_warning("Error checking model status. Please try again later.")
                if status != EvaluationStatus.FAILED.value:
                    print(f"Model already submitted and in {status} status")
                    return styled_warning(f"This model has already been submitted and is in {status} status.")
            except Exception as e:
                print(f"Error reading queue file: {e}")
                print(f"Full traceback: {traceback.format_exc()}")
                return styled_warning("Error checking model status. Please try again later.")
    except Exception as e:
        print(f"Error during evaluation submission: {str(e)}")
        raise

    print("\n=== Validating model type ===")
    if model_type is None or model_type == "":
        print("Error: Model type is missing")
        return styled_error("Please select a model type.")

    print("\n=== Validating model existence ===")
    if revision == "":
        revision = "main"
        print("Using default revision: main")

    print("\n=== Validating model on Hugging Face ===")
    try:
        if weight_type in ["Delta", "Adapter"]:
            print(f"Checking base model {base_model} on Hugging Face...")
            base_model_on_hub, error, _ = is_model_on_hub(
                model_name=base_model,
                revision=revision,
                token=TOKEN,
                test_tokenizer=True,
            )
            print(f"Base model check result: {base_model_on_hub}")
            if not base_model_on_hub:
                print(f"Error: Base model not found: {error}")
                return styled_error(f'Base model "{base_model}" {error}')

        if weight_type != "Adapter":
            print(f"Checking model {model} on Hugging Face...")
            model_on_hub, error, _ = is_model_on_hub(
                model_name=model,
                revision=revision,
                token=TOKEN,
                test_tokenizer=True,
            )
            print(f"Model check result: {model_on_hub}")
            if not model_on_hub:
                print(f"Error: Model not found: {error}")
                return styled_error(f'Model "{model}" {error}')
    except Exception as e:
        print(f"Error checking model on Hugging Face: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return styled_error(f"Failed to validate model on Hugging Face: {str(e)}")

    print("\n=== Getting model info ===")
    try:
        model_info = API.model_info(repo_id=model, revision=revision)
        print(f"Successfully retrieved model info for {model}")
    except Exception as e:
        print(f"Error getting model info: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return styled_error("Could not get your model information. Please make sure it is filled in properly.")

    print("\n=== Getting model size ===")
    try:
        model_size = get_model_size(model_info=model_info, precision=precision)
        print(f"Model size: {model_size}")
    except Exception as e:
        print(f"Error getting model size: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        model_size = "?"

    print("\n=== Validating model card and license ===")
    try:
        license = model_info.cardData["license"]
        print(f"Model license: {license}")
    except Exception as e:
        print(f"Error getting model license: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return styled_error("Please select a license for your model")

    print("\n=== Checking model card ===")
    try:
        modelcard_OK, error_msg = check_model_card(model)
        print(f"Model card check result: {modelcard_OK}")
        if not modelcard_OK:
            print(f"Model card error: {error_msg}")
            return styled_error(error_msg)
    except Exception as e:
        print(f"Error checking model card: {e}")
        print(f"Full traceback: {traceback.format_exc()}")
        return styled_error("Failed to validate model card")

    print("\n=== Creating evaluation entry ===")
    eval_entry = {
        "model": model,
        "base_model": base_model,
        "revision": revision,
        "precision": precision,
        "weight_type": weight_type,
        "status": "PENDING",
        "submitted_time": current_time,
        "model_type": model_type,
        "likes": model_info.likes,
        "params": model_size,
        "license": license,
        "private": False,
    }
    print(f"\nEvaluation entry created: {json.dumps(eval_entry, indent=2)}")

    print("\n=== Checking for duplicate submission ===")
    model_key = f"{model}_{revision}_{precision}"
    if model_key in REQUESTED_MODELS:
        print(f"Found existing submission with key: {model_key}")
        # Read the status from the queue file
        queue_file = REQUESTED_MODELS[model_key]
        try:
            with open(queue_file, 'r') as f:
                queue_entry = json.load(f)
            status = queue_entry.get('status')
            print(f"Found existing submission with status: {status}")
            if status is None:
                print(f"Warning: No status found in queue file {queue_file}")
                return styled_warning("Error checking model status. Please try again later.")
            if status != EvaluationStatus.FAILED.value:
                print(f"Model already submitted and in {status} status")
                return styled_warning(f"This model has already been submitted and is in {status} status.")
        except Exception as e:
            print(f"Error reading queue file: {e}")
            print(f"Full traceback: {traceback.format_exc()}")
            return styled_warning("Error checking model status. Please try again later.")

    print("\n=== Creating evaluation file ===")
    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
    print(f"Creating output directory: {OUT_DIR}")
    os.makedirs(OUT_DIR, exist_ok=True)
    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
    print(f"Output file path: {out_path}")

    # Write the evaluation entry locally, then push it to the queue repo
    try:
        with open(out_path, "w") as f:
            f.write(json.dumps(eval_entry))
        print("\nEvaluation file created successfully")

        # Upload to Hugging Face
        print("\n=== Uploading evaluation file ===")
        API.upload_file(
            path_or_fileobj=out_path,
            path_in_repo=out_path.split("eval-queue/")[1],
            repo_id=QUEUE_REPO,
            repo_type="dataset",
            commit_message=f"Add evaluation request for {model}",
            token=TOKEN,
        )
        print(f"\nEvaluation request uploaded successfully to {QUEUE_REPO}")

        # Clean up the local file
        os.remove(out_path)
        print("\nLocal evaluation file removed")

        return styled_message(
            "Evaluation request created successfully! Please wait for the evaluation to complete."
        )
    except Exception as e:
        print(f"Error during file operations: {str(e)}")
        print(f"Full traceback: {traceback.format_exc()}")
        return styled_error(f"Failed to create evaluation request: {str(e)}")
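
    # Evaluation step (fragment): scores the submitted model on the TSAC
    # sentiment dataset and then measures tokenizer coverage on the ArabML
    # Tunisian Dialect Corpus. It assumes the surrounding evaluation pipeline
    # has already defined `tsac_dataset` (a tokenized dataset with a 'target'
    # column), `model_obj`, `tokenizer`, and `device`; those names are not
    # created in this module.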
    dataloader = DataLoader(tsac_dataset, batch_size=32, shuffle=False)
    model_obj.eval()
    with torch.no_grad():
        predictions = []
        targets = []
        for batch in dataloader:
            inputs = {k: v.to(device) for k, v in batch.items() if k != 'target'}
            target = batch['target'].to(device)

            # Log the first batch details
            if len(predictions) == 0:  # Only log for the first batch
                print("\nFirst batch example:")
                print(f"Input keys: {list(inputs.keys())}")
                print(f"Target shape: {target.shape}")

            outputs = model_obj(**inputs)
            print(f"\nModel output type: {type(outputs)}")

            # Extract logits from the possible output formats
            if isinstance(outputs, dict):
                print(f"Output keys: {list(outputs.keys())}")
                # Try different common keys
                if 'logits' in outputs:
                    logits = outputs['logits']
                elif 'prediction_logits' in outputs:
                    logits = outputs['prediction_logits']
                else:
                    raise ValueError(f"Unknown output format. Available keys: {list(outputs.keys())}")
            elif isinstance(outputs, tuple):
                print(f"Output tuple length: {len(outputs)}")
                # Take the first element of the tuple
                if len(outputs) > 0:
                    logits = outputs[0]
                else:
                    raise ValueError("Empty output tuple")
            else:
                # If it's a single tensor, assume it's the logits
                logits = outputs
            print(f"Logits shape: {logits.shape}")

            # Sequence-classification heads already return [batch_size, num_classes];
            # only index the first ([CLS]) token if the logits are still per-token.
            if logits.dim() == 3:
                logits = logits[:, 0, :]  # Shape: [batch_size, num_classes]
            predictions.extend(logits.argmax(dim=-1).cpu().tolist())
            targets.extend(target.cpu().tolist())

    accuracy = sum(p == t for p, t in zip(predictions, targets)) / len(predictions) if predictions else 0.0
    eval_entry['results'] = {'accuracy': accuracy}

    # Update the queue file with the results
    with open(out_path, "w") as f:
        f.write(json.dumps(eval_entry))

    # Evaluate tokenizer coverage on the ArabML Tunisian Dialect Corpus
    print("Evaluating on ArabML Tunisian Corpus...")
    arabml_dataset = load_dataset("arbml/Tunisian_Dialect_Corpus", split="train", trust_remote_code=True)

    def preprocess_arabml(examples):
        return tokenizer(examples['Tweet'], padding=True, truncation=True, max_length=512)

    arabml_dataset = arabml_dataset.map(preprocess_arabml, batched=True)

    # Coverage = share of tokens that the tokenizer does not map to the unknown token
    total_tokens = 0
    covered_tokens = 0
    for example in arabml_dataset:
        tokens = tokenizer.tokenize(example['Tweet'])
        total_tokens += len(tokens)
        covered_tokens += len([t for t in tokens if t != tokenizer.unk_token])
    arabml_coverage = covered_tokens / total_tokens if total_tokens > 0 else 0

    # Store results