import json
import os
import time
import traceback
from datetime import datetime, timezone
from src.display.formatting import styled_error, styled_message, styled_warning
from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
from src.submission.check_validity import (
already_submitted_models,
check_model_card,
get_model_size,
is_model_on_hub,
)
from src.evaluator.evaluate import evaluate_model, EvaluationStatus, EvaluationResult
from src.display.utils import Tasks
import torch
from torch.utils.data import DataLoader
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from datasets import load_dataset
REQUESTED_MODELS = None
USERS_TO_SUBMISSION_DATES = None
def create_eval_request(
model: str,
base_model: str,
revision: str,
precision: str,
weight_type: str,
model_type: str,
):
"""Create and upload an evaluation request"""
try:
# Create evaluation request file
request_data = {
'model': model,
'base_model': base_model,
'revision': revision,
'precision': precision,
'weight_type': weight_type,
'model_type': model_type,
'status': EvaluationStatus.PENDING.value,
'submitted_time': datetime.now(timezone.utc).isoformat()
}
# Create filename
username = model.split('/')[0] if '/' in model else None
request_filename = f"{username or 'unknown'}_{model.replace('/', '_')}_eval_request_{revision}_{precision}_{weight_type}.json"
request_path = os.path.join(EVAL_REQUESTS_PATH, request_filename)
# Write request file
with open(request_path, 'w') as f:
json.dump(request_data, f, indent=2)
print(f"Created evaluation request: {request_filename}")
# Upload to Hugging Face
API.upload_file(
path_or_fileobj=request_path,
path_in_repo=request_filename if not username else os.path.join(username, request_filename),
repo_id=QUEUE_REPO,
repo_type="dataset",
commit_message=f"Add evaluation request for {model}",
token=TOKEN
)
print(f"Uploaded evaluation request to {QUEUE_REPO}")
return styled_message(
"Evaluation request created! Please wait for the evaluation to complete."
)
except Exception as e:
print(f"Error creating evaluation request: {str(e)}")
return styled_error(f"Failed to create evaluation request: {str(e)}")
def add_new_eval(
model: str,
base_model: str,
revision: str,
precision: str,
weight_type: str,
model_type: str,
):
"""Validate model and create evaluation request"""
try:
print("\n=== Starting evaluation submission ===")
print(f"Submission time: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S')} UTC")
print(f"Model: {model}")
print(f"Base model: {base_model}")
print(f"Revision: {revision}")
print(f"Precision: {precision}")
print(f"Weight type: {weight_type}")
print(f"Model type: {model_type}")
print(f"Evaluation requests path: {EVAL_REQUESTS_PATH}")
print(f"Queue repo: {QUEUE_REPO}")
# Always refresh the cache before checking for duplicates
print("\n=== Checking for duplicate submissions ===")
global REQUESTED_MODELS
global USERS_TO_SUBMISSION_DATES
start_time = time.time()
REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
print(f"Cache refresh completed in {time.time() - start_time:.2f} seconds")
print(f"Found {len(REQUESTED_MODELS)} existing submissions")
user_name = ""
model_path = model
if "/" in model:
user_name = model.split("/")[0]
model_path = model.split("/")[1]
print(f"\nUser name: {user_name}")
print(f"Model path: {model_path}")
precision = precision.split(" ")[0]
if revision == "":
revision = "main"
print("Using default revision: main")
current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
# Check if model is already submitted
print("\n=== Checking for existing submission ===")
model_key = f"{model}_{revision}_{precision}"
if model_key in REQUESTED_MODELS:
print(f"Found existing submission with key: {model_key}")
# Get the status from the queue file
queue_file = REQUESTED_MODELS[model_key]
try:
with open(queue_file, 'r') as f:
queue_entry = json.load(f)
status = queue_entry.get('status')
print(f"Found existing submission with status: {status}")
if status is None:
print(f"Warning: No status found in queue file {queue_file}")
return styled_warning("Error checking model status. Please try again later.")
if status != EvaluationStatus.FAILED.value:
print(f"Model already submitted and in {status} status")
return styled_warning(f"This model has been already submitted and is in {status} status.")
except Exception as e:
print(f"Error reading queue file: {e}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_warning("Error checking model status. Please try again later.")
except Exception as e:
print(f"Error during evaluation: {str(e)}")
raise
print("\n=== Validating model type ===")
if model_type is None or model_type == "":
print("Error: Model type is missing")
return styled_error("Please select a model type.")
print("\n=== Validating model existence ===")
if revision == "":
revision = "main"
print("Using default revision: main")
print("\n=== Validating model on Hugging Face ===")
try:
if weight_type in ["Delta", "Adapter"]:
print(f"Checking base model {base_model} on Hugging Face...")
base_model_on_hub, error, _ = is_model_on_hub(
model_name=base_model,
revision=revision,
token=TOKEN,
test_tokenizer=True
)
print(f"Base model check result: {base_model_on_hub}")
if not base_model_on_hub:
print(f"Error: Base model not found: {error}")
return styled_error(f'Base model "{base_model}" {error}')
if not weight_type == "Adapter":
print(f"Checking model {model} on Hugging Face...")
model_on_hub, error, _ = is_model_on_hub(
model_name=model,
revision=revision,
token=TOKEN,
test_tokenizer=True
)
print(f"Model check result: {model_on_hub}")
if not model_on_hub:
print(f"Error: Model not found: {error}")
return styled_error(f'Model "{model}" {error}')
except Exception as e:
print(f"Error checking model on Hugging Face: {e}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_error(f"Failed to validate model on Hugging Face: {str(e)}")
print("\n=== Getting model info ===")
try:
model_info = API.model_info(repo_id=model, revision=revision)
print(f"Successfully retrieved model info for {model}")
except Exception as e:
print(f"Error getting model info: {e}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_error("Could not get your model information. Please fill it up properly.")
print("\n=== Getting model size ===")
try:
model_size = get_model_size(model_info=model_info, precision=precision)
print(f"Model size: {model_size}")
except Exception as e:
print(f"Error getting model size: {e}")
print(f"Full traceback: {traceback.format_exc()}")
model_size = "?"
print("\n=== Validating model card and license ===")
try:
license = model_info.cardData["license"]
print(f"Model license: {license}")
except Exception as e:
print(f"Error getting model license: {e}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_error("Please select a license for your model")
print("\n=== Checking model card ===")
try:
modelcard_OK, error_msg = check_model_card(model)
print(f"Model card check result: {modelcard_OK}")
if not modelcard_OK:
print(f"Model card error: {error_msg}")
return styled_error(error_msg)
except Exception as e:
print(f"Error checking model card: {e}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_error("Failed to validate model card")
print("\n=== Creating evaluation entry ===")
eval_entry = {
"model": model,
"base_model": base_model,
"revision": revision,
"precision": precision,
"weight_type": weight_type,
"status": "PENDING",
"submitted_time": current_time,
"model_type": model_type,
"likes": model_info.likes,
"params": model_size,
"license": license,
"private": False,
}
print(f"\nEvaluation entry created: {json.dumps(eval_entry, indent=2)}")
print("\n=== Checking for duplicate submission ===")
model_key = f"{model}_{revision}_{precision}"
if model_key in REQUESTED_MODELS:
print(f"Found existing submission with key: {model_key}")
# Get the status from the queue file
queue_file = REQUESTED_MODELS[model_key]
try:
with open(queue_file, 'r') as f:
queue_entry = json.load(f)
status = queue_entry.get('status')
print(f"Found existing submission with status: {status}")
if status is None:
print(f"Warning: No status found in queue file {queue_file}")
return styled_warning("Error checking model status. Please try again later.")
if status != EvaluationStatus.FAILED.value:
print(f"Model already submitted and in {status} status")
return styled_warning(f"This model has been already submitted and is in {status} status.")
except Exception as e:
print(f"Error reading queue file: {e}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_warning("Error checking model status. Please try again later.")
print("\n=== Creating evaluation file ===")
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
print(f"Creating output directory: {OUT_DIR}")
os.makedirs(OUT_DIR, exist_ok=True)
out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
print(f"Output file path: {out_path}")
# Write evaluation entry to file
try:
with open(out_path, "w") as f:
f.write(json.dumps(eval_entry))
print("\nEvaluation file created successfully")
# Upload to Hugging Face
print("\n=== Uploading evaluation file ===")
API.upload_file(
path_or_fileobj=out_path,
path_in_repo=out_path.split("eval-queue/")[1],
repo_id=QUEUE_REPO,
repo_type="dataset",
commit_message=f"Add evaluation request for {model}",
token=TOKEN
)
print(f"\nEvaluation request uploaded successfully to {QUEUE_REPO}")
# Clean up local file
os.remove(out_path)
print("\nLocal evaluation file removed")
return styled_message(
"Evaluation request created successfully! Please wait for the evaluation to complete."
)
except Exception as e:
print(f"Error during file operations: {str(e)}")
print(f"Full traceback: {traceback.format_exc()}")
return styled_error(f"Failed to create evaluation request: {str(e)}")
dataloader = DataLoader(tsac_dataset, batch_size=32, shuffle=False)
model_obj.eval()
with torch.no_grad():
predictions = []
targets = []
for batch in dataloader:
inputs = {k: v.to(device) for k, v in batch.items() if k != 'target'}
target = batch['target'].to(device)
# Log the first batch details
if len(predictions) == 0: # Only log for the first batch
print(f"\nFirst batch example:")
print(f"Input keys: {list(inputs.keys())}")
print(f"Target shape: {target.shape}")
outputs = model_obj(**inputs)
print(f"\nModel output type: {type(outputs)}")
# Try to get logits from different possible formats
if isinstance(outputs, dict):
print(f"Output keys: {list(outputs.keys())}")
# Try different common keys
if 'logits' in outputs:
logits = outputs['logits']
elif 'prediction_logits' in outputs:
logits = outputs['prediction_logits']
else:
raise ValueError(f"Unknown output format. Available keys: {list(outputs.keys())}")
elif isinstance(outputs, tuple):
print(f"Output tuple length: {len(outputs)}")
# Try different positions in the tuple
if len(outputs) > 0:
logits = outputs[0]
else:
raise ValueError("Empty output tuple")
else:
# If it's a single tensor, assume it's the logits
logits = outputs
print(f"Logits shape: {logits.shape}")
# For sequence classification, we typically use the [CLS] token's prediction
# Get the first token's prediction (CLS token)
cls_logits = logits[:, 0, :] # Shape: [batch_size, num_classes]
predictions.extend(cls_logits.argmax(dim=-1).cpu().tolist())
targets.extend(target.cpu().tolist())
accuracy = sum(p == t for p, t in zip(predictions, targets)) / len(predictions) if predictions else 0.0
eval_entry['results'] = {'accuracy': accuracy}
# Update the queue file with results
with open(out_path, "w") as f:
f.write(json.dumps(eval_entry))
# Evaluate on ArabML
print("Evaluating on ArabML Tunisian Corpus...")
arabml_dataset = load_dataset("arbml/Tunisian_Dialect_Corpus", split="train", trust_remote_code=True)
def preprocess_arabml(examples):
return tokenizer(examples['Tweet'], padding=True, truncation=True, max_length=512)
arabml_dataset = arabml_dataset.map(preprocess_arabml, batched=True)
# Vocabulary coverage: fraction of corpus tokens that the tokenizer maps to
# something other than its unknown token.
total_tokens = 0
covered_tokens = 0
for example in arabml_dataset:
tokens = tokenizer.tokenize(example['Tweet'])
total_tokens += len(tokens)
covered_tokens += len([t for t in tokens if t != tokenizer.unk_token])
arabml_coverage = covered_tokens / total_tokens if total_tokens > 0 else 0
# Store results