Spaces:
Paused
Paused
| # Copyright 2020-2025 The HuggingFace Team. All rights reserved. | |
| # | |
| # Licensed under the Apache License, Version 2.0 (the "License"); | |
| # you may not use this file except in compliance with the License. | |
| # You may obtain a copy of the License at | |
| # | |
| # http://www.apache.org/licenses/LICENSE-2.0 | |
| # | |
| # Unless required by applicable law or agreed to in writing, software | |
| # distributed under the License is distributed on an "AS IS" BASIS, | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
| # See the License for the specific language governing permissions and | |
| # limitations under the License. | |
| import argparse | |
| import csv | |
| import evaluate | |
| import numpy as np | |
| import torch | |
| from datasets import load_dataset | |
| from tqdm import tqdm | |
| from transformers import AutoModelForCausalLM, AutoTokenizer, is_torch_npu_available, is_torch_xpu_available | |
# Command-line interface. Arguments are parsed before any metric or dataset is
# loaded so that `--help` and invalid flags return immediately instead of
# waiting on the loading calls below.
parser = argparse.ArgumentParser(description="Evaluate de-toxified models")
parser.add_argument(
    "--model_type",
    default="all",
    type=str,
    help='Model group to evaluate ("all", "gpt-neo" or "gpt-j"); any other value is used as a single model id',
)
parser.add_argument(
    "--output_file",
    default="toxicity.csv",
    type=str,
    help="Path of the CSV file the results are written to",
)
parser.add_argument("--batch_size", default=64, type=int, help="Batch size")
parser.add_argument("--num_samples", default=400, type=int, help="Number of samples")
parser.add_argument("--context_length", default=2000, type=int, help="Maximum context length of the prompts")
parser.add_argument("--max_new_tokens", default=30, type=int, help="Max new tokens for generation")
args = parser.parse_args()

# Toxicity measurement used to score the generated continuations.
toxicity = evaluate.load("ybelkada/toxicity", "DaNLP/da-electra-hatespeech-detection", module_type="measurement")
# Test split of the wiki_toxic dataset; the prompts are drawn from it.
ds = load_dataset("OxAISH-AL-LLM/wiki_toxic", split="test")
# Checkpoints belonging to each named model family. Each list is defined once
# so that the "all" group cannot drift out of sync with the per-family groups.
_GPT_NEO_MODELS = [
    "ybelkada/gpt-neo-125m-detox",
    "EleutherAI/gpt-neo-125M",
    "EleutherAI/gpt-neo-2.7B",
    "ybelkada/gpt-neo-2.7B-detox",
]
_GPT_J_MODELS = [
    "ybelkada/gpt-j-6b-sharded-bf16",
    "ybelkada/gpt-j-6b-detox",
]
_MODEL_GROUPS = {
    "all": _GPT_NEO_MODELS + _GPT_J_MODELS,
    "gpt-neo": _GPT_NEO_MODELS,
    "gpt-j": _GPT_J_MODELS,
}

# A value of --model_type that is not a known group name is treated as a
# single model identifier and evaluated on its own.
MODELS_TO_TEST = list(_MODEL_GROUPS.get(args.model_type, [args.model_type]))
# Unpack the parsed command-line options into the module-level settings used
# by the rest of the script.
NUM_SAMPLES, BATCH_SIZE = args.num_samples, args.batch_size
output_file, max_new_tokens, context_length = (
    args.output_file,
    args.max_new_tokens,
    args.context_length,
)

# Pick the accelerator to run on: XPU is checked first, then NPU, then CUDA;
# the string "cpu" is used when none of them is available.
if is_torch_xpu_available():
    device = torch.xpu.current_device()
elif is_torch_npu_available():
    device = torch.npu.current_device()
elif torch.cuda.is_available():
    device = torch.cuda.current_device()
else:
    device = "cpu"
# consider only toxic prompts
ds = ds.filter(lambda x: x["label"] == 1)

# Character cap applied to every prompt before tokenization; it bounds the
# amount of text handed to the tokenizer.
MAX_PROMPT_CHARS = 2000

# The prompts do not depend on the model, so collect them once. At most
# NUM_SAMPLES prompts are kept (fewer when the filtered dataset is smaller).
prompts = []
for sample_index, example in enumerate(ds):
    if sample_index >= NUM_SAMPLES:
        break
    prompts.append(example["comment_text"][:MAX_PROMPT_CHARS])

# Per-model list of toxicity scores, keyed by model id.
toxicities = {}

# The context manager guarantees the CSV file is closed even when loading or
# generation raises part-way through the run.
with open(output_file, "w", newline="") as file:
    writer = csv.writer(file)
    # header row
    writer.writerow(["model_id", "mean_toxicity", "std_toxicity"])

    for model_id in tqdm(MODELS_TO_TEST):
        model = AutoModelForCausalLM.from_pretrained(model_id, device_map={"": device}, torch_dtype=torch.bfloat16)
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        # Reuse EOS as the padding token and pad on the left so that the
        # generated tokens directly follow each prompt in the batch.
        tokenizer.pad_token = tokenizer.eos_token
        tokenizer.padding_side = "left"

        # Always create the entry so that a run with no full batch (or no
        # prompts at all) still has a list to report on.
        toxicities[model_id] = []

        # Stepping through the prompts in slices covers the final, possibly
        # shorter, batch with the same code path and never yields an empty one.
        for start in range(0, len(prompts), BATCH_SIZE):
            batch_texts = prompts[start : start + BATCH_SIZE]

            # Re-seed before every sampling call so each batch is reproducible.
            torch.manual_seed(42)

            # Truncate at tokenization time so the limit applies along the
            # sequence dimension of every prompt in the batch.
            inputs = tokenizer(
                batch_texts,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=context_length,
            ).to(device)
            outputs = model.generate(**inputs, do_sample=True, max_new_tokens=max_new_tokens, use_cache=True)

            # The returned sequences start with the (padded) prompt tokens;
            # decode only what comes after them instead of string-matching
            # the prompt text out of the decoded output.
            prompt_length = inputs["input_ids"].shape[1]
            generated_texts = tokenizer.batch_decode(outputs[:, prompt_length:], skip_special_tokens=True)

            toxicity_score = toxicity.compute(predictions=generated_texts)
            toxicities[model_id].extend(toxicity_score["toxicity"])

        # compute mean & std using np; report NaN when nothing was scored
        # rather than letting numpy warn on an empty list
        if toxicities[model_id]:
            mean = np.mean(toxicities[model_id])
            std = np.std(toxicities[model_id])
        else:
            mean = std = float("nan")

        # save to file, flushing so finished models survive a later failure
        writer.writerow([model_id, mean, std])
        file.flush()
        print(f"Model: {model_id} - Mean: {mean} - Std: {std}")

        # Drop the reference to the model and release cached accelerator
        # memory before the next checkpoint is loaded.
        model = None
        if is_torch_xpu_available():
            torch.xpu.empty_cache()
        elif is_torch_npu_available():
            torch.npu.empty_cache()
        elif torch.cuda.is_available():
            torch.cuda.empty_cache()