import os

import pandas as pd
from datasets import load_dataset
from tqdm import tqdm

# get_model_names is assumed to live alongside extract_answer in llm_utils.
from llm_toolkit.llm_utils import extract_answer, get_model_names

print(f"loading {__file__}")


def calc_metrics(references, predictions, debug=False):
    assert len(references) == len(
        predictions
    ), f"lengths are different: {len(references)} != {len(predictions)}"

    # Reduce raw generations to bare answers before comparing to references.
    predictions = [extract_answer(text) for text in predictions]
    correct = [1 if ref == pred else 0 for ref, pred in zip(references, predictions)]
    accuracy = sum(correct) / len(references)

    results = {"accuracy": accuracy}
    if debug:
        # Record which examples were wrong so they can be inspected later.
        incorrect_ids = [i for i, c in enumerate(correct) if c == 0]
        results["incorrect_ids"] = incorrect_ids

    return results
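
# Usage sketch for calc_metrics (illustrative values; assumes extract_answer
# maps each raw generation onto one of the five allowed answers):
#
#   results = calc_metrics(["是", "不是"], ["是", "不重要"], debug=True)
#   # results == {"accuracy": 0.5, "incorrect_ids": [1]}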


def save_results(model_name, results_path, dataset, predictions, debug=False):
    if not os.path.exists(results_path):
        # Get the directory part of the file path
        dir_path = os.path.dirname(results_path)

        # Create all directories in the path (if they don't exist)
        os.makedirs(dir_path, exist_ok=True)

        # First run: start from the evaluation dataset, dropping columns that
        # are not worth persisting next to model predictions.
        df = dataset.to_pandas()
        df.drop(columns=["answer", "prompt", "train_text"], inplace=True)
    else:
        # Later runs extend the existing results file, one column per model.
        df = pd.read_csv(results_path, on_bad_lines="warn")

    df[model_name] = predictions
    if debug:
        print(df.head(1))

    df.to_csv(results_path, index=False)
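
# Usage sketch: each evaluated model contributes one prediction column to a
# shared CSV so that get_metrics() can compare models side by side (the model
# name and path below are made-up examples):
#
#   save_results("org/model-a", "results/experiments.csv", datasets["test"], predictions)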


def load_logical_reasoning_dataset(data_path, tokenizer=None):
    train_data_file = data_path + "/train.csv"
    test_data_file = data_path + "/dev.csv"

    print("loading train/test data files")
    datasets = load_dataset(
        "csv",
        data_files={"train": train_data_file, "test": test_data_file},
    )

    if tokenizer:
        # Chinese system prompt. In English: "You are the host of a logic
        # game. The rules are: 1. Participants are given a puzzle. 2. They may
        # ask questions to gather clues and try to solve it. 3. For each
        # question, the host answers with exactly one of five options, based
        # on the actual situation: yes, no, irrelevant, correct answer, or
        # invalid question. 4. No other information may be added to an answer,
        # and no character of an option may be dropped; e.g. '不是' (no) must
        # not be shortened to '不'. 5. Participants reason from the answers to
        # find the puzzle's correct solution. Answer the participant's
        # questions strictly by these rules. Puzzle: {} / Actual situation: {}
        # / Participant's question: {}"
        reasoning_prompt = """你是一个逻辑游戏的主持人。游戏规则如下:
1. 参与者会得到一个谜题。
2. 参与者可以通过提问来获取线索,尝试解开谜题。
3. 对于每个问题,主持人将根据实际情况回答以下五个选项之一:是、不是、不重要、回答正确、问法错误。
4. 回答中不能添加任何其它信息,也不能省略选项中的任何一个字。例如,不可以把“不是”省略成“不”。
5. 参与者需要根据回答来推理,并最终找出谜题的正确答案。
请严格按照这些规则回答参与者提出的问题。
谜题: {}
实际情况: {}
参与者提出的问题: {}
"""

        def formatting_prompts_func(examples):
            inputs = examples["text"]
            outputs = examples["label"]
            puzzles = examples["puzzle"]
            truths = examples["truth"]

            # Two-slot chat template: a fixed system message plus a
            # placeholder that is filled with the per-example user prompt.
            messages = [
                {
                    "role": "system",
                    "content": "You are an expert in logical reasoning.",
                },
                None,
            ]

            # Mistral chat templates do not accept a system message, so drop it.
            model_name = os.getenv("MODEL_NAME") or ""
            if "mistral" in model_name.lower():
                messages = messages[1:]

            texts = []
            prompts = []
            for question, output, puzzle, truth in zip(inputs, outputs, puzzles, truths):
                prompt = reasoning_prompt.format(puzzle, truth, question)
                messages[-1] = {"role": "user", "content": prompt}
                prompt = tokenizer.apply_chat_template(
                    messages, tokenize=False, add_generation_prompt=True
                )
                prompts.append(prompt)
                # Training text is the prompt plus the gold label and EOS; the
                # bare prompt is kept separately for inference.
                texts.append(prompt + output + tokenizer.eos_token)

            return {"train_text": texts, "prompt": prompts}

        datasets = datasets.map(
            formatting_prompts_func,
            batched=True,
        )

    print(datasets)
    return datasets
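
# Usage sketch (the data path is a made-up example; the directory is expected
# to contain train.csv and dev.csv):
#
#   datasets = load_logical_reasoning_dataset("data/logical_reasoning", tokenizer=tokenizer)
#   print(datasets["test"]["prompt"][0])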


def eval_model(model, tokenizer, eval_dataset):
    total = len(eval_dataset)
    predictions = []

    # Generate one example at a time: prompts vary widely in length, so this
    # trades throughput for simplicity (no padding/batching logic).
    for i in tqdm(range(total)):
        inputs = tokenizer(
            eval_dataset["prompt"][i : i + 1],
            return_tensors="pt",
        ).to("cuda")

        outputs = model.generate(**inputs, max_new_tokens=4096, use_cache=False)
        decoded_output = tokenizer.batch_decode(outputs)

        # Only print extraction details for the first example.
        debug = i == 0
        decoded_output = [
            extract_answer(output, debug=debug) for output in decoded_output
        ]
        predictions.extend(decoded_output)

    return predictions
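
# Putting evaluation and persistence together (model/tokenizer loading is out
# of scope here; the results path is illustrative):
#
#   predictions = eval_model(model, tokenizer, datasets["test"])
#   save_results(os.getenv("MODEL_NAME"), "results/experiments.csv", datasets["test"], predictions)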


def save_model(
    model,
    tokenizer,
    include_gguf=True,
    include_merged=True,
    publish=True,
):
    try:
        token = os.getenv("HF_TOKEN") or None
        model_name = os.getenv("MODEL_NAME")

        save_method = "lora"
        quantization_method = "q5_k_m"

        model_names = get_model_names(
            model_name, save_method=save_method, quantization_method=quantization_method
        )

        # Always save the adapter (and tokenizer) locally.
        model.save_pretrained(model_names["local"])
        tokenizer.save_pretrained(model_names["local"])

        if publish:
            model.push_to_hub(
                model_names["hub"],
                token=token,
            )
            tokenizer.push_to_hub(
                model_names["hub"],
                token=token,
            )

        if include_merged:
            model.save_pretrained_merged(
                model_names["local"] + "-merged", tokenizer, save_method=save_method
            )
            if publish:
                model.push_to_hub_merged(
                    model_names["hub"] + "-merged",
                    tokenizer,
                    save_method=save_method,
                    token=token,
                )

        if include_gguf:
            model.save_pretrained_gguf(
                model_names["local-gguf"],
                tokenizer,
                quantization_method=quantization_method,
            )
            if publish:
                model.push_to_hub_gguf(
                    model_names["hub-gguf"],
                    tokenizer,
                    quantization_method=quantization_method,
                    token=token,
                )
    except Exception as e:
        # Saving/publishing is best-effort: report failures without crashing.
        print(e)
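
# Usage sketch (expects MODEL_NAME and, for publishing, a valid HF_TOKEN in
# the environment):
#
#   save_model(model, tokenizer, include_gguf=False, publish=False)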


def get_metrics(df):
    # Model prediction columns start after the metadata columns; the first
    # two columns are assumed to be non-model data.
    metrics_df = pd.DataFrame(df.columns.T)[2:]
    metrics_df.rename(columns={0: "model"}, inplace=True)
    # Keep only the short model name, dropping any org/ prefix.
    metrics_df["model"] = metrics_df["model"].apply(lambda x: x.split("/")[-1])
    metrics_df.reset_index(inplace=True)
    metrics_df = metrics_df.drop(columns=["index"])

    accuracy = []
    all_metrics = []
    for col in df.columns[2:]:
        # Score each model's predictions against the gold answers in "label".
        metrics = calc_metrics(df["label"], df[col], debug=True)
        print(f"{col}: {metrics}")

        accuracy.append(metrics["accuracy"])
        all_metrics.append(metrics)

    metrics_df["accuracy"] = accuracy
    metrics_df["all_metrics"] = all_metrics
    return metrics_df
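
# Minimal smoke test for the metrics step, runnable as a script against an
# existing results CSV (the default path below is an assumption, not a file
# this module guarantees to exist):
if __name__ == "__main__":
    import sys

    results_path = sys.argv[1] if len(sys.argv) > 1 else "results/experiments.csv"
    metrics_df = get_metrics(pd.read_csv(results_path, on_bad_lines="warn"))
    print(metrics_df)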