# Comment to be included in the beginning of the final report.
comment = ""

[server]
url = "http://localhost:11434/v1"
api_key = "api key"
model = "llama3"
timeout = 600.0

[inference]
# Settings below are from evaluate_from_local.py for VLLM on TIGER-AI-Lab/MMLU-Pro
temperature = 0.0
top_p = 1.0 # not specified but default for VLLM
max_tokens = 2048
# The variable {subject} will be replaced with appropriate value in runtime.
system_prompt = "The following are multiple choice questions (with answers) about {subject}. Think step by step and then finish your answer with \"the answer is (X)\" where X is the correct letter choice."
# "multi_chat" inserts COT examples into multi-turn messages. Use for instruct/chat models.
# "no_chat" uses v1/completion api. Use for non-instruct/chat model.
# "single_chat" (from the script for GPT-4O) inserts all the COT examples and question into a single message. Not recommended, use only for legacy compatibility.
style = "multi_chat"

[test]
categories = ['biology', 'business', 'chemistry', 'computer science', 'economics', 'engineering', 'health', 'history', 'law', 'math', 'philosophy', 'physics', 'psychology', 'other']
parallel = 1

[log]
# Verbosity between 0-2
verbosity = 0
# If true, logs exact prompt sent to the model in the test result files.
log_prompt = true