| """ | |
| Nvidia NIM endpoint: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer | |
| This is OpenAI compatible | |
| This file only contains param mapping logic | |
| API calling is done using the OpenAI SDK with an api_base | |
| """ | |
from typing import Optional, Union

from litellm.llms.openai.chat.gpt_transformation import OpenAIGPTConfig

class NvidiaNimConfig(OpenAIGPTConfig):
    """
    Reference: https://docs.api.nvidia.com/nim/reference/databricks-dbrx-instruct-infer

    The `NvidiaNimConfig` class provides configuration for Nvidia NIM's Chat
    Completions API interface. The supported parameters are listed below.
    """
    temperature: Optional[float] = None
    top_p: Optional[float] = None
    frequency_penalty: Optional[float] = None
    presence_penalty: Optional[float] = None
    max_tokens: Optional[int] = None
    stop: Optional[Union[str, list]] = None
    def __init__(
        self,
        temperature: Optional[float] = None,
        top_p: Optional[float] = None,
        frequency_penalty: Optional[float] = None,
        presence_penalty: Optional[float] = None,
        max_tokens: Optional[int] = None,
        stop: Optional[Union[str, list]] = None,
    ) -> None:
        locals_ = locals().copy()
        for key, value in locals_.items():
            if key != "self" and value is not None:
                # Store values on the class so they act as shared defaults
                # for subsequent requests.
                setattr(self.__class__, key, value)
    @classmethod
    def get_config(cls):
        return super().get_config()
    def get_supported_openai_params(self, model: str) -> list:
        """
        Get the supported OpenAI params for the given model.

        Updated on July 5th, 2024 - based on https://docs.api.nvidia.com/nim/reference
        """
        if model in [
            "google/recurrentgemma-2b",
            "google/gemma-2-27b-it",
            "google/gemma-2-9b-it",
            "gemma-2-9b-it",
        ]:
            return ["stream", "temperature", "top_p", "max_tokens", "stop", "seed"]
        elif model == "nvidia/nemotron-4-340b-instruct":
            return [
                "stream",
                "temperature",
                "top_p",
                "max_tokens",
                "max_completion_tokens",
            ]
        elif model == "nvidia/nemotron-4-340b-reward":
            return [
                "stream",
            ]
        elif model in ["google/codegemma-1.1-7b"]:
            # most params - but no 'seed' :(
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
            ]
        else:
            # DEFAULT Case - The vast majority of Nvidia NIM models lie here, e.g.:
            # "upstage/solar-10.7b-instruct",
            # "snowflake/arctic",
            # "seallms/seallm-7b-v2.5",
            # "nvidia/llama3-chatqa-1.5-8b",
            # "nvidia/llama3-chatqa-1.5-70b",
            # "mistralai/mistral-large",
            # "mistralai/mixtral-8x22b-instruct-v0.1",
            # "mistralai/mixtral-8x7b-instruct-v0.1",
            # "mistralai/mistral-7b-instruct-v0.3",
            # "mistralai/mistral-7b-instruct-v0.2",
            # "mistralai/codestral-22b-instruct-v0.1",
            # "microsoft/phi-3-small-8k-instruct",
            # "microsoft/phi-3-small-128k-instruct",
            # "microsoft/phi-3-mini-4k-instruct",
            # "microsoft/phi-3-mini-128k-instruct",
            # "microsoft/phi-3-medium-4k-instruct",
            # "microsoft/phi-3-medium-128k-instruct",
            # "meta/llama3-70b-instruct",
            # "meta/llama3-8b-instruct",
            # "meta/llama2-70b",
            # "meta/codellama-70b",
            return [
                "stream",
                "temperature",
                "top_p",
                "frequency_penalty",
                "presence_penalty",
                "max_tokens",
                "max_completion_tokens",
                "stop",
                "seed",
            ]
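    # Illustrative check (values follow directly from the branches above):
    #
    #   NvidiaNimConfig().get_supported_openai_params("nvidia/nemotron-4-340b-instruct")
    #   # -> ["stream", "temperature", "top_p", "max_tokens", "max_completion_tokens"]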
    def map_openai_params(
        self,
        non_default_params: dict,
        optional_params: dict,
        model: str,
        drop_params: bool,
    ) -> dict:
        supported_openai_params = self.get_supported_openai_params(model=model)
        for param, value in non_default_params.items():
            if param == "max_completion_tokens":
                # OpenAI's newer 'max_completion_tokens' is always translated
                # to the NIM-native 'max_tokens'.
                optional_params["max_tokens"] = value
            elif param in supported_openai_params:
                optional_params[param] = value
        return optional_params
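# Illustrative sketch: unsupported params are silently dropped, and
# 'max_completion_tokens' is rewritten. Given the branches above:
#
#   NvidiaNimConfig().map_openai_params(
#       non_default_params={"max_completion_tokens": 100, "seed": 42},
#       optional_params={},
#       model="nvidia/nemotron-4-340b-instruct",
#       drop_params=False,
#   )
#   # -> {"max_tokens": 100}  ('seed' is unsupported for this model)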