from typing import List, Optional, Union

from transformers.models.llama import LlamaTokenizerFast


class DeepseekTokenizerFast(LlamaTokenizerFast):

    def convert_ids_to_tokens(
        self, ids: Union[int, List[int]], skip_special_tokens: bool = False
    ) -> Union[str, List[str]]:
| """ | |
| Converts a single index or a sequence of indices in a token or a sequence of tokens, using the vocabulary and | |
| added tokens. | |
| Args: | |
| ids (`int` or `List[int]`): | |
| The token id (or token ids) to convert to tokens. | |
| skip_special_tokens (`bool`, *optional*, defaults to `False`): | |
| Whether or not to remove special tokens in the decoding. | |
| Returns: | |
| `str` or `List[str]`: The decoded token(s). | |
| """ | |
        if isinstance(ids, int):
            return self._convert_id_to_token(ids)
        tokens = []
        for index in ids:
            index = int(index)
            if skip_special_tokens and index in self.all_special_ids:
                continue
            # The backing `tokenizers.Tokenizer` returns None for ids outside
            # the vocabulary; substitute "" so callers always receive strings.
            token = self._tokenizer.id_to_token(index)
            tokens.append(token if token is not None else "")
        return tokens

    def _convert_id_to_token(self, index: int) -> Optional[str]:
        # Single-id path, with the same None -> "" substitution as above.
        token = self._tokenizer.id_to_token(int(index))
        return token if token is not None else ""
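
# A minimal usage sketch, assuming the tokenizer files are loadable from a
# DeepSeek checkpoint on the Hugging Face Hub; the repo id below is
# illustrative, not confirmed by this file.
if __name__ == "__main__":
    tokenizer = DeepseekTokenizerFast.from_pretrained(
        "deepseek-ai/deepseek-llm-7b-base"
    )
    ids = tokenizer.encode("Hello world")
    # Out-of-vocabulary ids come back as "" rather than None, per the
    # override above; special tokens can be dropped with the flag.
    print(tokenizer.convert_ids_to_tokens(ids))
    print(tokenizer.convert_ids_to_tokens(ids, skip_special_tokens=True))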

