# Spaces: Running on Zero
from copy import copy
from enum import Enum, auto
from itertools import count

from flashcosyvoice.config import SamplingParams
class SequenceStatus(Enum):
    """Status values a ``Sequence`` can hold.

    Members receive consecutive ``auto()`` values in declaration order.
    """

    # Status assigned to every newly constructed Sequence.
    WAITING = auto()
    RUNNING = auto()
    # Status that ``Sequence.is_finished`` compares against.
    FINISHED = auto()
class Sequence:
    """A list of token ids plus the counters used to track it in fixed-size blocks.

    The zero-argument accessors (``is_finished``, ``num_completion_tokens``,
    ``num_blocks``, ...) are properties: ``block``, ``last_block_num_tokens``,
    ``__getstate__`` and ``__setstate__`` all read ``num_blocks`` /
    ``num_completion_tokens`` as plain attributes.

    Class attributes:
        block_size: Number of tokens per block (256).
        counter: Shared ``itertools.count`` that hands out ``seq_id`` values.
    """

    block_size = 256
    counter = count()

    def __init__(self, token_ids: list[int], sampling_params: "SamplingParams | None" = None):
        """Create a sequence in the WAITING state from prompt ``token_ids``.

        Args:
            token_ids: Prompt token ids. A shallow copy is stored, so later
                changes to the caller's list do not affect this sequence.
            sampling_params: Sampling configuration to copy fields from. When
                omitted, a default ``SamplingParams()`` is used.

        Raises:
            IndexError: If ``token_ids`` is empty (there is no last token).
        """
        if sampling_params is None:
            # Build the default per call instead of once at definition time,
            # so instances never share a single default object.
            sampling_params = SamplingParams()

        self.seq_id = next(Sequence.counter)
        self.status = SequenceStatus.WAITING
        self.token_ids = copy(token_ids)
        self.last_token = token_ids[-1]
        self.num_tokens = len(self.token_ids)
        self.num_prompt_tokens = len(token_ids)
        self.num_cached_tokens = 0
        self.block_table = []

        self.temperature = sampling_params.temperature
        self.min_tokens = sampling_params.min_tokens
        self.max_tokens = sampling_params.max_tokens
        self.ignore_eos = sampling_params.ignore_eos
        self.top_k = sampling_params.top_k
        # RasSampler parameters
        self.use_ras = sampling_params.use_ras
        self.win_size = sampling_params.win_size
        self.tau_r = sampling_params.tau_r
        self.top_p = sampling_params.top_p

    def __len__(self):
        """Return the total number of tokens (prompt + completion)."""
        return self.num_tokens

    def __getitem__(self, key):
        """Index or slice directly into ``token_ids``."""
        return self.token_ids[key]

    @property
    def is_finished(self):
        """Whether ``status`` is ``SequenceStatus.FINISHED``."""
        return self.status == SequenceStatus.FINISHED

    @property
    def num_completion_tokens(self):
        """Number of tokens appended after the prompt."""
        return self.num_tokens - self.num_prompt_tokens

    @property
    def prompt_token_ids(self):
        """The first ``num_prompt_tokens`` entries of ``token_ids``."""
        return self.token_ids[:self.num_prompt_tokens]

    @property
    def completion_token_ids(self):
        """Every entry of ``token_ids`` after the prompt."""
        return self.token_ids[self.num_prompt_tokens:]

    @property
    def num_cached_blocks(self):
        """Number of whole blocks covered by ``num_cached_tokens``."""
        return self.num_cached_tokens // self.block_size

    @property
    def num_blocks(self):
        """Number of blocks needed for ``num_tokens`` (ceiling division)."""
        return (self.num_tokens + self.block_size - 1) // self.block_size

    @property
    def last_block_num_tokens(self):
        """Number of tokens that fall in the final block."""
        return self.num_tokens - (self.num_blocks - 1) * self.block_size

    def block(self, i):
        """Return the token ids of block ``i`` (the last block may be short)."""
        assert 0 <= i < self.num_blocks
        return self.token_ids[i*self.block_size: (i+1)*self.block_size]

    def append_token(self, token_id: int):
        """Append one token and update ``last_token`` and ``num_tokens``."""
        self.token_ids.append(token_id)
        self.last_token = token_id
        self.num_tokens += 1

    def __getstate__(self):
        """Return a compact tuple for pickling.

        The tuple holds the three token counters, the block table, and a final
        payload: the full ``token_ids`` list while no completion token exists,
        otherwise only ``last_token``. ``seq_id``, ``status`` and the sampling
        fields are not included.
        """
        return (self.num_tokens, self.num_prompt_tokens, self.num_cached_tokens, self.block_table,
                self.token_ids if self.num_completion_tokens == 0 else self.last_token)

    def __setstate__(self, state):
        """Restore the fields produced by ``__getstate__``.

        Only one of ``token_ids`` / ``last_token`` is restored, depending on
        whether the pickled sequence had completion tokens; the other attribute
        is left unset on the restored object.
        """
        self.num_tokens, self.num_prompt_tokens, self.num_cached_tokens, self.block_table = state[:-1]
        if self.num_completion_tokens == 0:
            self.token_ids = state[-1]
        else:
            self.last_token = state[-1]