Morgan Funtowicz committed
Commit · 1b7eead
1 Parent(s): 8550385
misc(whisper): minor

Files changed:
- Dockerfile: +2 -2
- handler.py: +27 -24
Dockerfile
CHANGED
```diff
@@ -7,8 +7,8 @@ RUN --mount=type=bind,from=huggingface/endpoints-sdk:v1.0.0-beta-py312-manylinux
 
 COPY handler.py /opt/endpoints/
 
-ENV
-ENV
+ENV INTERFACE=0.0.0.0
+ENV PORT=80
 
 EXPOSE 80
 ENTRYPOINT ["python3"]
```
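The two ENV lines pin the bind address and port the handler process is expected to pick up at startup (EXPOSE 80 matches PORT=80). A minimal sketch of how an entrypoint could consume them; the `run_server` call is a placeholder, not the endpoints SDK's actual API:

```python
import os

# Bind address and port injected by the Dockerfile; fall back to the same defaults.
interface = os.environ.get("INTERFACE", "0.0.0.0")
port = int(os.environ.get("PORT", "80"))

# Placeholder for whatever server runner the endpoints SDK exposes.
# run_server(handler, host=interface, port=port)
```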
handler.py
CHANGED
```diff
@@ -38,7 +38,7 @@ SUPPORTED_MODEL_ARCHITECTURES = ["WhisperForConditionalGeneration"]
 
 
 def chunk_audio_with_duration(
-
+    audio: np.ndarray, maximum_duration_sec: int, sampling_rate: int
 ) -> Sequence[np.ndarray]:
     """
     Chunk a mono audio timeseries so that each chunk is as long as `maximum_duration_sec`.
```
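The signature now spells out that the chunker receives the raw mono waveform, a maximum chunk duration in seconds, and the sampling rate. A minimal sketch of what such a chunker can look like; only the signature comes from the diff, the body below is an assumption:

```python
import numpy as np
from typing import Sequence


def chunk_audio_with_duration(
    audio: np.ndarray, maximum_duration_sec: int, sampling_rate: int
) -> Sequence[np.ndarray]:
    # Each chunk holds at most `maximum_duration_sec` seconds of samples.
    samples_per_chunk = maximum_duration_sec * sampling_rate
    return [
        audio[start : start + samples_per_chunk]
        for start in range(0, len(audio), samples_per_chunk)
    ]
```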
```diff
@@ -67,10 +67,10 @@ def compression_ratio(text: str) -> float:
 
 
 def create_prompt(
-
-
-
-
+    audio: np.ndarray,
+    sampling_rate: int,
+    language: int,
+    timestamp_marker: int,
 ):
     """
     Generate the right prompt with the specific parameters to submit for inference over Whisper
```
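Whisper prompts are assembled from the decoder's special tokens (start-of-transcript, a language token, the task token, and a timestamp marker), with the audio attached as multi-modal data on the encoder side. A rough sketch of the shape such a prompt can take with vLLM's encoder/decoder prompt format; note the diff types `language` and `timestamp_marker` as token ids (`int`), while this sketch uses the readable special-token strings, and the exact dict layout is an assumption rather than what `create_prompt` necessarily produces:

```python
def create_prompt(audio, sampling_rate, language, timestamp_marker):
    # Decoder-side conditioning: <|startoftranscript|><|{lang}|><|transcribe|> followed by
    # either <|notimestamps|> or an initial timestamp token.
    decoder_prompt = (
        f"<|startoftranscript|><|{language}|><|transcribe|>{timestamp_marker}"
    )
    return {
        "encoder_prompt": {
            "prompt": "",
            "multi_modal_data": {"audio": (audio, sampling_rate)},
        },
        "decoder_prompt": decoder_prompt,
    }
```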
```diff
@@ -97,7 +97,7 @@ def create_prompt(
 
 
 def create_params(
-
+    max_tokens: int, temperature: float, is_verbose: bool
 ) -> "SamplingParams":
     """
     Create sampling parameters to submit for inference through vLLM `generate`
```
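A plausible mapping of those three arguments onto vLLM's `SamplingParams`; the `logprobs`/`skip_special_tokens` choices are assumptions about why `is_verbose` is threaded through (verbose responses need per-token logprobs and timestamp tokens), not the handler's confirmed settings:

```python
from vllm import SamplingParams


def create_params(max_tokens: int, temperature: float, is_verbose: bool) -> SamplingParams:
    return SamplingParams(
        max_tokens=max_tokens,
        temperature=temperature,
        # Verbose (segment-level) output needs the sampled tokens' logprobs to
        # compute avg_logprob; plain text output can skip them.
        logprobs=1 if is_verbose else None,
        # Keep special tokens so timestamp tokens survive decoding.
        skip_special_tokens=False,
    )
```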
```diff
@@ -127,12 +127,12 @@ def get_avg_logprob(logprobs: "SampleLogprobs") -> float:
 
 
 def process_chunk(
-
-
-
-
-
-
+    tokenizer: "PreTrainedTokenizer",
+    ids: np.ndarray,
+    logprobs: "SampleLogprobs",
+    request: TranscriptionRequest,
+    segment_offset: int,
+    timestamp_offset: int,
 ) -> Generator:
     """
     Decode a single transcribed audio chunk and generates all the segments associated
```
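`process_chunk` walks the generated token ids and cuts them into segments at Whisper's timestamp tokens (one token per 20 ms step), shifting the results by `segment_offset` and `timestamp_offset` so segments from later chunks line up on the global timeline. A simplified sketch of that splitting logic, assuming `timestamp_begin` is the id of the first timestamp token (`<|0.00|>`); the real function also attaches ids, logprobs, and quality metrics to each segment:

```python
def iter_timestamped_spans(tokenizer, ids, timestamp_begin, timestamp_offset):
    """Yield (start_sec, end_sec, text) spans cut at Whisper timestamp tokens."""
    text_ids, start = [], None
    for token_id in ids:
        if token_id >= timestamp_begin:
            seconds = (token_id - timestamp_begin) * 0.02 + timestamp_offset
            if start is None:
                start = seconds
            else:
                yield start, seconds, tokenizer.decode(text_ids)
                text_ids, start = [], None
        else:
            text_ids.append(token_id)
```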
```diff
@@ -202,9 +202,9 @@ def process_chunk(
 
 
 def process_chunks(
-
-
-
+    tokenizer: "PreTrainedTokenizer",
+    chunks: List["RequestOutput"],
+    request: TranscriptionRequest,
 ) -> Tuple[List[Segment], str]:
     """
     Iterate over all the audio chunk's outputs and consolidates outputs as segment(s) whether the response is verbose or not
```
```diff
@@ -227,7 +227,7 @@
         logprobs = generation.logprobs
 
         for segment, _is_continuation in process_chunk(
-
+            tokenizer, ids, logprobs, request, segment_offset, time_offset
         ):
             materialized_segments.append(segment)
 
```
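In context, that call sits inside the per-chunk loop of `process_chunks`, which keeps running segment and time offsets so later chunks continue the numbering and timeline of earlier ones. A rough reconstruction of what the loop likely looks like given the call above; the offset bookkeeping and the `.end`/`.text` field access on `Segment` are assumptions:

```python
def process_chunks(tokenizer, chunks, request):
    materialized_segments = []
    segment_offset, time_offset = 0, 0.0

    for chunk in chunks:
        generation = chunk.outputs[0]  # one sampled completion per chunk (assumption)
        ids, logprobs = generation.token_ids, generation.logprobs

        for segment, _is_continuation in process_chunk(
            tokenizer, ids, logprobs, request, segment_offset, time_offset
        ):
            materialized_segments.append(segment)

        # Later chunks continue the segment numbering and the audio timeline (assumption).
        segment_offset = len(materialized_segments)
        if materialized_segments:
            time_offset = materialized_segments[-1].end

    text = "".join(segment.text for segment in materialized_segments)
    return materialized_segments, text
```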
```diff
@@ -267,12 +267,12 @@ class WhisperHandler(Handler[TranscriptionRequest, TranscriptionResponse]):
         )
 
     async def transcribe(
-
-
-
-
-
-
+        self,
+        ctx: Context,
+        request: TranscriptionRequest,
+        tokenizer: "PreTrainedTokenizer",
+        audio_chunks: Iterable[np.ndarray],
+        params: "SamplingParams",
     ) -> (List[Segment], str):
         async def __agenerate__(request_id: str, prompt, params):
             """
```
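`transcribe` now receives the request context, tokenizer, the pre-cut audio chunks, and the sampling parameters, and drives one vLLM generation per chunk through the inner `__agenerate__` coroutine. A sketch of the fan-out pattern this implies, assuming a vLLM `AsyncLLMEngine`-style `generate(prompt, params, request_id)` async generator; apart from `__agenerate__`, the helper names are illustrative:

```python
import asyncio


async def __agenerate__(engine, request_id: str, prompt, params):
    """Drain the async stream for one chunk and keep the final RequestOutput."""
    final = None
    async for output in engine.generate(prompt, params, request_id):
        final = output
    return final


async def transcribe_chunks(engine, ctx, audio_chunks, sampling_rate, language, marker, params):
    # One concurrent generation per audio chunk, tagged with a per-chunk request id.
    return await asyncio.gather(*[
        __agenerate__(
            engine,
            f"{ctx.request_id}-{index}",
            create_prompt(chunk, sampling_rate, language, marker),
            params,
        )
        for index, chunk in enumerate(audio_chunks)
    ])
```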
```diff
@@ -323,14 +323,14 @@ class WhisperHandler(Handler[TranscriptionRequest, TranscriptionResponse]):
         return segments, text
 
     async def __call__(
-
+        self, request: TranscriptionRequest, ctx: Context
     ) -> TranscriptionResponse:
         with logger.contextualize(request_id=ctx.request_id):
             with memoryview(request) as audio:
 
                 # Check if we need to enable the verbose path
                 is_verbose = (
-
+                    request.response_kind == TranscriptionResponseKind.VERBOSE_JSON
                 )
 
                 # Retrieve the tokenizer and model config asynchronously while we decode audio
```
```diff
@@ -377,6 +377,9 @@ class WhisperHandler(Handler[TranscriptionRequest, TranscriptionResponse]):
             case TranscriptionResponseKind.TEXT:
                 return TranscriptionResponse.text(text)
 
+        # I don't foresee any case where this would happen, but at least we are safe
+        raise ValueError(f"Invalid response_kind ({request.response_kind})")
+
 
 def entrypoint():
     # Retrieve endpoint configuration
```
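The added raise turns an unhandled `response_kind` into an explicit error instead of letting `__call__` silently return None when the match falls through. A standalone miniature of the pattern; the enum and renderer below only illustrate the guard, they are not the handler's actual `TranscriptionResponse` API (only `TranscriptionResponse.text` appears in the diff):

```python
from enum import Enum


class TranscriptionResponseKind(Enum):
    VERBOSE_JSON = "verbose_json"
    JSON = "json"
    TEXT = "text"


def render(kind: TranscriptionResponseKind, text: str) -> str:
    match kind:
        case TranscriptionResponseKind.TEXT:
            return text
        case TranscriptionResponseKind.JSON | TranscriptionResponseKind.VERBOSE_JSON:
            return f'{{"text": "{text}"}}'
    # Unreachable for today's members; keeps a future enum value from slipping through.
    raise ValueError(f"Invalid response_kind ({kind})")
```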