Spaces:
Build error
Build error
Commit
·
ebfb900
1
Parent(s):
aa8a2cf
Add moonshine
Browse files
model.py
CHANGED
|
@@ -516,6 +516,61 @@ def _get_russian_pre_trained_model(
|
|
| 516 |
return recognizer
|
| 517 |
|
| 518 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 519 |
@lru_cache(maxsize=10)
|
| 520 |
def _get_whisper_model(
|
| 521 |
repo_id: str, decoding_method: str, num_active_paths: int
|
|
@@ -1618,6 +1673,8 @@ english_models = {
|
|
| 1618 |
"whisper-tiny.en": _get_whisper_model,
|
| 1619 |
"whisper-base.en": _get_whisper_model,
|
| 1620 |
"whisper-small.en": _get_whisper_model,
|
|
|
|
|
|
|
| 1621 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000": _get_sherpa_onnx_nemo_ctc_models,
|
| 1622 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000": _get_sherpa_onnx_nemo_transducer_models,
|
| 1623 |
# "whisper-medium.en": _get_whisper_model,
|
|
|
|
| 516 |
return recognizer
|
| 517 |
|
| 518 |
|
| 519 |
+
@lru_cache(maxsize=10)
def _get_moonshine_model(
    repo_id: str, decoding_method: str, num_active_paths: int
) -> sherpa_onnx.OfflineRecognizer:
    """Build an offline recognizer for one of the English Moonshine models.

    Results are memoized by ``lru_cache`` on the full argument tuple, so a
    repeated call with the same arguments returns the same recognizer object.

    Args:
      repo_id:
        Short model name. Must be ``"moonshine-tiny"`` or ``"moonshine-base"``;
        it is mapped to the corresponding int8 model repository.
      decoding_method:
        Not used by this function. NOTE(review): presumably kept so that all
        model factories registered in ``english_models`` share one call
        signature -- confirm against the caller.
      num_active_paths:
        Not used by this function (see ``decoding_method``).

    Returns:
      A ``sherpa_onnx.OfflineRecognizer`` created with
      ``OfflineRecognizer.from_moonshine`` using 2 threads.

    Raises:
      ValueError: If ``repo_id`` is not one of the supported short names.
    """
    # Single source of truth for the supported names. An explicit ValueError is
    # raised instead of relying on ``assert``, which is stripped under
    # ``python -O`` and previously made the error type depend on that flag.
    full_repo_ids = {
        "moonshine-tiny": "csukuangfj/sherpa-onnx-moonshine-tiny-en-int8",
        "moonshine-base": "csukuangfj/sherpa-onnx-moonshine-base-en-int8",
    }
    try:
        full_repo_id = full_repo_ids[repo_id]
    except KeyError:
        raise ValueError(f"Unknown repo_id: {repo_id}") from None

    # Resolve the four ONNX graphs that make up a Moonshine model; all of them
    # are requested from the top level (".") of the model repository.
    preprocessor = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="preprocess.onnx",
        subfolder=".",
    )

    encoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="encode.int8.onnx",
        subfolder=".",
    )

    uncached_decoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="uncached_decode.int8.onnx",
        subfolder=".",
    )

    cached_decoder = _get_nn_model_filename(
        repo_id=full_repo_id,
        filename="cached_decode.int8.onnx",
        subfolder=".",
    )

    tokens = _get_token_filename(
        repo_id=full_repo_id,
        subfolder=".",
        filename="tokens.txt",
    )

    recognizer = sherpa_onnx.OfflineRecognizer.from_moonshine(
        preprocessor=preprocessor,
        encoder=encoder,
        uncached_decoder=uncached_decoder,
        cached_decoder=cached_decoder,
        tokens=tokens,
        num_threads=2,
    )

    return recognizer
|
| 572 |
+
|
| 573 |
+
|
| 574 |
@lru_cache(maxsize=10)
|
| 575 |
def _get_whisper_model(
|
| 576 |
repo_id: str, decoding_method: str, num_active_paths: int
|
|
|
|
| 1673 |
"whisper-tiny.en": _get_whisper_model,
|
| 1674 |
"whisper-base.en": _get_whisper_model,
|
| 1675 |
"whisper-small.en": _get_whisper_model,
|
| 1676 |
+
"moonshine-tiny": _get_moonshine_model,
|
| 1677 |
+
"moonshine-base": _get_moonshine_model,
|
| 1678 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_ctc_110m-en-36000": _get_sherpa_onnx_nemo_ctc_models,
|
| 1679 |
"csukuangfj/sherpa-onnx-nemo-parakeet_tdt_transducer_110m-en-36000": _get_sherpa_onnx_nemo_transducer_models,
|
| 1680 |
# "whisper-medium.en": _get_whisper_model,
|