Spaces:
Build error
Build error
Commit
·
4e478c6
1
Parent(s):
817f0f3
add updated gigaspeech model
Browse files
model.py
CHANGED
|
@@ -880,6 +880,51 @@ def _get_japanese_pre_trained_model(
|
|
| 880 |
return recognizer
|
| 881 |
|
| 882 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 883 |
@lru_cache(maxsize=10)
|
| 884 |
def _get_paraformer_zh_pre_trained_model(
|
| 885 |
repo_id: str,
|
|
@@ -971,6 +1016,7 @@ english_models = {
|
|
| 971 |
"whisper-base.en": _get_whisper_model,
|
| 972 |
"whisper-small.en": _get_whisper_model,
|
| 973 |
# "whisper-medium.en": _get_whisper_model,
|
|
|
|
| 974 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
| 975 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
| 976 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|
|
|
|
| 880 |
return recognizer
|
| 881 |
|
| 882 |
|
| 883 |
+
@lru_cache(maxsize=10)
def _get_gigaspeech_pre_trained_model_onnx(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline transducer recognizer for the GigaSpeech zipformer model.

    The encoder, decoder and joiner ONNX files are looked up in the ``exp``
    subfolder of ``repo_id`` via ``_get_nn_model_filename``; the token table
    is looked up in ``data/lang_bpe_500`` via ``_get_token_filename``.
    Results are memoized per argument combination by ``lru_cache`` (up to 10
    entries).

    Args:
      repo_id:
        Model repository identifier. Only
        ``yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17`` is accepted.
      decoding_method:
        Forwarded unchanged to ``OfflineRecognizer.from_transducer``.
      num_active_paths:
        Forwarded to ``from_transducer`` as ``max_active_paths``.

    Returns:
      The recognizer built by ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    Raises:
      AssertionError: If ``repo_id`` is not the supported repository.
    """
    supported_repo_ids = [
        "yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17",
    ]
    assert repo_id in supported_repo_ids, repo_id

    # Resolve the three transducer parts in encoder -> decoder -> joiner order.
    onnx_filenames = (
        "encoder-epoch-30-avg-9.onnx",
        "decoder-epoch-30-avg-9.onnx",
        "joiner-epoch-30-avg-9.onnx",
    )
    encoder_path, decoder_path, joiner_path = [
        _get_nn_model_filename(
            repo_id=repo_id,
            filename=onnx_name,
            subfolder="exp",
        )
        for onnx_name in onnx_filenames
    ]

    tokens_path = _get_token_filename(
        repo_id=repo_id,
        subfolder="data/lang_bpe_500",
    )

    return sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens_path,
        encoder=encoder_path,
        decoder=decoder_path,
        joiner=joiner_path,
        num_threads=2,
        sample_rate=16000,
        feature_dim=80,
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )
|
| 926 |
+
|
| 927 |
+
|
| 928 |
@lru_cache(maxsize=10)
|
| 929 |
def _get_paraformer_zh_pre_trained_model(
|
| 930 |
repo_id: str,
|
|
|
|
| 1016 |
"whisper-base.en": _get_whisper_model,
|
| 1017 |
"whisper-small.en": _get_whisper_model,
|
| 1018 |
# "whisper-medium.en": _get_whisper_model,
|
| 1019 |
+
"yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17": _get_gigaspeech_pre_trained_model_onnx, # noqa
|
| 1020 |
"wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2": _get_gigaspeech_pre_trained_model, # noqa
|
| 1021 |
"yfyeung/icefall-asr-multidataset-pruned_transducer_stateless7-2023-05-04": _get_english_model, # noqa
|
| 1022 |
"yfyeung/icefall-asr-finetune-mux-pruned_transducer_stateless7-2023-05-19": _get_english_model, # noqa
|