Commit
·
0986216
1
Parent(s):
4ceafea
add nemo parakeet-tdt-0.6b-v2
Browse files- model.py +47 -0
- requirements.txt +1 -1
model.py
CHANGED
|
@@ -1270,6 +1270,52 @@ def _get_french_pre_trained_model(
|
|
| 1270 |
return recognizer
|
| 1271 |
|
| 1272 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1273 |
@lru_cache(maxsize=10)
|
| 1274 |
def _get_sherpa_onnx_nemo_transducer_models(
|
| 1275 |
repo_id: str,
|
|
@@ -1860,6 +1906,7 @@ chinese_models = {
|
|
| 1860 |
}
|
| 1861 |
|
| 1862 |
english_models = {
|
|
|
|
| 1863 |
"whisper-tiny.en": _get_whisper_model,
|
| 1864 |
"moonshine-tiny": _get_moonshine_model,
|
| 1865 |
"moonshine-base": _get_moonshine_model,
|
|
|
|
| 1270 |
return recognizer
|
| 1271 |
|
| 1272 |
|
| 1273 |
+
@lru_cache(maxsize=10)
def _get_sherpa_onnx_nemo_transducer_models_int8(
    repo_id: str,
    decoding_method: str,
    num_active_paths: int,
) -> sherpa_onnx.OfflineRecognizer:
    """Create an offline recognizer for an int8 NeMo transducer model.

    Results are memoized with ``lru_cache``, so calling this again with the
    same three arguments returns the previously built recognizer.

    Args:
      repo_id:
        Model repository identifier. Only
        ``csukuangfj/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8`` is accepted.
      decoding_method:
        Forwarded unchanged as ``decoding_method`` to
        ``OfflineRecognizer.from_transducer``.
      num_active_paths:
        Forwarded as ``max_active_paths`` to
        ``OfflineRecognizer.from_transducer``.

    Returns:
      The recognizer built by ``sherpa_onnx.OfflineRecognizer.from_transducer``.

    Raises:
      AssertionError: If ``repo_id`` is not one of the supported repositories.
    """
    supported_repo_ids = [
        "csukuangfj/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8",
    ]
    assert repo_id in supported_repo_ids, repo_id

    def resolve_model_file(onnx_name):
        # All model files are looked up at the top level (".") of the repo.
        # NOTE(review): presumably this resolves to a local file path;
        # confirm against _get_nn_model_filename.
        return _get_nn_model_filename(
            repo_id=repo_id,
            filename=onnx_name,
            subfolder=".",
        )

    # Resolve the three network files in encoder -> decoder -> joiner order,
    # then the token table, before constructing the recognizer.
    encoder_path = resolve_model_file("encoder.int8.onnx")
    decoder_path = resolve_model_file("decoder.int8.onnx")
    joiner_path = resolve_model_file("joiner.int8.onnx")
    tokens_path = _get_token_filename(repo_id=repo_id, subfolder=".")

    return sherpa_onnx.OfflineRecognizer.from_transducer(
        tokens=tokens_path,
        encoder=encoder_path,
        decoder=decoder_path,
        joiner=joiner_path,
        num_threads=2,
        sample_rate=16000,
        feature_dim=128,  # not used
        model_type="nemo_transducer",
        decoding_method=decoding_method,
        max_active_paths=num_active_paths,
    )
|
| 1317 |
+
|
| 1318 |
+
|
| 1319 |
@lru_cache(maxsize=10)
|
| 1320 |
def _get_sherpa_onnx_nemo_transducer_models(
|
| 1321 |
repo_id: str,
|
|
|
|
| 1906 |
}
|
| 1907 |
|
| 1908 |
english_models = {
|
| 1909 |
+
"csukuangfj/sherpa-onnx-nemo-parakeet-tdt-0.6b-v2-int8": _get_sherpa_onnx_nemo_transducer_models_int8,
|
| 1910 |
"whisper-tiny.en": _get_whisper_model,
|
| 1911 |
"moonshine-tiny": _get_moonshine_model,
|
| 1912 |
"moonshine-base": _get_moonshine_model,
|
requirements.txt
CHANGED
|
@@ -10,6 +10,6 @@ numpy<2
|
|
| 10 |
|
| 11 |
huggingface_hub
|
| 12 |
|
| 13 |
-
|
| 14 |
|
| 15 |
sherpa-onnx>=1.11.3
|
|
|
|
| 10 |
|
| 11 |
huggingface_hub
|
| 12 |
|
| 13 |
+
https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.11.4/sherpa_onnx-1.11.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
| 14 |
|
| 15 |
sherpa-onnx>=1.11.3
|