# Hyperparameters for a Whisper-based classifier: a Whisper model, an
# attention-pooling layer and a linear output layer. Saved parameters for
# each component are loaded through a Pretrainer.

# Location the pretrained checkpoints and the label-encoder file are read from.
pretrained_path: Elyadata/ADI-whisper-ADI20

# Identifier of the Whisper model; used as both `source` and `save_path`
# of the Whisper object.
whisper_hub: openai/whisper-large-v3

# Number of output classes (number of neurons of the final linear layer).
n_languages: 20

# Feature dimension shared by the pooling-layer input and the linear-layer input.
# NOTE(review): presumably the hidden size of the Whisper encoder in use - confirm.
features_dim: 1280

# Whisper model object. The four keys below are constructor arguments and
# must be nested under `whisper`, not placed at top level.
whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
    source: !ref <whisper_hub>
    encoder_only: True
    freeze_encoder: False
    # NOTE(review): save_path reuses the hub identifier as a path - confirm this is intended.
    save_path: !ref <whisper_hub>

# Attention-pooling layer; its input dimension is tied to `features_dim`.
attention_pooling: !new:speechbrain.nnet.pooling.AttentionPooling
    input_dim: !ref <features_dim>

# Final linear layer: `features_dim` inputs, `n_languages` outputs, no bias term.
output_mlp: !new:speechbrain.nnet.linear.Linear
    input_size: !ref <features_dim>
    n_neurons: !ref <n_languages>
    bias: False

# Mapping from module name to the object defined under the same top-level key.
# The entries must be nested here; left at top level they would clash with
# the object definitions of the same names.
modules:
    whisper: !ref <whisper>
    attention_pooling: !ref <attention_pooling>
    output_mlp: !ref <output_mlp>

# Softmax activation constructed with apply_log enabled.
log_softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True

# Categorical label encoder, created with no arguments; the Pretrainer entry
# of the same name supplies the file its contents are loaded from.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Pretrainer that loads saved parameters into each component.
# `loadables` maps a name to the object to be filled; `paths` maps the same
# name to the file under `pretrained_path` it is loaded from.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>
        attention_pooling: !ref <attention_pooling>
        output_mlp: !ref <output_mlp>
        label_encoder: !ref <label_encoder>
    paths:
        whisper: !ref <pretrained_path>/whisper.ckpt
        attention_pooling: !ref <pretrained_path>/attention_pooling.ckpt
        output_mlp: !ref <pretrained_path>/output_mlp.ckpt
        label_encoder: !ref <pretrained_path>/dialect_encoder.txt