Update readme, `whisper-large` -> `whisper-large-v2` (#4)
Commit 6827ce791b91ce7af083878b8a153bcd768c1c74
README.md CHANGED
@@ -174,8 +174,8 @@ The "<|en|>" token is used to specify that the speech is in english and should b
 >>> import torch
 
 >>> # load model and processor
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
 
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
@@ -199,8 +199,8 @@ transcription.
 >>> import torch
 
 >>> # load model and processor
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
 
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
@@ -227,8 +227,8 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
 >>> import torch
 
 >>> # load model and processor
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")
 
 >>> # load dummy dataset and read soundfiles
 >>> ds = load_dataset("common_voice", "fr", split="test", streaming=True)
@@ -245,7 +245,7 @@ The "<|translate|>" is used as the first decoder input token to specify the tran
 
 ## Evaluation
 
-This code snippet shows how to evaluate **openai/whisper-large** on LibriSpeech's "clean" and "other" test data.
+This code snippet shows how to evaluate **openai/whisper-large-v2** on LibriSpeech's "clean" and "other" test data.
 
 ```python
 >>> from datasets import load_dataset
@@ -257,8 +257,8 @@ This code snippet shows how to evaluate **openai/whisper-large** on LibriSpeech'
 
 >>> librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")
 
->>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large").to("cuda")
->>> processor = WhisperProcessor.from_pretrained("openai/whisper-large")
+>>> model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2").to("cuda")
+>>> processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
 
 >>> def map_to_pred(batch):
 >>>     input_features = processor(batch["audio"]["array"], return_tensors="pt").input_features
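For context, here is the English-transcription snippet as it reads after this commit, assembled into a self-contained script. Only the two `from_pretrained` lines come from the hunks above; the imports, feature extraction, and decoding steps sit outside the diff and are reconstructed here from the surrounding README, so treat this as a sketch rather than the exact README text.

```python
# Sketch of the updated English-transcription snippet; the import lines and
# the generate/decode steps are assumed from the README context outside the
# hunks shown above.
from datasets import load_dataset
from transformers import WhisperForConditionalGeneration, WhisperProcessor

# load model and processor with the new checkpoint name
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")
model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2")

# load dummy dataset and read sound files
ds = load_dataset("hf-internal-testing/librispeech_asr_dummy", "clean", split="validation")
sample = ds[0]["audio"]
input_features = processor(
    sample["array"], sampling_rate=sample["sampling_rate"], return_tensors="pt"
).input_features

# generate token ids and decode them to text
predicted_ids = model.generate(input_features)
transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)
print(transcription)
```

The only substantive change in the commit is the checkpoint name; `openai/whisper-large-v2` is a drop-in replacement for `openai/whisper-large` in both calls.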
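Likewise, the evaluation snippet after the change, filled out into runnable form. The WER computation below the second hunk is not shown in this diff, so the metric setup here (the `evaluate` library's `wer` metric, which requires `jiwer`) is an assumption about the surrounding README; a CUDA device is assumed, matching the `.to("cuda")` in the diff.

```python
# Hedged sketch of the updated evaluation snippet; the WER metric setup is
# assumed, as it falls outside the hunks shown above.
import torch
from datasets import load_dataset
from evaluate import load
from transformers import WhisperForConditionalGeneration, WhisperProcessor

librispeech_eval = load_dataset("librispeech_asr", "clean", split="test")

model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-large-v2").to("cuda")
processor = WhisperProcessor.from_pretrained("openai/whisper-large-v2")

def map_to_pred(batch):
    audio = batch["audio"]
    input_features = processor(
        audio["array"], sampling_rate=audio["sampling_rate"], return_tensors="pt"
    ).input_features
    with torch.no_grad():
        predicted_ids = model.generate(input_features.to("cuda"))
    batch["prediction"] = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
    return batch

result = librispeech_eval.map(map_to_pred)

# word error rate over the reference transcripts, as a percentage
wer = load("wer")
print(100 * wer.compute(references=result["text"], predictions=result["prediction"]))
```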