Spaces:

whitphx
/

gradio-lite-text-to-speech

Running

whitphx HF Staff committed on Jun 27, 2024

Commit

38d8048

verified ·

1 Parent(s): 705fa3c

Update index.html

Files changed (1) hide show

index.html CHANGED Viewed

@@ -18,18 +18,22 @@ import scipy.io.wavfile as wavfile
 speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
-async def synthesize(text):
-    synthesizer = await pipeline(
-      'text-to-speech',
-      'Xenova/speecht5_tts',
-      { "quantized": False }
-    )  # Put the pipeline initializer inside the function to show the first view of the app faster
     out = await synthesizer(text, { "speaker_embeddings": speaker_embeddings });
     audio_data_memory_view = out["audio"]
     sampling_rate = out["sampling_rate"]
     audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)
     wavfile.write('output.wav', sampling_rate, audio_data)
     return "output.wav"

 speaker_embeddings = 'https://huggingface.co/datasets/Xenova/transformers.js-docs/resolve/main/speaker_embeddings.bin';
+synthesizer = await pipeline(
+    'text-to-speech',
+    'Xenova/speecht5_tts',
+    { "quantized": False }
+)
+async def synthesize(text):
     out = await synthesizer(text, { "speaker_embeddings": speaker_embeddings });
     audio_data_memory_view = out["audio"]
     sampling_rate = out["sampling_rate"]
     audio_data = np.frombuffer(audio_data_memory_view, dtype=np.float32)
+    audio_data_16bit = (audio_data * 32767).astype(np.int16)
+    return sampling_rate, audio_data_16bit
     wavfile.write('output.wav', sampling_rate, audio_data)
     return "output.wav"