Spaces:
Sleeping
Sleeping
Update utils.py
Browse files
utils.py
CHANGED
|
@@ -8,10 +8,41 @@ import re
|
|
| 8 |
import torch
|
| 9 |
import numpy as np
|
| 10 |
import os
|
|
|
|
|
|
|
| 11 |
|
| 12 |
_ref_audio_cache = {}
|
| 13 |
asr_pipe = None
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def chunk_text(text, max_chars=135):
|
| 16 |
|
| 17 |
# print(text)
|
|
@@ -129,9 +160,7 @@ def preprocess_ref_audio_text(ref_audio_orig, ref_text, clip_short=True, show_in
|
|
| 129 |
|
| 130 |
show_info("Converting audio...")
|
| 131 |
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
os.system("sox " + ref_audio_orig + " -r 24000 -c 1 " + ref_audio_orig_converted)
|
| 135 |
|
| 136 |
ref_audio_orig = ref_audio_orig_converted
|
| 137 |
|
|
|
|
| 8 |
import torch
|
| 9 |
import numpy as np
|
| 10 |
import os
|
| 11 |
+
from scipy.io import wavfile
|
| 12 |
+
from scipy.signal import resample_poly
|
| 13 |
|
| 14 |
_ref_audio_cache = {}
|
| 15 |
asr_pipe = None
|
| 16 |
|
| 17 |
+
def resample_to_24khz(input_path: str, output_path: str):
    """
    Resample a WAV file to 24,000 Hz mono and save it as 16-bit PCM.

    Parameters:
    - input_path (str): Path to the input WAV file.
    - output_path (str): Path to save the output WAV file.

    Raises whatever ``scipy.io.wavfile.read`` / ``wavfile.write`` raise for
    unreadable input or an unwritable output path.
    """
    # Load WAV file
    orig_sr, audio = wavfile.read(input_path)

    # Normalise to float32 BEFORE down-mixing: averaging channels promotes
    # integer PCM to float64, after which np.iinfo() on the array's dtype
    # raises ValueError. Deciding the scale from the dtype as read also lets
    # float WAVs pass through unscaled.
    src_dtype = audio.dtype
    if np.issubdtype(src_dtype, np.integer):
        info = np.iinfo(src_dtype)
        audio = audio.astype(np.float32)
        if info.min == 0:
            # Unsigned PCM (8-bit WAV) is offset-binary: re-centre on zero.
            midpoint = (info.max + 1) / 2
            audio = (audio - midpoint) / midpoint
        else:
            audio = audio / info.max
    else:
        audio = audio.astype(np.float32)

    # Convert to mono if the file has more than one channel
    if audio.ndim == 2:
        audio = audio.mean(axis=1)

    # Resample
    target_sr = 24000
    resampled = resample_poly(audio, target_sr, orig_sr)

    # Filter overshoot (or float input above full scale) can leave [-1, 1];
    # clip so the int16 cast below saturates instead of wrapping around.
    resampled = np.clip(resampled, -1.0, 1.0)

    # Convert back to int16 for saving
    resampled_int16 = (resampled * 32767).astype(np.int16)

    # Save output
    wavfile.write(output_path, target_sr, resampled_int16)
|
| 45 |
+
|
| 46 |
def chunk_text(text, max_chars=135):
|
| 47 |
|
| 48 |
# print(text)
|
|
|
|
| 160 |
|
| 161 |
show_info("Converting audio...")
|
| 162 |
|
| 163 |
+
resample_to_24khz(ref_audio_orig, ref_audio_orig_converted)
|
|
|
|
|
|
|
| 164 |
|
| 165 |
ref_audio_orig = ref_audio_orig_converted
|
| 166 |
|