Spaces:
Running
Running
| import numpy as np | |
| import resampy | |
| import soundfile as sf | |
| from utils.spectrogram import VoicedAreaDetection | |
def load_wav(wav_path, sr=24000):
    """Load an audio file and hand it back at the requested sample rate.

    Args:
        wav_path: Location of the audio file, passed straight to ``sf.read``.
        sr: Target sample rate in Hz. If the file was stored at a different
            rate, the samples are resampled along axis 0 with ``resampy``.

    Returns:
        A ``(wav, fs)`` tuple: the sample array and its sample rate, which
        compares equal to ``sr``.
    """
    samples, rate = sf.read(wav_path)

    # Bring the audio to the target rate only when the file's rate differs.
    if rate != sr:
        samples, rate = resampy.resample(samples, rate, sr, axis=0), sr

    # Scale down only when the signal exceeds full scale; quieter audio is
    # returned untouched.
    loudest = np.max(np.abs(samples))
    if loudest > 1.0:
        samples /= loudest

    return samples, rate
def extract_voiced_area(wav_path, hi_freq=1000, hop_size=480, energy_thres=0.5):
    """Run ``VoicedAreaDetection`` on an audio file.

    The file is read through ``load_wav`` with its default sample rate, and
    the analysis uses a fixed FFT size and window length of 2048.

    Args:
        wav_path: Location of the audio file to analyse.
        hi_freq: Forwarded to ``VoicedAreaDetection`` as ``hi_freq``.
        hop_size: Forwarded to ``VoicedAreaDetection`` as ``n_shift``.
        energy_thres: Forwarded to ``VoicedAreaDetection`` as ``energy_thres``.

    Returns:
        Whatever ``VoicedAreaDetection`` returns, unchanged.
        NOTE(review): presumably per-frame voiced/unvoiced flags -- confirm
        against ``utils.spectrogram``.
    """
    audio, sample_rate = load_wav(wav_path)

    # FFT size and analysis window share one fixed length.
    frame_length = 2048

    return VoicedAreaDetection(
        x=audio,
        sr=sample_rate,
        n_fft=frame_length,
        n_shift=hop_size,
        win_length=frame_length,
        hi_freq=hi_freq,
        energy_thres=energy_thres,
    )
def init_weights(m, mean=0.0, std=0.01):
    """Re-initialise the weights of convolution-like modules in place.

    A module qualifies when its class name contains the substring ``"Conv"``;
    every other module is left untouched.

    Args:
        m: Module to inspect. Qualifying modules must expose ``weight.data``.
        mean: Mean passed to ``normal_``.
        std: Standard deviation passed to ``normal_``.
    """
    if "Conv" not in type(m).__name__:
        return
    m.weight.data.normal_(mean, std)
def get_padding(kernel_size, dilation=1):
    """Return half of ``kernel_size * dilation - dilation``, truncated to int.

    Args:
        kernel_size: Size of the convolution kernel.
        dilation: Dilation factor of the convolution.

    Returns:
        The padding amount as an ``int`` (truncated toward zero).
    """
    dilated_extent = kernel_size * dilation - dilation
    half_extent = dilated_extent / 2
    return int(half_extent)
class AttrDict(dict):
    """A ``dict`` whose keys can also be read and written as attributes."""

    def __init__(self, *args, **kwargs):
        """Build the dict from the same arguments ``dict`` accepts."""
        super().__init__(*args, **kwargs)
        # Use the mapping itself as the instance namespace, so attribute
        # access and item access share one storage.
        self.__dict__ = self