ClearVoice

Running

App Files Files Community

ClearVoice / utils /bandwidth_sub.py

alibabasglab

Upload 19 files

303de59 verified 10 months ago

raw

history blame contribute delete

4.87 kB

	import numpy as np
	import soundfile as sf
	import librosa
	import os
	from scipy.signal import butter, filtfilt, stft, istft

	# Step 1: Load audio files
	def load_audio(audio_path, sr=48000):
	    """Load an audio file through ``librosa.load``.

	    Args:
	        audio_path: Path of the audio file to read.
	        sr: Target sampling rate in Hz, forwarded to ``librosa.load`` as its
	            ``sr`` argument. Defaults to 48000, the rate the rest of this
	            module is written for.

	    Returns:
	        Tuple ``(audio, sr)`` exactly as returned by ``librosa.load``.
	    """
	    audio, sr = librosa.load(audio_path, sr=sr)
	    return audio, sr

	# Step 2: Detect effective signal bandwidth
	def detect_bandwidth_org(signal, fs, energy_threshold=0.95):
	    """Estimate the frequency range that holds most of the signal energy.

	    The signal is transformed with ``scipy.signal.stft`` (default settings),
	    the power of every frequency bin is summed over time, and the running
	    share of the total energy is tracked from the lowest bin upwards.

	    Args:
	        signal: Audio samples.
	        fs: Sampling rate in Hz.
	        energy_threshold: Fraction of total energy that defines the upper
	            edge; the lower edge uses ``1 - energy_threshold``.

	    Returns:
	        Tuple ``(f_low, f_high)`` in Hz: the first frequency whose cumulative
	        energy share exceeds ``1 - energy_threshold`` and the first one whose
	        share reaches ``energy_threshold``. When no bin satisfies a
	        condition, ``np.argmax`` yields index 0 and the first frequency is
	        returned for that edge.
	    """
	    freqs, _, spectrum = stft(signal, fs=fs)
	    power = np.abs(spectrum) ** 2

	    # Running energy share per frequency bin, lowest frequency first.
	    energy_share = np.cumsum(np.sum(power, axis=1)) / np.sum(power)

	    lower_idx = np.argmax(energy_share > (1 - energy_threshold))
	    upper_idx = np.argmax(energy_share >= energy_threshold)
	    return freqs[lower_idx], freqs[upper_idx]

	def detect_bandwidth(signal, fs, energy_threshold=0.99):
	    """Estimate the effective bandwidth, never reporting 0 Hz as lower edge.

	    Works like an energy-percentile search on the STFT power spectrum
	    (``scipy.signal.stft`` with default settings). The DC bin still
	    contributes to the cumulative energy, but only bins with a strictly
	    positive frequency are candidates for the lower edge.

	    Args:
	        signal: Audio samples.
	        fs: Sampling rate in Hz.
	        energy_threshold: Fraction of total energy that defines the upper
	            edge; the lower edge uses ``1 - energy_threshold``.

	    Returns:
	        Tuple ``(f_low, f_high)`` in Hz. ``f_low`` is the first positive
	        frequency whose cumulative energy share exceeds
	        ``1 - energy_threshold``; ``f_high`` is the first frequency (DC
	        included) whose share reaches ``energy_threshold``.
	    """
	    freqs, _, spectrum = stft(signal, fs=fs)
	    power = np.abs(spectrum) ** 2
	    energy_share = np.cumsum(np.sum(power, axis=1)) / np.sum(power)

	    # Lower edge: search only among bins above 0 Hz.
	    above_dc = freqs > 0
	    candidate_freqs = freqs[above_dc]
	    lower_hit = np.argmax(energy_share[above_dc] > (1 - energy_threshold))
	    f_low = candidate_freqs[lower_hit]

	    # Upper edge: search over every bin.
	    f_high = freqs[np.argmax(energy_share >= energy_threshold)]
	    return f_low, f_high

	# Step 3: Apply bandpass and lowpass filters
	def bandpass_filter(signal, fs, f_low, f_high):
	    """Apply a zero-phase 4th-order Butterworth band-pass filter.

	    Args:
	        signal: Samples to filter.
	        fs: Sampling rate in Hz.
	        f_low: Lower pass-band edge in Hz.
	        f_high: Upper pass-band edge in Hz.

	    Returns:
	        The filtered signal produced by ``scipy.signal.filtfilt``.
	    """
	    nyquist = 0.5 * fs
	    # butter() expects band edges as fractions of the Nyquist frequency.
	    band_edges = [f_low / nyquist, f_high / nyquist]
	    numerator, denominator = butter(N=4, Wn=band_edges, btype='band')
	    return filtfilt(numerator, denominator, signal)

	def lowpass_filter(signal, fs, cutoff):
	    """Apply a zero-phase 4th-order Butterworth low-pass filter.

	    ``scipy.signal.butter`` only accepts a normalised cut-off strictly
	    between 0 and 1, so the two degenerate cut-offs are handled here as the
	    ideal limits instead of raising.

	    Args:
	        signal: Samples to filter.
	        fs: Sampling rate in Hz.
	        cutoff: Cut-off frequency in Hz.

	    Returns:
	        The filtered signal. If ``cutoff`` is at or above the Nyquist
	        frequency the input is returned unchanged (as a float64 copy); if
	        ``cutoff`` is zero or negative an all-zero float64 array of the same
	        shape is returned.
	    """
	    nyquist = 0.5 * fs
	    if cutoff >= nyquist:
	        # Everything representable at this rate is below the cut-off.
	        return np.array(signal, dtype=np.float64)
	    if cutoff <= 0:
	        # Nothing lies below a non-positive cut-off.
	        return np.zeros_like(signal, dtype=np.float64)

	    cutoff_normalized = cutoff / nyquist
	    b, a = butter(N=4, Wn=cutoff_normalized, btype='low')
	    return filtfilt(b, a, signal)

	def highpass_filter(signal, fs, cutoff):
	    """Apply a zero-phase 4th-order Butterworth high-pass filter.

	    ``scipy.signal.butter`` only accepts a normalised cut-off strictly
	    between 0 and 1, so the two degenerate cut-offs are handled here as the
	    ideal limits instead of raising.

	    Args:
	        signal: Samples to filter.
	        fs: Sampling rate in Hz.
	        cutoff: Cut-off frequency in Hz.

	    Returns:
	        The filtered signal. If ``cutoff`` is zero or negative the input is
	        returned unchanged (as a float64 copy); if ``cutoff`` is at or above
	        the Nyquist frequency an all-zero float64 array of the same shape is
	        returned.
	    """
	    nyquist = 0.5 * fs
	    if cutoff <= 0:
	        # Every frequency is above a non-positive cut-off.
	        return np.array(signal, dtype=np.float64)
	    if cutoff >= nyquist:
	        # Nothing representable at this rate lies above the cut-off.
	        return np.zeros_like(signal, dtype=np.float64)

	    cutoff_normalized = cutoff / nyquist
	    b, a = butter(N=4, Wn=cutoff_normalized, btype='high')
	    return filtfilt(b, a, signal)

	# Step 4: Replace bandwidth
	def replace_bandwidth(signal1, signal2, fs, f_low, f_high):
	    """Merge the low band of ``signal1`` with the high band of ``signal2``.

	    Args:
	        signal1: Signal supplying the content below ``f_high``.
	        signal2: Signal supplying the content above ``f_high``.
	        fs: Sampling rate in Hz, shared by both signals.
	        f_low: Lower band edge in Hz. Accepted for interface compatibility
	            but not used by the current implementation.
	        f_high: Crossover frequency in Hz used for both filters.

	    Returns:
	        Sum of the low-passed ``signal1`` and the high-passed ``signal2``,
	        truncated to the length of the shorter of the two.
	    """
	    low_part = lowpass_filter(signal1, fs, f_high)
	    high_part = highpass_filter(signal2, fs, f_high)

	    # The inputs may differ in length; keep only the overlapping samples.
	    common_length = min(len(low_part), len(high_part))
	    return high_part[:common_length] + low_part[:common_length]

	# Step 5: Smooth transitions
	def smooth_transition(signal1, signal2, fs, transition_band=100):
	    """Fade from ``signal2`` into ``signal1`` over the start of the signal.

	    A linear ramp from 0 to 1 weights ``signal1`` while the complementary
	    ramp weights ``signal2``; after the ramp the output equals ``signal1``.

	    Args:
	        signal1: Signal that is faded in and kept after the ramp.
	        signal2: Signal that is faded out during the ramp.
	        fs: Sampling rate in Hz.
	        transition_band: Ramp duration in milliseconds (converted to
	            ``int(transition_band * fs / 1000)`` samples).

	    Returns:
	        The blended signal, as long as the shorter of the two inputs. When
	        the inputs are shorter than the ramp, only the leading part of the
	        ramp is applied.
	    """
	    min_length = min(len(signal1), len(signal2))
	    fade = np.linspace(0, 1, int(transition_band * fs / 1000))

	    # Size the weights from the common length so they always match the
	    # truncated operands, and clip the ramp for very short inputs.
	    crossfade = np.ones(min_length)
	    ramp_length = min(len(fade), min_length)
	    crossfade[:ramp_length] = fade[:ramp_length]

	    head1 = np.asarray(signal1[:min_length])
	    head2 = np.asarray(signal2[:min_length])
	    return (1 - crossfade) * head2 + crossfade * head1

	# Step 6: Save audio
	def save_audio(file_path, audio, fs):
	    """Write ``audio`` to ``file_path`` at sampling rate ``fs`` via soundfile."""
	    sf.write(file=file_path, data=audio, samplerate=fs)


	def bandwidth_sub(low_bandwidth_audio, high_bandwidth_audio, fs=48000):
	    """Substitute the detected band of one signal into another.

	    The effective bandwidth of ``low_bandwidth_audio`` is detected, the
	    content below its upper edge is taken from that signal and the content
	    above it from ``high_bandwidth_audio``, and the result is faded in
	    against ``low_bandwidth_audio``.

	    Args:
	        low_bandwidth_audio: Signal whose effective band is preserved.
	        high_bandwidth_audio: Signal supplying the content above that band.
	        fs: Sampling rate in Hz of both signals.

	    Returns:
	        The merged signal after the start-of-signal crossfade.
	    """
	    band_edges = detect_bandwidth(low_bandwidth_audio, fs)
	    merged = replace_bandwidth(
	        low_bandwidth_audio, high_bandwidth_audio, fs, *band_edges
	    )
	    return smooth_transition(merged, low_bandwidth_audio, fs)

	# Main process
	if __name__ == "__main__":
	    # Batch mode: for every .wav in `low_spectra_dir`, merge its effective
	    # band with the matching "<stem>_generated.wav" from `upper_spectra_dir`
	    # and write the result under `output_dir` with the original file name.
	    low_spectra_dir = 'LJSpeech_22k'
	    upper_spectra_dir = 'LJSpeech_22k_hifi-sr_speech_g_03925000'
	    output_dir = upper_spectra_dir + '_restored'
	    # exist_ok avoids the check-then-create race and handles nested paths.
	    os.makedirs(output_dir, exist_ok=True)

	    filelist = [file for file in os.listdir(low_spectra_dir) if file.endswith('.wav')]
	    for audio_name in filelist:
	        # Swap only the trailing extension; str.replace would also rewrite
	        # any earlier ".wav" occurrence inside the file name.
	        generated_name = audio_name[:-len('.wav')] + '_generated.wav'

	        # Source for effective bandwidth
	        audio1, fs1 = load_audio(os.path.join(low_spectra_dir, audio_name))
	        # Target audio whose lower frequencies are replaced
	        audio2, fs2 = load_audio(os.path.join(upper_spectra_dir, generated_name))

	        if fs1 != 48000 or fs2 != 48000:
	            raise ValueError("Both audio files must have a sampling rate of 48 kHz.")

	        # Detect effective bandwidth of the first signal
	        f_low, f_high = detect_bandwidth(audio1, fs1)
	        print(f"Effective bandwidth: {f_low} Hz to {f_high} Hz")

	        # Replace the lower frequency of the second audio
	        replaced_audio = replace_bandwidth(audio1, audio2, fs2, f_low, f_high)

	        # Optional: Smooth the transition
	        smoothed_audio = smooth_transition(replaced_audio, audio1, fs1)

	        # Save the result
	        save_audio(os.path.join(output_dir, audio_name), smoothed_audio, fs2)