Spaces:

grgsaliba
/

voice-denoising

Sleeping

App Files Files Community

grgsaliba commited on Oct 12

Commit

0f01c60

verified ·

1 Parent(s): 2d3c3ce

Upload example_usage.py with huggingface_hub

Browse files

Files changed (1) hide show

example_usage.py +207 -0

example_usage.py ADDED Viewed

	@@ -0,0 +1,207 @@

+"""
+Example: Quick test of DTLN model
+This script demonstrates how to:
+1. Create a model
+2. Generate synthetic noisy audio
+3. Process it through the model
+"""
+import numpy as np
+import soundfile as sf
+from dtln_ethos_u55 import DTLN_Ethos_U55, create_lightweight_model
+import matplotlib.pyplot as plt
+def generate_test_audio(duration=2.0, sample_rate=16000):
+    """
+    Generate synthetic test audio (speech + noise)
+    Args:
+        duration: Audio duration in seconds
+        sample_rate: Sampling rate
+    Returns:
+        Tuple of (clean, noisy) audio
+    """
+    t = np.linspace(0, duration, int(duration * sample_rate))
+    # Generate synthetic "speech" (mixture of frequencies)
+    speech = (
+        0.3 * np.sin(2 * np.pi * 200 * t) +
+        0.2 * np.sin(2 * np.pi * 400 * t) +
+        0.15 * np.sin(2 * np.pi * 600 * t)
+    )
+    # Add envelope to simulate speech patterns
+    envelope = 0.5 + 0.5 * np.sin(2 * np.pi * 3 * t)
+    speech = speech * envelope
+    # Generate noise
+    noise = np.random.randn(len(t)) * 0.15
+    # Mix speech and noise (SNR ~10dB)
+    noisy = speech + noise
+    # Normalize
+    speech = speech / (np.max(np.abs(speech)) + 1e-8) * 0.9
+    noisy = noisy / (np.max(np.abs(noisy)) + 1e-8) * 0.9
+    return speech.astype(np.float32), noisy.astype(np.float32)
+def plot_comparison(clean, noisy, enhanced, sample_rate=16000):
+    """Plot waveforms and spectrograms for comparison"""
+    fig, axes = plt.subplots(3, 2, figsize=(12, 10))
+    # Time domain plots
+    t = np.arange(len(clean)) / sample_rate
+    axes[0, 0].plot(t, clean)
+    axes[0, 0].set_title('Clean Speech (Waveform)')
+    axes[0, 0].set_ylabel('Amplitude')
+    axes[0, 0].grid(True)
+    axes[1, 0].plot(t, noisy)
+    axes[1, 0].set_title('Noisy Speech (Waveform)')
+    axes[1, 0].set_ylabel('Amplitude')
+    axes[1, 0].grid(True)
+    axes[2, 0].plot(t, enhanced)
+    axes[2, 0].set_title('Enhanced Speech (Waveform)')
+    axes[2, 0].set_xlabel('Time (s)')
+    axes[2, 0].set_ylabel('Amplitude')
+    axes[2, 0].grid(True)
+    # Frequency domain plots (spectrograms)
+    from scipy import signal
+    for idx, (audio, title) in enumerate([
+        (clean, 'Clean Speech (Spectrogram)'),
+        (noisy, 'Noisy Speech (Spectrogram)'),
+        (enhanced, 'Enhanced Speech (Spectrogram)')
+    ]):
+        f, t_spec, Sxx = signal.spectrogram(audio, sample_rate, nperseg=512)
+        axes[idx, 1].pcolormesh(
+            t_spec, f, 10 * np.log10(Sxx + 1e-10),
+            shading='gouraud',
+            cmap='viridis'
+        )
+        axes[idx, 1].set_ylabel('Frequency (Hz)')
+        axes[idx, 1].set_title(title)
+        if idx == 2:
+            axes[idx, 1].set_xlabel('Time (s)')
+    plt.tight_layout()
+    plt.savefig('/mnt/user-data/outputs/denoising_comparison.png', dpi=150)
+    print("\n✓ Comparison plot saved to: denoising_comparison.png")
+    plt.close()
+def calculate_metrics(clean, enhanced):
+    """Calculate quality metrics"""
+    # Signal-to-Noise Ratio (SNR)
+    noise = clean - enhanced
+    signal_power = np.mean(clean ** 2)
+    noise_power = np.mean(noise ** 2)
+    snr = 10 * np.log10(signal_power / (noise_power + 1e-10))
+    # Mean Squared Error
+    mse = np.mean((clean - enhanced) ** 2)
+    # Root Mean Squared Error
+    rmse = np.sqrt(mse)
+    return {
+        'SNR (dB)': snr,
+        'MSE': mse,
+        'RMSE': rmse
+    }
+def main():
+    """Main example function"""
+    print("="*60)
+    print("DTLN Model Example for Alif E7 Ethos-U55")
+    print("="*60)
+    # 1. Create model
+    print("\n1. Creating DTLN model...")
+    dtln = create_lightweight_model(target_size_kb=100)
+    model = dtln.build_model()
+    print("   ✓ Model created")
+    # 2. Generate test audio
+    print("\n2. Generating test audio...")
+    clean, noisy = generate_test_audio(duration=2.0)
+    print(f"   ✓ Generated {len(clean)/16000:.1f}s of audio")
+    print(f"   ✓ Clean audio range: [{np.min(clean):.3f}, {np.max(clean):.3f}]")
+    print(f"   ✓ Noisy audio range: [{np.min(noisy):.3f}, {np.max(noisy):.3f}]")
+    # Save test audio
+    sf.write('/mnt/user-data/outputs/test_clean.wav', clean, 16000)
+    sf.write('/mnt/user-data/outputs/test_noisy.wav', noisy, 16000)
+    print("   ✓ Saved: test_clean.wav, test_noisy.wav")
+    # 3. Process through model (random weights, not trained yet)
+    print("\n3. Processing through model...")
+    print("   ⚠ Note: Model has random weights (not trained)")
+    # Expand dims for batch
+    noisy_batch = np.expand_dims(noisy, 0)
+    # Forward pass
+    enhanced = model.predict(noisy_batch, verbose=0)
+    enhanced = enhanced[0]  # Remove batch dimension
+    print("   ✓ Processing complete")
+    print(f"   ✓ Enhanced audio range: [{np.min(enhanced):.3f}, {np.max(enhanced):.3f}]")
+    # Save enhanced audio
+    sf.write('/mnt/user-data/outputs/test_enhanced.wav', enhanced, 16000)
+    print("   ✓ Saved: test_enhanced.wav")
+    # 4. Calculate metrics
+    print("\n4. Quality Metrics:")
+    metrics = calculate_metrics(clean, enhanced)
+    for metric_name, value in metrics.items():
+        print(f"   {metric_name}: {value:.4f}")
+    print("\n   ⚠ Note: These metrics are poor because model is untrained")
+    print("   After training, expect SNR improvement of 10-15 dB")
+    # 5. Plot comparison
+    print("\n5. Creating visualization...")
+    plot_comparison(clean, noisy, enhanced)
+    # 6. Show model info
+    print("\n6. Model Information:")
+    print(f"   Parameters: {model.count_params():,}")
+    print(f"   Layers: {len(model.layers)}")
+    print(f"   Input shape: {model.input_shape}")
+    print(f"   Output shape: {model.output_shape}")
+    # 7. Build stateful models
+    print("\n7. Building stateful models for real-time inference...")
+    stage1, stage2 = dtln.build_stateful_model()
+    print(f"   ✓ Stage 1 parameters: {stage1.count_params():,}")
+    print(f"   ✓ Stage 2 parameters: {stage2.count_params():,}")
+    print("\n" + "="*60)
+    print("✓ Example complete!")
+    print("\nGenerated files:")
+    print("  - test_clean.wav")
+    print("  - test_noisy.wav")
+    print("  - test_enhanced.wav")
+    print("  - denoising_comparison.png")
+    print("\nNext steps:")
+    print("  1. Train the model: python train_dtln.py --help")
+    print("  2. Convert to TFLite: python convert_to_tflite.py --help")
+    print("  3. Deploy to Alif E7")
+    print("="*60)
+if __name__ == "__main__":
+    main()