Spaces:
Runtime error
Runtime error
| import numpy as np | |
| import librosa | |
| from sklearn.cluster import DBSCAN | |
| from pydub import AudioSegment | |
def extract_voice_features(audio_path, segment_duration=1000):
    """Compute one mean-MFCC vector per fixed-length segment of an audio file.

    Args:
        audio_path: Path of the audio file; passed straight to ``librosa.load``.
        segment_duration: Length of each segment in milliseconds.

    Returns:
        A 2-D array with one row per complete segment and one column per
        MFCC coefficient (13). A trailing partial segment is dropped. When
        the audio is shorter than one segment the result has zero rows.

    Raises:
        ValueError: If ``segment_duration`` is so small (or non-positive)
            that a segment would span no audio samples.
    """
    n_mfcc = 13
    # Set explicitly (this is librosa's documented default) so that the
    # sample -> frame conversion below uses the same value as the MFCC call.
    hop_length = 512

    # Load the audio file
    y, sr = librosa.load(audio_path)

    # Extract MFCC features; result is (n_mfcc, num_frames), one column per hop
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc, hop_length=hop_length)

    # Segment length in audio samples
    segment_length = int(segment_duration * sr / 1000)
    if segment_length <= 0:
        raise ValueError(
            f"segment_duration={segment_duration!r} ms is too short to span any samples"
        )
    num_segments = len(y) // segment_length

    segments = []
    for i in range(num_segments):
        # The MFCC columns are frames, not samples: convert the segment's
        # sample boundaries into frame indices before slicing. Slicing with
        # raw sample offsets runs past the last frame and yields NaN means.
        start = (i * segment_length) // hop_length
        end = ((i + 1) * segment_length) // hop_length
        # A segment shorter than one hop still gets its nearest frame.
        end = max(end, start + 1)
        segments.append(np.mean(mfccs[:, start:end], axis=1))

    if not segments:
        # Keep the column dimension so callers always receive a 2-D array.
        return np.empty((0, n_mfcc), dtype=mfccs.dtype)
    return np.array(segments)
def cluster_voices(features, eps=0.5, min_samples=5):
    """Group segment feature vectors into clusters with DBSCAN.

    Args:
        features: Sequence/array with one feature vector per segment.
        eps: DBSCAN neighbourhood radius (Euclidean distance). The right
            value depends on the scale of the features, so it is exposed
            instead of being hard-coded.
        min_samples: DBSCAN ``min_samples`` setting.

    Returns:
        An integer label array with one entry per segment. If there are
        fewer than two segments, or DBSCAN marks every segment as noise,
        all segments are assigned to cluster ``0``. Otherwise the DBSCAN
        labels are returned as-is and may contain ``-1`` for noise.
    """
    if len(features) < 2:
        print("Not enough voice segments for clustering. Assigning all to one cluster.")
        return np.zeros(len(features), dtype=int)

    dbscan = DBSCAN(eps=eps, min_samples=min_samples, metric='euclidean')
    clusters = dbscan.fit_predict(features)

    # If nothing formed a dense region, fall back to a single cluster
    # rather than reporting that no voice was found.
    if np.all(clusters == -1):
        print("DBSCAN assigned all to noise. Considering as one cluster.")
        return np.zeros(len(features), dtype=int)

    return clusters
def get_most_frequent_voice(features, clusters):
    """Return the feature rows belonging to the most populated cluster.

    Args:
        features: Array with one feature vector per segment.
        clusters: Cluster label per segment, aligned with ``features``.
            The label ``-1`` is treated as DBSCAN noise.

    Returns:
        The rows of ``features`` whose label is the most frequent one.
        Noise (``-1``) is only selected when it is the sole label present.
        Ties are resolved in favour of the smallest label. An empty
        selection is returned when there are no segments.
    """
    features = np.asarray(features)
    clusters = np.asarray(clusters)

    # No segments at all: nothing to pick, and max()/argmax would raise.
    if clusters.size == 0:
        return features[:0]

    # Count every label in one pass.
    labels, counts = np.unique(clusters, return_counts=True)

    # Noise is not a voice; ignore it whenever a real cluster exists.
    is_voice = labels != -1
    if is_voice.any():
        labels, counts = labels[is_voice], counts[is_voice]

    largest_cluster = labels[np.argmax(counts)]
    return features[clusters == largest_cluster]
def process_audio(audio_path, segment_duration=1000):
    """Run the full pipeline: extract features, cluster them, pick the top voice.

    Args:
        audio_path: Path of the audio file to analyse.
        segment_duration: Segment length in milliseconds, forwarded to
            ``extract_voice_features``.

    Returns:
        A tuple ``(most_frequent_voice, features, clusters)``: the feature
        rows of the largest cluster, all per-segment features, and the
        per-segment cluster labels.
    """
    segment_features = extract_voice_features(audio_path, segment_duration)
    labels = cluster_voices(segment_features)
    dominant = get_most_frequent_voice(segment_features, labels)
    return dominant, segment_features, labels