Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| from speechbrain.inference import EncoderClassifier | |
| import torch | |
| import requests | |
| import subprocess | |
| import os | |
| import uuid | |
| import yt_dlp | |
# Shared classifier instance; stays None until get_model() is first called.
model = None


def get_model():
    """Return the shared accent classifier, creating it on first use.

    The classifier is built with ``EncoderClassifier.from_hparams`` from the
    "Jzuluaga/accent-id-commonaccent_ecapa" source and stored in the
    module-level ``model`` variable, so later calls reuse the same object.
    """
    global model
    if model is not None:
        return model
    model = EncoderClassifier.from_hparams("Jzuluaga/accent-id-commonaccent_ecapa")
    return model
def extract_id_from_url(url):
    """Return the last non-empty path segment of *url*.

    The query string and fragment are discarded before the path is split, so
    a "/" inside a query parameter cannot be mistaken for a path separator,
    and a trailing "/" does not produce an empty result.

    Args:
        url: URL (or bare path/identifier) to extract the segment from.

    Returns:
        The last non-empty path segment, or ``""`` when the path has none.
    """
    from urllib.parse import urlsplit

    path = urlsplit(url).path
    segments = [segment for segment in path.split("/") if segment]
    return segments[-1] if segments else ""
def fetch_loom_download_url(id):
    """Ask Loom's transcoded-url endpoint for the download URL of a video.

    Args:
        id: Loom identifier inserted into the endpoint path. The name shadows
            the ``id`` builtin; it is kept so existing callers keep working.

    Returns:
        The ``"url"`` field of the JSON response when the endpoint answers
        with HTTP 200, otherwise ``None``.
    """
    endpoint = f"https://www.loom.com/api/campaigns/sessions/{id}/transcoded-url"
    try:
        # Without a timeout a stalled connection would block this call forever.
        response = requests.post(url=endpoint, timeout=30)
    except requests.exceptions.RequestException as e:
        # Report network failures the same way as a bad status: log, return None.
        print("Error while retrieving response: ", e)
        return None

    if response.status_code == 200:
        return response.json()["url"]

    print("Error while retrieving response: ", response.status_code)
    # Explicit None so callers can detect that no URL was obtained.
    return None
def download_loom_video(url, filename):
    """Stream the video at *url* into *filename*.

    The request is sent with a "Mozilla/5.0" User-Agent header and the body is
    written to disk in 8192-byte chunks.

    Args:
        url: Download URL of the video.
        filename: Path of the file to write.

    Returns:
        *filename* on success, or ``None`` if the request raised a
        ``requests`` exception (including a non-success HTTP status).
    """
    request_headers = {"User-Agent": "Mozilla/5.0"}
    try:
        with requests.get(url, headers=request_headers, stream=True) as resp:
            resp.raise_for_status()
            with open(filename, "wb") as out_file:
                # filter(None, ...) drops empty chunks before they are written.
                for block in filter(None, resp.iter_content(chunk_size=8192)):
                    out_file.write(block)
    except requests.exceptions.RequestException as e:
        print(f"Failed to download Loom video: {e}")
        return None
    else:
        print(f"Downloaded video to {filename}")
        return filename
def download_direct_mp4(url, filename):
    """Stream the file at *url* into *filename*.

    Args:
        url: Direct URL of the media file.
        filename: Path of the file to write.

    Returns:
        *filename* on success, or ``None`` if anything went wrong while
        requesting or writing the file.
    """
    try:
        # Use the response as a context manager so the streamed connection is
        # released even when the status check or the file write fails.
        with requests.get(url, stream=True) as response:
            response.raise_for_status()
            with open(filename, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        return filename
    except Exception as e:
        # Deliberately broad: this helper is best-effort and reports any
        # failure to the caller as None.
        print(f"Error downloading direct mp4: {e}")
        return None
def download_video_from_url(url):
    """Download the media behind *url* to a local file and return its path.

    Three strategies are tried in order:

    1. URLs containing "loom.com" are resolved through
       ``fetch_loom_download_url`` and saved as ``LoomVideo_<id>.mp4``.
    2. URLs that point at a ``.mp4`` file are downloaded directly.
    3. Everything else is handed to ``yt_dlp``.

    Args:
        url: Link to the video.

    Returns:
        Path of the downloaded file, or ``None`` when a Loom or direct
        download fails. Errors raised by ``yt_dlp`` are not caught here.
    """
    from urllib.parse import urlsplit

    if "loom.com" in url:
        video_id = extract_id_from_url(url)
        print(video_id)
        direct_url = fetch_loom_download_url(video_id)
        print(direct_url)
        if not direct_url:
            # No download URL was obtained, so there is nothing to fetch.
            return None
        filename = f"LoomVideo_{video_id}.mp4"
        success = download_loom_video(direct_url, filename)
        print(success)
        return filename if success else None

    # Also look at the URL path so links such as ".../clip.mp4?token=..." or
    # ".../CLIP.MP4" are treated as direct files.
    try:
        path = urlsplit(url).path
    except ValueError:
        path = ""
    if url.endswith(".mp4") or path.lower().endswith(".mp4"):
        filename = f"video_{uuid.uuid4()}.mp4"
        return download_direct_mp4(url, filename)

    # fallback to yt_dlp for youtube, vimeo, etc.
    out_path = f"video_{uuid.uuid4()}.mp4"
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': out_path,
        'quiet': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([url])
    return out_path
def extract_audio(video_file):
    """Convert *video_file* to a mono 16 kHz 16-bit PCM WAV file using ffmpeg.

    Args:
        video_file: Path of the input media file.

    Returns:
        Path of the newly written ``audio_<uuid>.wav`` file.

    Raises:
        ValueError: If *video_file* is empty or ``None``.
        RuntimeError: If ffmpeg exits with a non-zero status.
    """
    if not video_file:
        # Fail with a clear message instead of an opaque error from subprocess.
        raise ValueError("extract_audio() needs a path to an input media file")

    audio_path = f"audio_{uuid.uuid4()}.wav"
    cmd = [
        "ffmpeg", "-y", "-i", video_file, "-vn",
        "-acodec", "pcm_s16le", "-ac", "1", "-ar", "16000",
        audio_path,
    ]
    try:
        # check=True turns a failed conversion into an exception rather than
        # returning a path to a file that was never written.
        subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True)
    except subprocess.CalledProcessError as err:
        detail = err.stderr.decode("utf-8", errors="replace").strip()
        raise RuntimeError(
            f"ffmpeg failed to extract audio from {video_file!r}: {detail[-500:]}"
        ) from err
    return audio_path
def classify_accent(input_file_or_url):
    """Classify the accent of the speech in a media file or at a URL.

    Args:
        input_file_or_url: A string starting with "http" (downloaded first),
            an object with a ``name`` attribute holding a file path, or a
            file path.

    Returns:
        A 3-tuple of strings: the top label, its confidence as a percentage,
        and a newline-separated list of the three highest-scoring labels with
        their percentages. If the download fails the tuple is
        ``("Download failed", "", "")``.
    """
    model = get_model()

    # Files created here are removed once classification is done; a file
    # supplied by the caller is never deleted.
    temp_files = []

    # Check if it's a URL
    if isinstance(input_file_or_url, str) and input_file_or_url.startswith("http"):
        video_path = download_video_from_url(input_file_or_url)
        if not video_path:
            return "Download failed", "", ""
        temp_files.append(video_path)
    else:
        video_path = getattr(input_file_or_url, "name", input_file_or_url)

    try:
        audio_path = extract_audio(video_path)
        temp_files.append(audio_path)
        out_probs, top_prob, _, label = model.classify_file(audio_path)
        # Compute the top-3 once and reuse it for both labels and scores.
        top3 = torch.topk(out_probs, 3)
        top_labels = model.hparams.label_encoder.decode_ndim(top3.indices.squeeze())
        confidences = top3.values.squeeze().tolist()
    finally:
        for path in temp_files:
            try:
                os.remove(path)
            except OSError:
                # Cleanup is best-effort; a leftover file must not mask the result.
                pass

    result = "\n".join([f"{l}: {p*100:.2f}%" for l, p in zip(top_labels, confidences)])
    return label[0], f"{top_prob.item()*100:.2f}%", result
# Gradio UI: inputs in the first column, prediction outputs in the second.
with gr.Blocks() as demo:
    gr.Markdown("# Accent Identifier")
    gr.Markdown(
        "Upload a video or audio file, or paste a link (e.g. direct .mp4 URL or Loom video) to identify the speaker's accent."
    )

    with gr.Row():
        with gr.Column():
            input_file = gr.File(
                label="Upload video/audio file",
                file_types=[".mp4", ".wav", ".mp3"],
            )
            url_input = gr.Textbox(label="...or paste a direct mp4 URL/loom link")
            submit_btn = gr.Button("Classify Accent")
        with gr.Column():
            label_output = gr.Textbox(label="Top Prediction")
            confidence_output = gr.Textbox(label="Confidence")
            top3_output = gr.Textbox(label="Top 3 Predictions")

    def handle_inputs(file, url):
        """Classify the URL if one was given, otherwise the uploaded file.

        Returns the three output strings; ("No input", "", "") when neither
        a URL nor a file was provided.
        """
        # A non-empty URL takes precedence over an uploaded file.
        source = url or file
        if not source:
            return "No input", "", ""
        return classify_accent(source)

    submit_btn.click(
        handle_inputs,
        inputs=[input_file, url_input],
        outputs=[label_output, confidence_output, top3_output],
    )

if __name__ == "__main__":
    demo.launch(share=True)