transcribe_audio

Running

App Files Files Community

cstr committed on Oct 2, 2024

Commit

57968e0

verified ·

1 Parent(s): 26eb097

+vv

Browse files

Files changed (1) hide show

app.py +26 -4

app.py CHANGED Viewed

@@ -31,6 +31,7 @@ device = "cuda:0" if torch.cuda.is_available() else "cpu"
 def download_audio(url, method_choice):
     parsed_url = urlparse(url)
     if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
         return download_youtube_audio(url, method_choice)
     else:
@@ -47,12 +48,14 @@ def download_youtube_audio(url, method_choice):
     }
     method = methods.get(method_choice, youtube_dl_method)
     try:
         return method(url)
     except Exception as e:
         logging.error(f"Error downloading using {method_choice}: {str(e)}")
         return None
 def youtube_dl_method(url):
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -64,9 +67,11 @@ def youtube_dl_method(url):
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
         return f"{info['id']}.mp3"
 def pytube_method(url):
     from pytube import YouTube
     yt = YouTube(url)
     audio_stream = yt.streams.filter(only_audio=True).first()
@@ -74,9 +79,11 @@ def pytube_method(url):
     base, ext = os.path.splitext(out_file)
     new_file = base + '.mp3'
     os.rename(out_file, new_file)
     return new_file
 def youtube_dl_classic_method(url):
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -88,9 +95,11 @@ def youtube_dl_classic_method(url):
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
         return f"{info['id']}.mp3"
 def youtube_dl_alternative_method(url):
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
@@ -106,21 +115,27 @@ def youtube_dl_alternative_method(url):
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
         return f"{info['id']}.mp3"
 def ffmpeg_method(url):
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
     subprocess.run(command, check=True, capture_output=True)
     return output_file
 def aria2_method(url):
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
     subprocess.run(command, check=True, capture_output=True)
     return output_file
 def download_direct_audio(url, method_choice):
     if method_choice == 'wget':
         return wget_method(url)
     else:
@@ -129,6 +144,7 @@ def download_direct_audio(url, method_choice):
             if response.status_code == 200:
                 with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                     temp_file.write(response.content)
                     return temp_file.name
             else:
                 raise Exception(f"Failed to download audio from {url}")
@@ -137,32 +153,38 @@ def download_direct_audio(url, method_choice):
             return None
 def wget_method(url):
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['wget', '-O', output_file, url]
     subprocess.run(command, check=True, capture_output=True)
     return output_file
 def trim_audio(audio_path, start_time, end_time):
     audio = AudioSegment.from_file(audio_path)
     trimmed_audio = audio[start_time*1000:end_time*1000] if end_time else audio[start_time*1000:]
     trimmed_audio_path = tempfile.mktemp(suffix='.wav')
     trimmed_audio.export(trimmed_audio_path, format="wav")
     return trimmed_audio_path
 def save_transcription(transcription):
     file_path = tempfile.mktemp(suffix='.txt')
     with open(file_path, 'w') as f:
         f.write(transcription)
     return file_path
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
-        return ["cstr/whisper-large-v3-turbo-int8_float32"]
     elif pipeline_type == "faster-sequenced":
-        return ["deepdml/faster-whisper-large-v3-turbo-ct2"]
     elif pipeline_type == "transformers":
-        return ["openai/whisper-large-v3"]
-    return []
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     try:

 def download_audio(url, method_choice):
     parsed_url = urlparse(url)
+    logging.info(f"Downloading audio from URL: {url} using method: {method_choice}")
     if parsed_url.netloc in ['www.youtube.com', 'youtu.be', 'youtube.com']:
         return download_youtube_audio(url, method_choice)
     else:
     }
     method = methods.get(method_choice, youtube_dl_method)
     try:
+        logging.info(f"Attempting to download YouTube audio using {method_choice}")
         return method(url)
     except Exception as e:
         logging.error(f"Error downloading using {method_choice}: {str(e)}")
         return None
 def youtube_dl_method(url):
+    logging.info("Using yt-dlp method")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
+        logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
         return f"{info['id']}.mp3"
 def pytube_method(url):
+    logging.info("Using pytube method")
     from pytube import YouTube
     yt = YouTube(url)
     audio_stream = yt.streams.filter(only_audio=True).first()
     base, ext = os.path.splitext(out_file)
     new_file = base + '.mp3'
     os.rename(out_file, new_file)
+    logging.info(f"Downloaded and converted audio to: {new_file}")
     return new_file
 def youtube_dl_classic_method(url):
+    logging.info("Using youtube-dl classic method")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
+        logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
         return f"{info['id']}.mp3"
 def youtube_dl_alternative_method(url):
+    logging.info("Using yt-dlp alternative method")
     ydl_opts = {
         'format': 'bestaudio/best',
         'postprocessors': [{
     }
     with yt_dlp.YoutubeDL(ydl_opts) as ydl:
         info = ydl.extract_info(url, download=True)
+        logging.info(f"Downloaded YouTube audio: {info['id']}.mp3")
         return f"{info['id']}.mp3"
 def ffmpeg_method(url):
+    logging.info("Using ffmpeg method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['ffmpeg', '-i', url, '-vn', '-acodec', 'libmp3lame', '-q:a', '2', output_file]
     subprocess.run(command, check=True, capture_output=True)
+    logging.info(f"Downloaded and converted audio to: {output_file}")
     return output_file
 def aria2_method(url):
+    logging.info("Using aria2 method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['aria2c', '--split=4', '--max-connection-per-server=4', '--out', output_file, url]
     subprocess.run(command, check=True, capture_output=True)
+    logging.info(f"Downloaded audio to: {output_file}")
     return output_file
 def download_direct_audio(url, method_choice):
+    logging.info(f"Downloading direct audio from: {url} using method: {method_choice}")
     if method_choice == 'wget':
         return wget_method(url)
     else:
             if response.status_code == 200:
                 with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_file:
                     temp_file.write(response.content)
+                    logging.info(f"Downloaded direct audio to: {temp_file.name}")
                     return temp_file.name
             else:
                 raise Exception(f"Failed to download audio from {url}")
             return None
 def wget_method(url):
+    logging.info("Using wget method")
     output_file = tempfile.mktemp(suffix='.mp3')
     command = ['wget', '-O', output_file, url]
     subprocess.run(command, check=True, capture_output=True)
+    logging.info(f"Downloaded audio to: {output_file}")
     return output_file
 def trim_audio(audio_path, start_time, end_time):
+    logging.info(f"Trimming audio from {start_time} to {end_time}")
     audio = AudioSegment.from_file(audio_path)
     trimmed_audio = audio[start_time*1000:end_time*1000] if end_time else audio[start_time*1000:]
     trimmed_audio_path = tempfile.mktemp(suffix='.wav')
     trimmed_audio.export(trimmed_audio_path, format="wav")
+    logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
     return trimmed_audio_path
 def save_transcription(transcription):
     file_path = tempfile.mktemp(suffix='.txt')
     with open(file_path, 'w') as f:
         f.write(transcription)
+    logging.info(f"Transcription saved to: {file_path}")
     return file_path
 def get_model_options(pipeline_type):
     if pipeline_type == "faster-batched":
+        return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
     elif pipeline_type == "faster-sequenced":
+        return ["cstr/whisper-large-v3-turbo-int8_float32", "deepdml/faster-whisper-large-v3-turbo-ct2", "Systran/faster-whisper-large-v3", "GalaktischeGurke/primeline-whisper-large-v3-german-ct2"]
     elif pipeline_type == "transformers":
+        return ["openai/whisper-large-v3", "openai/whisper-large-v3-turbo", "primeline/whisper-large-v3-german"]
+    else:
+        return []
 def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
     try: