Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -173,7 +173,23 @@ def wget_method(url):
|
|
| 173 |
def trim_audio(audio_path, start_time, end_time):
|
| 174 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
| 175 |
audio = AudioSegment.from_file(audio_path)
|
| 176 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 177 |
trimmed_audio_path = tempfile.mktemp(suffix='.wav')
|
| 178 |
trimmed_audio.export(trimmed_audio_path, format="wav")
|
| 179 |
logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
|
|
@@ -197,6 +213,20 @@ def get_model_options(pipeline_type):
|
|
| 197 |
return []
|
| 198 |
|
| 199 |
def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
try:
|
| 201 |
logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
|
| 202 |
verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
|
|
@@ -243,8 +273,11 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
| 243 |
else:
|
| 244 |
audio_path = input_source
|
| 245 |
|
|
|
|
|
|
|
|
|
|
| 246 |
if start_time is not None or end_time is not None:
|
| 247 |
-
trimmed_audio_path = trim_audio(audio_path, start_time
|
| 248 |
audio_path = trimmed_audio_path
|
| 249 |
verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
|
| 250 |
if verbose:
|
|
@@ -289,23 +322,23 @@ def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, d
|
|
| 289 |
yield f"An error occurred: {str(e)}", "", None
|
| 290 |
|
| 291 |
finally:
|
| 292 |
-
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
|
| 303 |
with gr.Blocks() as iface:
|
| 304 |
gr.Markdown("# Multi-Pipeline Transcription")
|
| 305 |
gr.Markdown("Transcribe audio using multiple pipelines and models.")
|
| 306 |
|
| 307 |
with gr.Row():
|
| 308 |
-
input_source = gr.
|
| 309 |
pipeline_type = gr.Dropdown(
|
| 310 |
choices=["faster-batched", "faster-sequenced", "transformers"],
|
| 311 |
label="Pipeline Type",
|
|
@@ -327,8 +360,8 @@ with gr.Blocks() as iface:
|
|
| 327 |
)
|
| 328 |
|
| 329 |
with gr.Row():
|
| 330 |
-
start_time = gr.Number(label="Start Time (seconds)", value=0)
|
| 331 |
-
end_time = gr.Number(label="End Time (seconds)", value=0)
|
| 332 |
verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
|
| 333 |
|
| 334 |
transcribe_button = gr.Button("Transcribe")
|
|
@@ -366,9 +399,9 @@ with gr.Blocks() as iface:
|
|
| 366 |
|
| 367 |
gr.Examples(
|
| 368 |
examples=[
|
| 369 |
-
|
| 370 |
-
|
| 371 |
-
|
| 372 |
],
|
| 373 |
inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
|
| 374 |
)
|
|
|
|
| 173 |
def trim_audio(audio_path, start_time, end_time):
|
| 174 |
logging.info(f"Trimming audio from {start_time} to {end_time}")
|
| 175 |
audio = AudioSegment.from_file(audio_path)
|
| 176 |
+
audio_duration = len(audio) / 1000 # Duration in seconds
|
| 177 |
+
|
| 178 |
+
# Default start and end times if None
|
| 179 |
+
if start_time is None:
|
| 180 |
+
start_time = 0
|
| 181 |
+
if end_time is None or end_time > audio_duration:
|
| 182 |
+
end_time = audio_duration
|
| 183 |
+
|
| 184 |
+
# Validate times
|
| 185 |
+
if start_time < 0 or end_time < 0:
|
| 186 |
+
raise ValueError("Start time and end time must be non-negative.")
|
| 187 |
+
if start_time >= end_time:
|
| 188 |
+
raise gr.Error("End time must be greater than start time.")
|
| 189 |
+
if start_time > audio_duration:
|
| 190 |
+
raise ValueError("Start time exceeds audio duration.")
|
| 191 |
+
|
| 192 |
+
trimmed_audio = audio[start_time * 1000:end_time * 1000]
|
| 193 |
trimmed_audio_path = tempfile.mktemp(suffix='.wav')
|
| 194 |
trimmed_audio.export(trimmed_audio_path, format="wav")
|
| 195 |
logging.info(f"Trimmed audio saved to: {trimmed_audio_path}")
|
|
|
|
| 213 |
return []
|
| 214 |
|
| 215 |
def transcribe_audio(input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time=None, end_time=None, verbose=False):
|
| 216 |
+
try:
|
| 217 |
+
# Determine if input_source is a URL or file
|
| 218 |
+
if isinstance(input_source, str):
|
| 219 |
+
if input_source.startswith('http://') or input_source.startswith('https://'):
|
| 220 |
+
audio_path = download_audio(input_source, download_method)
|
| 221 |
+
# Handle potential errors during download
|
| 222 |
+
if not audio_path or audio_path.startswith("Error"):
|
| 223 |
+
yield f"Error: {audio_path}", "", None
|
| 224 |
+
return
|
| 225 |
+
else:
|
| 226 |
+
# Assume input_source is an uploaded file object
|
| 227 |
+
audio_path = input_source.name
|
| 228 |
+
logging.info(f"Using uploaded audio file: {audio_path}")
|
| 229 |
+
|
| 230 |
try:
|
| 231 |
logging.info(f"Transcription parameters: pipeline_type={pipeline_type}, model_id={model_id}, dtype={dtype}, batch_size={batch_size}, download_method={download_method}")
|
| 232 |
verbose_messages = f"Starting transcription with parameters:\nPipeline Type: {pipeline_type}\nModel ID: {model_id}\nData Type: {dtype}\nBatch Size: {batch_size}\nDownload Method: {download_method}\n"
|
|
|
|
| 273 |
else:
|
| 274 |
audio_path = input_source
|
| 275 |
|
| 276 |
+
start_time = float(start_time) if start_time else None
|
| 277 |
+
end_time = float(end_time) if end_time else None
|
| 278 |
+
|
| 279 |
if start_time is not None or end_time is not None:
|
| 280 |
+
trimmed_audio_path = trim_audio(audio_path, start_time, end_time)
|
| 281 |
audio_path = trimmed_audio_path
|
| 282 |
verbose_messages += f"Audio trimmed from {start_time} to {end_time}\n"
|
| 283 |
if verbose:
|
|
|
|
| 322 |
yield f"An error occurred: {str(e)}", "", None
|
| 323 |
|
| 324 |
finally:
|
| 325 |
+
# Remove downloaded audio file
|
| 326 |
+
if audio_path and os.path.exists(audio_path):
|
| 327 |
+
os.remove(audio_path)
|
| 328 |
+
# Remove trimmed audio file
|
| 329 |
+
if 'trimmed_audio_path' in locals() and os.path.exists(trimmed_audio_path):
|
| 330 |
+
os.remove(trimmed_audio_path)
|
| 331 |
+
# Remove transcription file if needed
|
| 332 |
+
if transcription_file and os.path.exists(transcription_file):
|
| 333 |
+
os.remove(transcription_file)
|
| 334 |
+
|
| 335 |
|
| 336 |
with gr.Blocks() as iface:
|
| 337 |
gr.Markdown("# Multi-Pipeline Transcription")
|
| 338 |
gr.Markdown("Transcribe audio using multiple pipelines and models.")
|
| 339 |
|
| 340 |
with gr.Row():
|
| 341 |
+
input_source = gr.File(label="Audio Source (Upload a file or enter a URL/YouTube URL)")
|
| 342 |
pipeline_type = gr.Dropdown(
|
| 343 |
choices=["faster-batched", "faster-sequenced", "transformers"],
|
| 344 |
label="Pipeline Type",
|
|
|
|
| 360 |
)
|
| 361 |
|
| 362 |
with gr.Row():
|
| 363 |
+
start_time = gr.Number(label="Start Time (seconds)", value=None, minimum=0)
|
| 364 |
+
end_time = gr.Number(label="End Time (seconds)", value=None, minimum=0)
|
| 365 |
verbose = gr.Checkbox(label="Verbose Output", value=True) # Set to True by default
|
| 366 |
|
| 367 |
transcribe_button = gr.Button("Transcribe")
|
|
|
|
| 399 |
|
| 400 |
gr.Examples(
|
| 401 |
examples=[
|
| 402 |
+
["https://www.youtube.com/watch?v=daQ_hqA6HDo", "faster-batched", "cstr/whisper-large-v3-turbo-int8_float32", "int8", 16, "yt-dlp", None, None, True],
|
| 403 |
+
["https://mcdn.podbean.com/mf/web/dir5wty678b6g4vg/HoP_453_-_The_Price_is_Right_-_Law_and_Economics_in_the_Second_Scholastic5yxzh.mp3", "faster-sequenced", "deepdml/faster-whisper-large-v3-turbo-ct2", "float16", 1, "ffmpeg", 0, 300, True],
|
| 404 |
+
[None, "transformers", "openai/whisper-large-v3", "float16", 16, "yt-dlp", 60, 180, True]
|
| 405 |
],
|
| 406 |
inputs=[input_source, pipeline_type, model_id, dtype, batch_size, download_method, start_time, end_time, verbose],
|
| 407 |
)
|