whisper-webui-translate

Running

avans06 committed on Jul 15, 2023

Commit

4c650d7

1 Parent(s): b6ac700

app.py has added the following arguments:

vad_max_merge_size, language, save_downloaded_files, and autolaunch.

vad_max_merge_size:
The VAD maximum merge size, in seconds.

language:
The language spoken in the audio. Specify None to perform language detection.

save_downloaded_files:
True to save downloaded files to the output directory.

autolaunch:
Open the webui URL in the system's default browser upon launch.

webui.bat has been added.
This file is sourced from stable-diffusion-webui.

When downloading from YouTube, the default format has been changed from bestaudio/best to bestvideo[ext=mp4]+bestaudio[ext=m4a]/best.

Files changed (8) hide show

.gitignore +6 -2
app.py +21 -1
dockerfile +2 -2
requirements-fasterWhisper.txt +2 -2
requirements-whisper.txt +2 -2
requirements.txt +6 -6
src/download.py +2 -2
webui.bat +73 -0

.gitignore CHANGED Viewed

@@ -1,6 +1,10 @@
-# Byte-compiled / optimized / DLL files
 __pycache__/
 .vscode/
 flagged/
 *.py[cod]
-*$py.class

+# Byte-compiled / optimized / DLL files
 __pycache__/
 .vscode/
 flagged/
 *.py[cod]
+*$py.class
+.vs/
+output/
+tmp/
+venv/

app.py CHANGED Viewed

@@ -17,6 +17,7 @@ from src.config import VAD_INITIAL_PROMPT_MODE_VALUES, ApplicationConfig, VadIni
 from src.hooks.progressListener import ProgressListener
 from src.hooks.subTaskProgressListener import SubTaskProgressListener
 from src.hooks.whisperProgressHook import create_progress_listener_handle
 from src.languages import get_language_names
 from src.modelCache import ModelCache
 from src.prompts.jsonPromptStrategy import JsonPromptStrategy
@@ -36,6 +37,8 @@ from src.vad import AbstractTranscription, NonSpeechStrategy, PeriodicTranscript
 from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 from src.whisper.whisperFactory import create_whisper_container
 # Configure more application defaults in config.json5
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourself
@@ -249,6 +252,14 @@ class WhisperTranscriber:
                 # Cleanup source
                 if self.deleteUploadedFiles:
                     for source in sources:
                         print("Deleting source file " + source.source_path)
                         try:
@@ -571,7 +582,7 @@ def create_ui(app_config: ApplicationConfig):
     else:
         print("Queue mode disabled - progress bars will not be shown.")
-    demo.launch(share=app_config.share, server_name=app_config.server_name, server_port=app_config.server_port)
     # Clean up
     ui.close()
@@ -616,6 +627,15 @@ if __name__ == '__main__':
                         help="the compute type to use for inference")
     parser.add_argument("--threads", type=optional_int, default=0,
                         help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
     args = parser.parse_args().__dict__

 from src.hooks.progressListener import ProgressListener
 from src.hooks.subTaskProgressListener import SubTaskProgressListener
 from src.hooks.whisperProgressHook import create_progress_listener_handle
+from src.languages import _TO_LANGUAGE_CODE
 from src.languages import get_language_names
 from src.modelCache import ModelCache
 from src.prompts.jsonPromptStrategy import JsonPromptStrategy
 from src.whisper.abstractWhisperContainer import AbstractWhisperContainer
 from src.whisper.whisperFactory import create_whisper_container
+import shutil
 # Configure more application defaults in config.json5
 # Gradio seems to truncate files without keeping the extension, so we need to truncate the file prefix ourself
                 # Cleanup source
                 if self.deleteUploadedFiles:
                     for source in sources:
+                        if self.app_config.save_downloaded_files and self.app_config.output_dir is not None and urlData:
+                            print("Saving downloaded file [" + os.path.basename(source.source_path) + "]")
+                            try:
+                                shutil.copy(source.source_path, self.app_config.output_dir)
+                            except Exception as e:
+                                # Ignore error - it's just a cleanup
+                                print("Error saving downloaded file " + source.source_path + ": " + str(e))
                         print("Deleting source file " + source.source_path)
                         try:
     else:
         print("Queue mode disabled - progress bars will not be shown.")
+    demo.launch(inbrowser=app_config.autolaunch, share=app_config.share, server_name=app_config.server_name, server_port=app_config.server_port)
     # Clean up
     ui.close()
                         help="the compute type to use for inference")
     parser.add_argument("--threads", type=optional_int, default=0,
                         help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS")
+    parser.add_argument("--vad_max_merge_size", type=int, default=default_app_config.vad_max_merge_size, \
+                        help="The number of VAD - Max Merge Size (s).") # 30
+    parser.add_argument("--language", type=str, default=None, choices=sorted(get_language_names()) + sorted([k.title() for k in _TO_LANGUAGE_CODE.keys()]),
+                        help="language spoken in the audio, specify None to perform language detection")
+    parser.add_argument("--save_downloaded_files", action='store_true', \
+                        help="True to move downloaded files to outputs.")
+    parser.add_argument("--autolaunch", action='store_true', \
+                        help="open the webui URL in the system's default browser upon launch")
     args = parser.parse_args().__dict__

dockerfile CHANGED Viewed

@@ -3,7 +3,7 @@
 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
-ARG WHISPER_IMPLEMENTATION=whisper
 ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
 ADD . /opt/whisper-webui/
@@ -27,4 +27,4 @@ ENV PYTHONUNBUFFERED=1
 WORKDIR /opt/whisper-webui/
 ENTRYPOINT ["python3"]
-CMD ["app.py", "--input_audio_max_duration", "-1", "--server_name", "0.0.0.0", "--auto_parallel", "True"]

 FROM huggingface/transformers-pytorch-gpu
 EXPOSE 7860
+ARG WHISPER_IMPLEMENTATION=faster-whisper
 ENV WHISPER_IMPLEMENTATION=${WHISPER_IMPLEMENTATION}
 ADD . /opt/whisper-webui/
 WORKDIR /opt/whisper-webui/
 ENTRYPOINT ["python3"]
+CMD ["app.py", "--whisper_implementation", "faster-whisper", "--input_audio_max_duration", "-1", "--server_name", "0.0.0.0", "--auto_parallel", "True"]

requirements-fasterWhisper.txt CHANGED Viewed

@@ -1,7 +1,7 @@
-ctranslate2
 faster-whisper
 ffmpeg-python==0.2.0
-gradio==3.23.0
 yt-dlp
 json5
 torch

+ctranslate2
 faster-whisper
 ffmpeg-python==0.2.0
+gradio==3.27.0
 yt-dlp
 json5
 torch

requirements-whisper.txt CHANGED Viewed

@@ -1,8 +1,8 @@
-git+https://github.com/huggingface/transformers
 git+https://github.com/openai/whisper.git
 transformers
 ffmpeg-python==0.2.0
-gradio==3.23.0
 yt-dlp
 torchaudio
 altair

+git+https://github.com/huggingface/transformers
 git+https://github.com/openai/whisper.git
 transformers
 ffmpeg-python==0.2.0
+gradio==3.27.0
 yt-dlp
 torchaudio
 altair

requirements.txt CHANGED Viewed

@@ -1,9 +1,9 @@
-git+https://github.com/huggingface/transformers
-git+https://github.com/openai/whisper.git
-transformers
 ffmpeg-python==0.2.0
-gradio==3.23.0
 yt-dlp
 torchaudio
-altair
-json5

+ctranslate2
+faster-whisper
 ffmpeg-python==0.2.0
+gradio==3.27.0
 yt-dlp
+json5
+torch
 torchaudio
+more_itertools

src/download.py CHANGED Viewed

@@ -23,13 +23,13 @@ def download_url(url: str, maxDuration: int = None, destinationDirectory: str =
             return _perform_download(url, maxDuration=maxDuration, outputTemplate="%(title).10s %(id)s.%(ext)s")
         pass
-def _perform_download(url: str, maxDuration: int = None, outputTemplate: str = None, destinationDirectory: str = None, playlistItems: str = "1"):
     # Create a temporary directory to store the downloaded files
     if destinationDirectory is None:
         destinationDirectory = mkdtemp()
     ydl_opts = {
-        "format": "bestaudio/best",
         'paths': {
             'home': destinationDirectory
         }

             return _perform_download(url, maxDuration=maxDuration, outputTemplate="%(title).10s %(id)s.%(ext)s")
         pass
+def _perform_download(url: str, maxDuration: int = None, outputTemplate: str = None, destinationDirectory: str = None, playlistItems: str = "1", onlyAudio: bool = False):
     # Create a temporary directory to store the downloaded files
     if destinationDirectory is None:
         destinationDirectory = mkdtemp()
     ydl_opts = {
+        "format": "bestaudio/best" if onlyAudio else "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best",
         'paths': {
             'home': destinationDirectory
         }

webui.bat ADDED Viewed

	@@ -0,0 +1,73 @@

+@echo off
+:: The source of the webui.bat file is stable-diffusion-webui
+:: Launcher: verifies that Python and pip work, creates or reuses a venv unless told to skip it, then runs app.py.
+:: Default arguments handed to app.py; extra arguments given to this script are appended after them at launch.
+set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
+:: Use the caller's PYTHON / VENV_DIR when preset; otherwise fall back to "python" and a venv folder in this script's directory.
+if not defined PYTHON (set PYTHON=python)
+if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
+:: tmp receives the captured stdout/stderr of every probe below; the mkdir error for an existing folder is discarded.
+mkdir tmp 2>NUL
+:: Probe 1: can the interpreter start at all (runs an empty program)?
+%PYTHON% -c "" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :check_pip
+echo Couldn't launch python
+goto :show_stdout_stderr
+:: Probe 2: is pip available? If not, run the installer script named by PIP_INSTALLER_LOCATION when one is set.
+:check_pip
+%PYTHON% -mpip --help >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :start_venv
+if "%PIP_INSTALLER_LOCATION%" == "" goto :show_stdout_stderr
+%PYTHON% "%PIP_INSTALLER_LOCATION%" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :start_venv
+echo Couldn't install pip
+goto :show_stdout_stderr
+:: venv handling: VENV_DIR set to "-" or SKIP_VENV set to 1 bypasses the venv and keeps the current PYTHON.
+:start_venv
+if ["%VENV_DIR%"] == ["-"] goto :skip_venv
+if ["%SKIP_VENV%"] == ["1"] goto :skip_venv
+:: An existing venv interpreter means the venv was already created; reuse it.
+dir "%VENV_DIR%\Scripts\Python.exe" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :activate_venv
+:: Resolve the full path of the interpreter (quoted) so the venv is created with exactly that executable.
+for /f "delims=" %%i in ('CALL %PYTHON% -c "import sys; print(sys.executable)"') do set PYTHON_FULLNAME="%%i"
+echo Creating venv in directory %VENV_DIR% using python %PYTHON_FULLNAME%
+%PYTHON_FULLNAME% -m venv "%VENV_DIR%" >tmp/stdout.txt 2>tmp/stderr.txt
+if %ERRORLEVEL% == 0 goto :activate_venv
+echo Unable to create venv in directory "%VENV_DIR%"
+goto :show_stdout_stderr
+:: "Activation" here only repoints PYTHON at the venv's interpreter; no activate script is called.
+:activate_venv
+set PYTHON="%VENV_DIR%\Scripts\Python.exe"
+echo venv %PYTHON%
+:skip_venv
+goto :launch
+:: Run the web UI with the defaults plus any arguments passed to this script, then keep the window open.
+:launch
+%PYTHON% app.py %COMMANDLINE_ARGS% %*
+pause
+exit /b
+:: Failure report: print the exit code of the step that failed, then whatever that step wrote to stdout and stderr.
+:show_stdout_stderr
+echo.
+echo exit code: %errorlevel%
+:: size receives the byte length of the captured file; an empty capture is skipped instead of printing a bare header.
+for /f %%i in ("tmp\stdout.txt") do set size=%%~zi
+if %size% equ 0 goto :show_stderr
+echo.
+echo stdout:
+type tmp\stdout.txt
+:show_stderr
+for /f %%i in ("tmp\stderr.txt") do set size=%%~zi
+:: An empty stderr capture must skip ahead to the end of the script; jumping back to show_stderr here would loop forever.
+if %size% equ 0 goto :endofscript
+echo.
+echo stderr:
+type tmp\stderr.txt
+:endofscript
+echo.
+echo Launch unsuccessful. Exiting.
+pause