Spaces:
				
			
			
	
			
			
					
		Running
		
	
	
	
			
			
	
	
	
	
		
		
					
		Running
		
	If the user does not choose a language for Whisper,
the language detected by Whisper will be automatically set for the NLLB model, to avoid abnormal errors when determining the source language in NLLB.
- app.py +16 -12
- src/vad.py +2 -2
- webui.bat +1 -1
    	
        app.py
    CHANGED
    
    | @@ -20,9 +20,7 @@ from src.diarization.diarizationContainer import DiarizationContainer | |
| 20 | 
             
            from src.hooks.progressListener import ProgressListener
         | 
| 21 | 
             
            from src.hooks.subTaskProgressListener import SubTaskProgressListener
         | 
| 22 | 
             
            from src.hooks.whisperProgressHook import create_progress_listener_handle
         | 
| 23 | 
            -
            from src.languages import _TO_LANGUAGE_CODE
         | 
| 24 | 
            -
            from src.languages import get_language_names
         | 
| 25 | 
            -
            from src.languages import get_language_from_name
         | 
| 26 | 
             
            from src.modelCache import ModelCache
         | 
| 27 | 
             
            from src.prompts.jsonPromptStrategy import JsonPromptStrategy
         | 
| 28 | 
             
            from src.prompts.prependPromptStrategy import PrependPromptStrategy
         | 
| @@ -269,6 +267,10 @@ class WhisperTranscriber: | |
| 269 |  | 
| 270 | 
             
                                # Transcribe
         | 
| 271 | 
             
                                result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
         | 
|  | |
|  | |
|  | |
|  | |
| 272 | 
             
                                short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
         | 
| 273 | 
             
                                filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
         | 
| 274 |  | 
| @@ -700,8 +702,8 @@ def create_ui(app_config: ApplicationConfig): | |
| 700 |  | 
| 701 | 
             
                common_output = lambda : [
         | 
| 702 | 
             
                    gr.File(label="Download"),
         | 
| 703 | 
            -
                    gr.Text(label="Transcription"),
         | 
| 704 | 
            -
                    gr.Text(label="Segments"),
         | 
| 705 | 
             
                ]
         | 
| 706 |  | 
| 707 | 
             
                is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
         | 
| @@ -863,13 +865,15 @@ if __name__ == '__main__': | |
| 863 |  | 
| 864 | 
             
                updated_config = default_app_config.update(**args)
         | 
| 865 |  | 
| 866 | 
            -
                #updated_config.whisper_implementation = "faster-whisper"
         | 
| 867 | 
            -
                #updated_config.input_audio_max_duration = -1
         | 
| 868 | 
            -
                #updated_config.default_model_name = "large-v2"
         | 
| 869 | 
            -
                #updated_config.output_dir = "output"
         | 
| 870 | 
            -
                #updated_config.vad_max_merge_size = 90
         | 
| 871 | 
            -
                #updated_config.merge_subtitle_with_sources =  | 
| 872 | 
            -
                #updated_config.autolaunch = True
         | 
|  | |
|  | |
| 873 |  | 
| 874 | 
             
                if (threads := args.pop("threads")) > 0:
         | 
| 875 | 
             
                    torch.set_num_threads(threads)
         | 
|  | |
| 20 | 
             
            from src.hooks.progressListener import ProgressListener
         | 
| 21 | 
             
            from src.hooks.subTaskProgressListener import SubTaskProgressListener
         | 
| 22 | 
             
            from src.hooks.whisperProgressHook import create_progress_listener_handle
         | 
| 23 | 
            +
            from src.languages import _TO_LANGUAGE_CODE, get_language_names, get_language_from_name, get_language_from_code
         | 
|  | |
|  | |
| 24 | 
             
            from src.modelCache import ModelCache
         | 
| 25 | 
             
            from src.prompts.jsonPromptStrategy import JsonPromptStrategy
         | 
| 26 | 
             
            from src.prompts.prependPromptStrategy import PrependPromptStrategy
         | 
|  | |
| 267 |  | 
| 268 | 
             
                                # Transcribe
         | 
| 269 | 
             
                                result = self.transcribe_file(model, source.source_path, selectedLanguage, task, vadOptions, scaled_progress_listener, **decodeOptions)
         | 
| 270 | 
            +
                                if whisper_lang is None and result["language"] is not None and len(result["language"]) > 0:
         | 
| 271 | 
            +
                                    whisper_lang = get_language_from_code(result["language"])
         | 
| 272 | 
            +
                                    nllb_model.whisper_lang = whisper_lang
         | 
| 273 | 
            +
                                    
         | 
| 274 | 
             
                                short_name, suffix = source.get_short_name_suffix(max_length=self.app_config.input_max_file_name_length)
         | 
| 275 | 
             
                                filePrefix = slugify(source_prefix + short_name, allow_unicode=True)
         | 
| 276 |  | 
|  | |
| 702 |  | 
| 703 | 
             
                common_output = lambda : [
         | 
| 704 | 
             
                    gr.File(label="Download"),
         | 
| 705 | 
            +
                    gr.Text(label="Transcription", autoscroll=False),
         | 
| 706 | 
            +
                    gr.Text(label="Segments", autoscroll=False),
         | 
| 707 | 
             
                ]
         | 
| 708 |  | 
| 709 | 
             
                is_queue_mode = app_config.queue_concurrency_count is not None and app_config.queue_concurrency_count > 0
         | 
|  | |
| 865 |  | 
| 866 | 
             
                updated_config = default_app_config.update(**args)
         | 
| 867 |  | 
| 868 | 
            +
                # updated_config.whisper_implementation = "faster-whisper"
         | 
| 869 | 
            +
                # updated_config.input_audio_max_duration = -1
         | 
| 870 | 
            +
                # updated_config.default_model_name = "large-v2"
         | 
| 871 | 
            +
                # updated_config.output_dir = "output"
         | 
| 872 | 
            +
                # updated_config.vad_max_merge_size = 90
         | 
| 873 | 
            +
                # updated_config.merge_subtitle_with_sources = False
         | 
| 874 | 
            +
                # updated_config.autolaunch = True
         | 
| 875 | 
            +
                # updated_config.auto_parallel = False
         | 
| 876 | 
            +
                # updated_config.save_downloaded_files = True
         | 
| 877 |  | 
| 878 | 
             
                if (threads := args.pop("threads")) > 0:
         | 
| 879 | 
             
                    torch.set_num_threads(threads)
         | 
    	
        src/vad.py
    CHANGED
    
    | @@ -205,7 +205,7 @@ class AbstractTranscription(ABC): | |
| 205 | 
             
                            # Detected language
         | 
| 206 | 
             
                            detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
         | 
| 207 |  | 
| 208 | 
            -
                            print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ", 
         | 
| 209 | 
             
                                segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
         | 
| 210 |  | 
| 211 | 
             
                            perf_start_time = time.perf_counter()
         | 
| @@ -217,7 +217,7 @@ class AbstractTranscription(ABC): | |
| 217 | 
             
                            segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
         | 
| 218 |  | 
| 219 | 
             
                            perf_end_time = time.perf_counter()
         | 
| 220 | 
            -
                            print(" | 
| 221 |  | 
| 222 | 
             
                            adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
         | 
| 223 |  | 
|  | |
| 205 | 
             
                            # Detected language
         | 
| 206 | 
             
                            detected_language = languageCounter.most_common(1)[0][0] if len(languageCounter) > 0 else None
         | 
| 207 |  | 
| 208 | 
            +
                            print(f"Running whisper {idx}: from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ", 
         | 
| 209 | 
             
                                segment_duration, "expanded: ", segment_expand_amount, ", prompt: ", segment_prompt, ", detected language: ", detected_language)
         | 
| 210 |  | 
| 211 | 
             
                            perf_start_time = time.perf_counter()
         | 
|  | |
| 217 | 
             
                            segment_result = whisperCallable.invoke(segment_audio, segment_index, segment_prompt, detected_language, progress_listener=scaled_progress_listener)
         | 
| 218 |  | 
| 219 | 
             
                            perf_end_time = time.perf_counter()
         | 
| 220 | 
            +
                            print("\tWhisper took {} seconds".format(perf_end_time - perf_start_time))
         | 
| 221 |  | 
| 222 | 
             
                            adjusted_segments = self.adjust_timestamp(segment_result["segments"], adjust_seconds=segment_start, max_source_time=segment_duration)
         | 
| 223 |  | 
    	
        webui.bat
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 | 
             
            @echo off
         | 
| 2 |  | 
| 3 | 
             
            :: The source of the webui.bat file is stable-diffusion-webui
         | 
| 4 | 
            -
            set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 -- | 
| 5 |  | 
| 6 | 
             
            if not defined PYTHON (set PYTHON=python)
         | 
| 7 | 
             
            if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
         | 
|  | |
| 1 | 
             
            @echo off
         | 
| 2 |  | 
| 3 | 
             
            :: The source of the webui.bat file is stable-diffusion-webui
         | 
| 4 | 
            +
            set COMMANDLINE_ARGS=--whisper_implementation faster-whisper --input_audio_max_duration -1 --default_model_name large-v2 --auto_parallel True --output_dir output --vad_max_merge_size 90 --save_downloaded_files --autolaunch
         | 
| 5 |  | 
| 6 | 
             
            if not defined PYTHON (set PYTHON=python)
         | 
| 7 | 
             
            if not defined VENV_DIR (set "VENV_DIR=%~dp0%venv")
         |