Spaces:
Running
Running
Do not run whisper on very short VAD segments
Browse files
vad.py
CHANGED
|
@@ -32,6 +32,9 @@ SEGMENT_PADDING_RIGHT = 3 # End detected segments late
|
|
| 32 |
# Whether to attempt to transcribe non-speech
|
| 33 |
TRANSCRIBE_NON_SPEECH = False
|
| 34 |
|
|
|
|
|
|
|
|
|
|
| 35 |
class AbstractTranscription(ABC):
|
| 36 |
def __init__(self, segment_padding_left: int = None, segment_padding_right = None, max_silent_period: int = None, max_merge_size: int = None, transcribe_non_speech: bool = False):
|
| 37 |
self.sampling_rate = 16000
|
|
@@ -108,6 +111,9 @@ class AbstractTranscription(ABC):
|
|
| 108 |
|
| 109 |
segment_duration = segment_end - segment_start
|
| 110 |
|
|
|
|
|
|
|
|
|
|
| 111 |
segment_audio = self.get_audio_segment(audio, start_time = str(segment_start), duration = str(segment_duration))
|
| 112 |
|
| 113 |
print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ", segment_duration, "gap: ", segment_gap)
|
|
|
|
| 32 |
# Whether to attempt to transcribe non-speech
|
| 33 |
TRANSCRIBE_NON_SPEECH = False
|
| 34 |
|
| 35 |
+
# Minimum duration of a segment to process (same units as segment_end - segment_start); shorter segments are skipped
|
| 36 |
+
MIN_SEGMENT_DURATION = 1
|
| 37 |
+
|
| 38 |
class AbstractTranscription(ABC):
|
| 39 |
def __init__(self, segment_padding_left: int = None, segment_padding_right = None, max_silent_period: int = None, max_merge_size: int = None, transcribe_non_speech: bool = False):
|
| 40 |
self.sampling_rate = 16000
|
|
|
|
| 111 |
|
| 112 |
segment_duration = segment_end - segment_start
|
| 113 |
|
| 114 |
+
if segment_duration < MIN_SEGMENT_DURATION:
|
| 115 |
+
continue;
|
| 116 |
+
|
| 117 |
segment_audio = self.get_audio_segment(audio, start_time = str(segment_start), duration = str(segment_duration))
|
| 118 |
|
| 119 |
print("Running whisper from ", format_timestamp(segment_start), " to ", format_timestamp(segment_end), ", duration: ", segment_duration, "gap: ", segment_gap)
|