Commit
·
a6bbce6
1
Parent(s):
c5b9103
improve logging
Browse files
src/whisper_streaming/online_asr.py
CHANGED
|
@@ -69,6 +69,7 @@ class HypothesisBuffer:
|
|
| 69 |
return commit
|
| 70 |
|
| 71 |
def pop_commited(self, time):
|
|
|
|
| 72 |
while self.commited_in_buffer and self.commited_in_buffer[0][1] <= time:
|
| 73 |
self.commited_in_buffer.pop(0)
|
| 74 |
|
|
@@ -183,7 +184,8 @@ class OnlineASRProcessor:
|
|
| 183 |
if self.buffer_trimming_way == "sentence":
|
| 184 |
|
| 185 |
self.chunk_completed_sentence()
|
| 186 |
-
|
|
|
|
| 187 |
|
| 188 |
|
| 189 |
|
|
@@ -197,6 +199,7 @@ class OnlineASRProcessor:
|
|
| 197 |
|
| 198 |
|
| 199 |
self.chunk_completed_segment(res)
|
|
|
|
| 200 |
|
| 201 |
|
| 202 |
# alternative: on any word
|
|
@@ -215,9 +218,7 @@ class OnlineASRProcessor:
|
|
| 215 |
|
| 216 |
|
| 217 |
|
| 218 |
-
|
| 219 |
-
f"len of buffer now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}"
|
| 220 |
-
)
|
| 221 |
return self.to_flush(o)
|
| 222 |
|
| 223 |
def chunk_completed_sentence(self):
|
|
@@ -252,7 +253,9 @@ class OnlineASRProcessor:
|
|
| 252 |
|
| 253 |
t = self.commited[-1][1]
|
| 254 |
|
| 255 |
-
if len(ends)
|
|
|
|
|
|
|
| 256 |
|
| 257 |
e = ends[-2] + self.buffer_time_offset
|
| 258 |
while len(ends) > 2 and e > t:
|
|
@@ -263,16 +266,21 @@ class OnlineASRProcessor:
|
|
| 263 |
self.chunk_at(e)
|
| 264 |
else:
|
| 265 |
logger.debug(f"--- last segment not within commited area")
|
| 266 |
-
|
| 267 |
-
logger.debug(f"--- not enough segments to chunk")
|
| 268 |
|
| 269 |
def chunk_at(self, time):
|
| 270 |
"""trims the hypothesis and audio buffer at "time" """
|
|
|
|
|
|
|
| 271 |
self.transcript_buffer.pop_commited(time)
|
| 272 |
cut_seconds = time - self.buffer_time_offset
|
| 273 |
self.audio_buffer = self.audio_buffer[int(cut_seconds * self.SAMPLING_RATE) :]
|
| 274 |
self.buffer_time_offset = time
|
| 275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 276 |
def words_to_sentences(self, words):
|
| 277 |
"""Uses self.tokenize for sentence segmentation of words.
|
| 278 |
Returns: [(beg,end,"sentence 1"),...]
|
|
|
|
| 69 |
return commit
|
| 70 |
|
| 71 |
def pop_commited(self, time):
|
| 72 |
+
"Remove (from the beginning) of commited_in_buffer all the words that are finished before `time`"
|
| 73 |
while self.commited_in_buffer and self.commited_in_buffer[0][1] <= time:
|
| 74 |
self.commited_in_buffer.pop(0)
|
| 75 |
|
|
|
|
| 184 |
if self.buffer_trimming_way == "sentence":
|
| 185 |
|
| 186 |
self.chunk_completed_sentence()
|
| 187 |
+
|
| 188 |
+
|
| 189 |
|
| 190 |
|
| 191 |
|
|
|
|
| 199 |
|
| 200 |
|
| 201 |
self.chunk_completed_segment(res)
|
| 202 |
+
|
| 203 |
|
| 204 |
|
| 205 |
# alternative: on any word
|
|
|
|
| 218 |
|
| 219 |
|
| 220 |
|
| 221 |
+
|
|
|
|
|
|
|
| 222 |
return self.to_flush(o)
|
| 223 |
|
| 224 |
def chunk_completed_sentence(self):
|
|
|
|
| 253 |
|
| 254 |
t = self.commited[-1][1]
|
| 255 |
|
| 256 |
+
if len(ends) <= 1:
|
| 257 |
+
logger.debug(f"--- not enough segments to chunk (<=1 words)")
|
| 258 |
+
else:
|
| 259 |
|
| 260 |
e = ends[-2] + self.buffer_time_offset
|
| 261 |
while len(ends) > 2 and e > t:
|
|
|
|
| 266 |
self.chunk_at(e)
|
| 267 |
else:
|
| 268 |
logger.debug(f"--- last segment not within commited area")
|
| 269 |
+
|
|
|
|
| 270 |
|
| 271 |
def chunk_at(self, time):
|
| 272 |
"""trims the hypothesis and audio buffer at "time" """
|
| 273 |
+
logger.debug(f"chunking at {time:2.2f}s")
|
| 274 |
+
|
| 275 |
self.transcript_buffer.pop_commited(time)
|
| 276 |
cut_seconds = time - self.buffer_time_offset
|
| 277 |
self.audio_buffer = self.audio_buffer[int(cut_seconds * self.SAMPLING_RATE) :]
|
| 278 |
self.buffer_time_offset = time
|
| 279 |
|
| 280 |
+
logger.debug(
|
| 281 |
+
f"len of audio buffer is now: {len(self.audio_buffer)/self.SAMPLING_RATE:2.2f}s"
|
| 282 |
+
)
|
| 283 |
+
|
| 284 |
def words_to_sentences(self, words):
|
| 285 |
"""Uses self.tokenize for sentence segmentation of words.
|
| 286 |
Returns: [(beg,end,"sentence 1"),...]
|