Commit
·
4f772d6
1
Parent(s):
084c2f9
fix srt writer and parser for standard time format (hh:mm:ss,ms)
Browse files- utils/subtitler.py +2 -2
- utils/transcriber.py +15 -2
utils/subtitler.py
CHANGED
|
@@ -10,8 +10,8 @@ def parse_srt(srt_file):
|
|
| 10 |
while i < len(lines):
|
| 11 |
if lines[i].strip().isdigit():
|
| 12 |
timing_str = lines[i+1].strip().split(" --> ")
|
| 13 |
-
start = timing_str[0]
|
| 14 |
-
end = timing_str[1]
|
| 15 |
text = lines[i+2].strip()
|
| 16 |
subtitles.append((start, end, text))
|
| 17 |
i += 4
|
|
|
|
| 10 |
while i < len(lines):
|
| 11 |
if lines[i].strip().isdigit():
|
| 12 |
timing_str = lines[i+1].strip().split(" --> ")
|
| 13 |
+
start = timing_str[0]
|
| 14 |
+
end = timing_str[1]
|
| 15 |
text = lines[i+2].strip()
|
| 16 |
subtitles.append((start, end, text))
|
| 17 |
i += 4
|
utils/transcriber.py
CHANGED
|
@@ -8,6 +8,19 @@ logging.basicConfig(filename='main.log',
|
|
| 8 |
datefmt='%m/%d/%Y %I:%M:%S %p')
|
| 9 |
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
|
| 10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
def write_srt(segments, srt_path, max_words_per_line):
|
| 12 |
"""Write segments to an SRT file with a maximum number of words per line."""
|
| 13 |
with open(srt_path, "w", encoding='utf-8') as file:
|
|
@@ -19,8 +32,8 @@ def write_srt(segments, srt_path, max_words_per_line):
|
|
| 19 |
# Write the line if max words limit reached or it's the last word in the segment
|
| 20 |
if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
|
| 21 |
if words_in_line: # Check to avoid writing a line if there are no words
|
| 22 |
-
start_time = words_in_line[0].start
|
| 23 |
-
end_time = words_in_line[-1].end
|
| 24 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
| 25 |
file.write(f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n")
|
| 26 |
# Reset for the next line and increment line counter
|
|
|
|
| 8 |
datefmt='%m/%d/%Y %I:%M:%S %p')
|
| 9 |
logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
|
| 10 |
|
| 11 |
+
def convert_seconds_to_time(seconds) -> str:
    """Format a duration in seconds as an SRT timestamp (``hh:mm:ss,mmm``).

    Args:
        seconds: Offset in seconds; any value accepted by ``float()``
            (int, float, or numeric string).

    Returns:
        The timestamp string, e.g. ``"01:01:01,500"`` for ``3661.5``.
        Hours are zero-padded to at least two digits and grow as needed.

    Raises:
        ValueError: If ``seconds`` cannot be converted to a float.
    """
    # Round to whole milliseconds once, up front. Truncating the fractional
    # part afterwards (int((rem - whole) * 1000)) loses a millisecond to
    # floating-point error, e.g. 1.001 -> ",000" instead of ",001".
    # Negative offsets have no SRT representation, so clamp them to zero
    # rather than emitting a malformed stamp such as "-1:59:59,500".
    total_milliseconds = max(0, round(float(seconds) * 1000))

    # Integer arithmetic from here on, so carries (e.g. 59.9996 s -> 1 min)
    # propagate exactly.
    total_seconds, milliseconds = divmod(total_milliseconds, 1000)
    total_minutes, whole_seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)

    return f"{hours:02d}:{minutes:02d}:{whole_seconds:02d},{milliseconds:03d}"
|
| 23 |
+
|
| 24 |
def write_srt(segments, srt_path, max_words_per_line):
|
| 25 |
"""Write segments to an SRT file with a maximum number of words per line."""
|
| 26 |
with open(srt_path, "w", encoding='utf-8') as file:
|
|
|
|
| 32 |
# Write the line if max words limit reached or it's the last word in the segment
|
| 33 |
if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
|
| 34 |
if words_in_line: # Check to avoid writing a line if there are no words
|
| 35 |
+
start_time = convert_seconds_to_time(words_in_line[0].start)
|
| 36 |
+
end_time = convert_seconds_to_time(words_in_line[-1].end)
|
| 37 |
line_text = ' '.join([w.word.strip() for w in words_in_line])
|
| 38 |
file.write(f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n")
|
| 39 |
# Reset for the next line and increment line counter
|