marquesafonso committed on
Commit
4f772d6
·
1 Parent(s): 084c2f9

fix srt writer and parser for standard time format (hh:mm:ss,ms)

Browse files
Files changed (2) hide show
  1. utils/subtitler.py +2 -2
  2. utils/transcriber.py +15 -2
utils/subtitler.py CHANGED
@@ -10,8 +10,8 @@ def parse_srt(srt_file):
10
  while i < len(lines):
11
  if lines[i].strip().isdigit():
12
  timing_str = lines[i+1].strip().split(" --> ")
13
- start = timing_str[0].replace(',', '.')
14
- end = timing_str[1].replace(',', '.')
15
  text = lines[i+2].strip()
16
  subtitles.append((start, end, text))
17
  i += 4
 
10
  while i < len(lines):
11
  if lines[i].strip().isdigit():
12
  timing_str = lines[i+1].strip().split(" --> ")
13
+ start = timing_str[0]
14
+ end = timing_str[1]
15
  text = lines[i+2].strip()
16
  subtitles.append((start, end, text))
17
  i += 4
utils/transcriber.py CHANGED
@@ -8,6 +8,19 @@ logging.basicConfig(filename='main.log',
8
  datefmt='%m/%d/%Y %I:%M:%S %p')
9
  logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  def write_srt(segments, srt_path, max_words_per_line):
12
  """Write segments to an SRT file with a maximum number of words per line."""
13
  with open(srt_path, "w", encoding='utf-8') as file:
@@ -19,8 +32,8 @@ def write_srt(segments, srt_path, max_words_per_line):
19
  # Write the line if max words limit reached or it's the last word in the segment
20
  if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
21
  if words_in_line: # Check to avoid writing a line if there are no words
22
- start_time = words_in_line[0].start
23
- end_time = words_in_line[-1].end
24
  line_text = ' '.join([w.word.strip() for w in words_in_line])
25
  file.write(f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n")
26
  # Reset for the next line and increment line counter
 
8
  datefmt='%m/%d/%Y %I:%M:%S %p')
9
  logging.getLogger("faster_whisper").setLevel(logging.DEBUG)
10
 
11
def convert_seconds_to_time(seconds):
    """Format a duration in seconds as an SRT timestamp (``hh:mm:ss,mmm``).

    Args:
        seconds: Offset in seconds; anything accepted by ``float()``
            (int, float, or numeric string).

    Returns:
        A string such as ``"00:01:02,345"``. Hours are zero-padded to at
        least two digits and grow wider if the offset needs it.

    Raises:
        ValueError: If ``seconds`` cannot be converted to a float or is NaN.
        OverflowError: If ``seconds`` is infinite.
    """
    # Convert to whole milliseconds once, with rounding. Subtracting the
    # integer part and truncating loses a millisecond to float error
    # (e.g. 1.001 -> 0.99999... ms -> 0).
    total_ms = round(float(seconds) * 1000)

    # The timestamp format has no sign, so negative offsets are clamped to zero
    # instead of producing malformed fields.
    total_ms = max(total_ms, 0)

    # Integer arithmetic from here on, so rounding carries propagate
    # correctly into seconds/minutes/hours (59.9996 s -> 00:01:00,000).
    hours, rest = divmod(total_ms, 3_600_000)
    minutes, rest = divmod(rest, 60_000)
    whole_seconds, milliseconds = divmod(rest, 1000)

    return f"{hours:02}:{minutes:02}:{whole_seconds:02},{milliseconds:03}"
23
+
24
  def write_srt(segments, srt_path, max_words_per_line):
25
  """Write segments to an SRT file with a maximum number of words per line."""
26
  with open(srt_path, "w", encoding='utf-8') as file:
 
32
  # Write the line if max words limit reached or it's the last word in the segment
33
  if len(words_in_line) == max_words_per_line or w == len(segment.words) - 1:
34
  if words_in_line: # Check to avoid writing a line if there are no words
35
+ start_time = convert_seconds_to_time(words_in_line[0].start)
36
+ end_time = convert_seconds_to_time(words_in_line[-1].end)
37
  line_text = ' '.join([w.word.strip() for w in words_in_line])
38
  file.write(f"{line_counter}\n{start_time} --> {end_time}\n{line_text}\n\n")
39
  # Reset for the next line and increment line counter