Spaces:
Runtime error
Runtime error
Commit
·
a3e23e4
1
Parent(s):
ed868f3
Update app.py
Browse files
app.py
CHANGED
|
@@ -47,9 +47,9 @@ def RemoveAllFiles():
|
|
| 47 |
if (os.path.isfile(file)):
|
| 48 |
os.remove(file)
|
| 49 |
|
| 50 |
-
def
|
| 51 |
SPEAKER_DICT = {}
|
| 52 |
-
SPEAKERS = []
|
| 53 |
|
| 54 |
def GetSpeaker(sp):
|
| 55 |
speaker = sp
|
|
@@ -61,10 +61,6 @@ def Transcribe(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
|
| 61 |
else:
|
| 62 |
speaker = SPEAKER_DICT[sp]
|
| 63 |
return speaker
|
| 64 |
-
|
| 65 |
-
def GenerateSpeakerDict(sp):
|
| 66 |
-
global SPEAKERS
|
| 67 |
-
SPEAKERS = [speaker.strip() for speaker in sp.split(',')]
|
| 68 |
|
| 69 |
def millisec(timeStr):
|
| 70 |
spl = timeStr.split(":")
|
|
@@ -113,7 +109,7 @@ def Transcribe(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
|
| 113 |
return f"dz_{audio}.wav", dzList, segments
|
| 114 |
|
| 115 |
def transcribe(dz_audio):
|
| 116 |
-
model = whisper.load_model("
|
| 117 |
result = model.transcribe(dz_audio)
|
| 118 |
# for _ in result['segments']:
|
| 119 |
# print(_['start'], _['end'], _['text'])
|
|
@@ -140,7 +136,6 @@ def Transcribe(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
|
| 140 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
| 141 |
return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
| 142 |
|
| 143 |
-
GenerateSpeakerDict(SpeakerNames)
|
| 144 |
spacermilli, spacer = preprocess(audio)
|
| 145 |
dz_audio, dzList, segments = diarization(audio)
|
| 146 |
conversation, t_text = transcribe(dz_audio)
|
|
@@ -179,7 +174,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
|
|
| 179 |
# conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
|
| 180 |
# conversation[-1][1] += segment["text"][1:]
|
| 181 |
# return output
|
| 182 |
-
return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation])), conversation
|
| 183 |
|
| 184 |
def get_duration(path):
|
| 185 |
with contextlib.closing(wave.open(path,'r')) as f:
|
|
@@ -237,7 +232,7 @@ def AudioTranscribe(NumberOfSpeakers=None, SpeakerNames="", audio="", retries=5)
|
|
| 237 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
| 238 |
if not (os.path.isfile("temp_audio.wav")):
|
| 239 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
| 240 |
-
return
|
| 241 |
else:
|
| 242 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
| 243 |
|
|
@@ -253,10 +248,10 @@ def VideoTranscribe(NumberOfSpeakers=None, SpeakerNames="", video="", retries=5)
|
|
| 253 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
| 254 |
if not (os.path.isfile("temp_audio.wav")):
|
| 255 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
| 256 |
-
return
|
| 257 |
else:
|
| 258 |
raise gr.Error("There is some issue ith Video Transcriber. Please try again later!")
|
| 259 |
-
return
|
| 260 |
|
| 261 |
def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries = 5):
|
| 262 |
if retries:
|
|
@@ -280,7 +275,7 @@ def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries =
|
|
| 280 |
stream = ffmpeg.input('temp_audio.m4a')
|
| 281 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
| 282 |
RemoveFile("temp_audio.m4a")
|
| 283 |
-
return
|
| 284 |
else:
|
| 285 |
raise gr.Error(f"Unable to get video from {URL}")
|
| 286 |
|
|
|
|
| 47 |
if (os.path.isfile(file)):
|
| 48 |
os.remove(file)
|
| 49 |
|
| 50 |
+
def Transcribe_V1(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
|
| 51 |
SPEAKER_DICT = {}
|
| 52 |
+
SPEAKERS = [speaker.strip() for speaker in SpeakerNames.split(',')]
|
| 53 |
|
| 54 |
def GetSpeaker(sp):
|
| 55 |
speaker = sp
|
|
|
|
| 61 |
else:
|
| 62 |
speaker = SPEAKER_DICT[sp]
|
| 63 |
return speaker
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
|
| 65 |
def millisec(timeStr):
|
| 66 |
spl = timeStr.split(":")
|
|
|
|
| 109 |
return f"dz_{audio}.wav", dzList, segments
|
| 110 |
|
| 111 |
def transcribe(dz_audio):
|
| 112 |
+
model = whisper.load_model("large")
|
| 113 |
result = model.transcribe(dz_audio)
|
| 114 |
# for _ in result['segments']:
|
| 115 |
# print(_['start'], _['end'], _['text'])
|
|
|
|
| 136 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
| 137 |
return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
| 138 |
|
|
|
|
| 139 |
spacermilli, spacer = preprocess(audio)
|
| 140 |
dz_audio, dzList, segments = diarization(audio)
|
| 141 |
conversation, t_text = transcribe(dz_audio)
|
|
|
|
| 174 |
# conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
|
| 175 |
# conversation[-1][1] += segment["text"][1:]
|
| 176 |
# return output
|
| 177 |
+
return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation])), ({ "data": [{"speaker": speaker, "text": text} for speaker, text in conversation]})
|
| 178 |
|
| 179 |
def get_duration(path):
|
| 180 |
with contextlib.closing(wave.open(path,'r')) as f:
|
|
|
|
| 232 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
| 233 |
if not (os.path.isfile("temp_audio.wav")):
|
| 234 |
return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
|
| 235 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
| 236 |
else:
|
| 237 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
| 238 |
|
|
|
|
| 248 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
| 249 |
if not (os.path.isfile("temp_audio.wav")):
|
| 250 |
return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
|
| 251 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
| 252 |
else:
|
| 253 |
raise gr.Error("There is some issue ith Video Transcriber. Please try again later!")
|
| 254 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
| 255 |
|
| 256 |
def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries = 5):
|
| 257 |
if retries:
|
|
|
|
| 275 |
stream = ffmpeg.input('temp_audio.m4a')
|
| 276 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
| 277 |
RemoveFile("temp_audio.m4a")
|
| 278 |
+
return Transcribe_V1(NumberOfSpeakers, SpeakerNames)
|
| 279 |
else:
|
| 280 |
raise gr.Error(f"Unable to get video from {URL}")
|
| 281 |
|