Commit b927090 · 1 Parent(s): f848bd7
Update app.py
app.py CHANGED

@@ -22,14 +22,38 @@ from pyannote.audio import Audio
 from pyannote.core import Segment
 import wave
 import contextlib
-from sklearn.cluster import
-
+from sklearn.cluster import AgglomerativeClustering
 import numpy as np
 import json
 from datetime import timedelta
 
+from transformers import T5ForConditionalGeneration, T5Tokenizer
+
 __FILES = set()
-
+wispher_models = list(whisper._MODELS.keys())
+
+def correct_grammar(input_text,num_return_sequences=1):
+    torch_device = 'cuda' if torch.cuda.is_available() else 'cpu'
+    tokenizer = T5Tokenizer.from_pretrained('deep-learning-analytics/GrammarCorrector')
+    model = T5ForConditionalGeneration.from_pretrained('deep-learning-analytics/GrammarCorrector').to(torch_device)
+    batch = tokenizer([input_text],truncation=True,padding='max_length',max_length=len(input_text), return_tensors="pt").to(torch_device)
+    results = model.generate(**batch,max_length=len(input_text),num_beams=2, num_return_sequences=num_return_sequences, temperature=1.5)
+    generated_sequences = []
+    for generated_sequence_idx, generated_sequence in enumerate(results):
+        text = tokenizer.decode(generated_sequence, clean_up_tokenization_spaces=True, skip_special_tokens=True)
+        generated_sequences.append(text)
+    generated_text = "".join(generated_sequences)
+    _generated_text = ""
+    for idx, _sentence in enumerate(generated_text.split('.'), 0):
+        if not idx:
+            _generated_text+=_sentence+'.'
+        elif _sentence[:1]!=' ':
+            _generated_text+=' '+_sentence+'.'
+        elif _sentence[:1]=='':
+            pass
+        else:
+            _generated_text+=_sentence+'.'
+    return _generated_text
 
 def CreateFile(filename):
     __FILES.add(filename)

@@ -140,14 +164,16 @@ def Transcribe_V1(NumberOfSpeakers, SpeakerNames="", audio="temp_audio.wav"):
     return (t_text, ({ "data": [{"speaker": speaker, "text": text} for speaker, text in conversation]}))
 
 
-def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
-    model = whisper.load_model(
+def Transcribe_V2(model, num_speakers, speaker_names, audio="temp_audio.wav"):
+    model = whisper.load_model(model)
     # embedding_model = SpeechBrainPretrainedSpeakerEmbedding("speechbrain/spkrec-ecapa-voxceleb")
+
     embedding_model = SpeechBrainPretrainedSpeakerEmbedding(
         "speechbrain/spkrec-ecapa-voxceleb",
         device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     )
     SPEAKER_DICT = {}
+    default_speaker_names = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
     SPEAKERS = [speaker.strip() for speaker in speaker_names.split(',') if len(speaker)]
     def GetSpeaker(sp):
         speaker = sp

@@ -156,6 +182,10 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
                 t = SPEAKERS.pop(0)
                 SPEAKER_DICT[sp] = t
                 speaker = SPEAKER_DICT[sp]
+            elif len(default_speaker_names):
+                t = default_speaker_names.pop(0)
+                SPEAKER_DICT[sp] = t
+                speaker = SPEAKER_DICT[sp]
             else:
                 speaker = SPEAKER_DICT[sp]
         return speaker

@@ -168,6 +198,9 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
         return s
     as_audio = AudioSegment.from_wav(audio)
     DEMO_FILE = {'uri': 'blabal', 'audio': audio}
+    hparams = pipeline.parameters(instantiated=True)
+    hparams["segmentation"]["min_duration_off"] -= 0.25
+    pipeline.instantiate(hparams)
     if num_speakers:
         dz = pipeline(DEMO_FILE, num_speakers=num_speakers)
     else:

@@ -201,6 +234,8 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
             # conversation.append([GetSpeaker(segment["speaker"]), segment["text"][1:]]) # segment["speaker"] + ' ' + str(time(segment["start"])) + '\n\n'
             # conversation[-1][1] += segment["text"][1:]
         # return output
+        for idx in range(len(conversation)):
+            conversation[idx][3] = correct_grammar(conversation[idx][3])
         return ("".join([f"[{start}] - {speaker} \n{text}\n" for start, end, speaker, text in conversation])), ({ "data": [{"start": start, "end":end, "speaker": speaker, "text": text} for start, end, speaker, text in conversation]})
 
     def get_duration(path):

@@ -224,7 +259,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
         return embedding_model(waveform[None])
 
     def add_speaker_labels(segments, embeddings, num_speakers):
-        clustering =
+        clustering = AgglomerativeClustering(num_speakers).fit(embeddings)
         labels = clustering.labels_
         for i in range(len(segments)):
             segments[i]["speaker"] = 'SPEAKER ' + str(labels[i] + 1)

@@ -236,9 +271,9 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
     if duration > 4 * 60 * 60:
         return "Audio duration too long"
 
-    print(json.dumps(diarization(audio)))
+    # print(json.dumps(diarization(audio)))
     result = model.transcribe(audio)
-    print(json.dumps(result))
+    # print(json.dumps(result))
 
     segments = result["segments"]
 

@@ -251,7 +286,7 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
     return get_output(segments)
     # return output
 
-def AudioTranscribe(NumberOfSpeakers=None, SpeakerNames="", audio="", retries=5):
+def AudioTranscribe(NumberOfSpeakers=None, SpeakerNames="", audio="", retries=5, model='base'):
     print(f"{NumberOfSpeakers}, {SpeakerNames}, {retries}")
     if retries:
         # subprocess.call(['ffmpeg', '-i', audio,'temp_audio.wav'])

@@ -262,11 +297,11 @@ def AudioTranscribe(NumberOfSpeakers=None, SpeakerNames="", audio="", retries=5)
             return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
         if not (os.path.isfile("temp_audio.wav")):
             return AudioTranscribe(NumberOfSpeakers, SpeakerNames, audio, retries-1)
-        return Transcribe_V2(NumberOfSpeakers, SpeakerNames)
+        return Transcribe_V2(model, NumberOfSpeakers, SpeakerNames)
     else:
         raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
 
-def VideoTranscribe(NumberOfSpeakers=None, SpeakerNames="", video="", retries=5):
+def VideoTranscribe(NumberOfSpeakers=None, SpeakerNames="", video="", retries=5, model='base'):
     if retries:
         try:
             clip = mp.VideoFileClip(video)

@@ -278,12 +313,11 @@ def VideoTranscribe(NumberOfSpeakers=None, SpeakerNames="", video="", retries=5)
             return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
         if not (os.path.isfile("temp_audio.wav")):
             return VideoTranscribe(NumberOfSpeakers, SpeakerNames, video, retries-1)
-        return Transcribe_V2(NumberOfSpeakers, SpeakerNames)
+        return Transcribe_V2(model, NumberOfSpeakers, SpeakerNames)
     else:
         raise gr.Error("There is some issue ith Video Transcriber. Please try again later!")
-    return Transcribe_V2(NumberOfSpeakers, SpeakerNames)
 
-def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries = 5):
+def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries = 5, model='base'):
     if retries:
         if "youtu" not in URL.lower():
             raise gr.Error(f"{URL} is not a valid youtube URL.")

@@ -305,42 +339,28 @@ def YoutubeTranscribe(NumberOfSpeakers=None, SpeakerNames="", URL="", retries =
         stream = ffmpeg.input('temp_audio.m4a')
         stream = ffmpeg.output(stream, 'temp_audio.wav')
         RemoveFile("temp_audio.m4a")
-        return Transcribe_V2(NumberOfSpeakers, SpeakerNames)
+        return Transcribe_V2(model, NumberOfSpeakers, SpeakerNames)
     else:
         raise gr.Error(f"Unable to get video from {URL}")
 
-ut = gr.Interface(
-    fn=YoutubeTranscribe,
-    inputs=[gr.Number(label="Number of Speakers", placeholder="2"), gr.Textbox(label="Name of the Speakers (ordered by the time they speak and separated by comma)", placeholder="If Speaker 1 is first to speak followed by Speaker 2 then -> Speaker 1, Speaker 2"), gr.Textbox(label="Youtube Link", placeholder="https://www.youtube.com/watch?v=GECcjrYHH8w"),],
-    outputs=[gr.Textbox(label="Transcribed Text", lines=15), gr.JSON(label="Transcribed JSON")]
-)
-vt = gr.Interface(
-    fn=VideoTranscribe,
-    inputs=[gr.Number(label="Number of Speakers", placeholder="2"), gr.Textbox(label="Name of the Speakers (ordered by the time they speak and separated by comma)", placeholder="If Speaker 1 is first to speak followed by Speaker 2 then -> Speaker 1, Speaker 2"), 'video'],
-    outputs=[gr.Textbox(label="Transcribed Text", lines=15), gr.JSON(label="Transcribed JSON")]
-)
-at = gr.Interface(
-    fn=AudioTranscribe,
-    inputs=[gr.Number(label="Number of Speakers", placeholder="2"), gr.Textbox(label="Name of the Speakers (ordered by the time they speak and separated by comma)", placeholder="If Speaker 1 is first to speak followed by Speaker 2 then -> Speaker 1, Speaker 2"), 'audio'],
-    outputs=[gr.Textbox(label="Transcribed Text", lines=15), gr.JSON(label="Transcribed JSON")]
-)
 
-# demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
-# demo.launch()
 with gr.Blocks() as yav_ui:
     with gr.Row():
         with gr.Column():
             with gr.Tab("Youtube", id=1):
+                ysz = gr.Dropdown(label="Model Size", choices=wispher_models , value='base')
                 yinput_nos = gr.Number(label="Number of Speakers", placeholder="2")
                 yinput_sn = gr.Textbox(label="Name of the Speakers (ordered by the time they speak and separated by comma)", placeholder="If Speaker 1 is first to speak followed by Speaker 2 then -> Speaker 1, Speaker 2")
                 yinput = gr.Textbox(label="Youtube Link", placeholder="https://www.youtube.com/watch?v=GECcjrYHH8w")
                 ybutton_transcribe = gr.Button("Transcribe", show_progress=True, scroll_to_output=True)
             with gr.Tab("Video", id=2):
+                vsz = gr.Dropdown(label="Model Size", choices=wispher_models, value='base')
                 vinput_nos = gr.Number(label="Number of Speakers", placeholder="2")
                 vinput_sn = gr.Textbox(label="Name of the Speakers (ordered by the time they speak and separated by comma)", placeholder="If Speaker 1 is first to speak followed by Speaker 2 then -> Speaker 1, Speaker 2")
                 vinput = gr.Video(label="Video")
                 vbutton_transcribe = gr.Button("Transcribe", show_progress=True, scroll_to_output=True)
             with gr.Tab("Audio", id=3):
+                asz = gr.Dropdown(label="Model Size", choices=wispher_models , value='base')
                 ainput_nos = gr.Number(label="Number of Speakers", placeholder="2")
                 ainput_sn = gr.Textbox(label="Name of the Speakers (ordered by the time they speak and separated by comma)", placeholder="If Speaker 1 is first to speak followed by Speaker 2 then -> Speaker 1, Speaker 2")
                 ainput = gr.Audio(label="Audio", type="filepath")

@@ -352,17 +372,17 @@ with gr.Blocks() as yav_ui:
             output_json = gr.JSON(label="Transcribed JSON")
     ybutton_transcribe.click(
         fn=YoutubeTranscribe,
-        inputs=[yinput_nos,yinput_sn,yinput],
+        inputs=[yinput_nos,yinput_sn,yinput, ysz],
         outputs=[output_textbox,output_json]
     )
     abutton_transcribe.click(
         fn=AudioTranscribe,
-        inputs=[ainput_nos,ainput_sn,ainput],
+        inputs=[ainput_nos,ainput_sn,ainput, asz],
         outputs=[output_textbox,output_json]
     )
     vbutton_transcribe.click(
         fn=VideoTranscribe,
-        inputs=[vinput_nos,vinput_sn,vinput],
+        inputs=[vinput_nos,vinput_sn,vinput, vsz],
         outputs=[output_textbox,output_json]
     )
 yav_ui.launch(debug=True)
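
For readers skimming the change, the most substantive addition is the T5-based grammar-correction pass that Transcribe_V2 now applies to each conversation segment. Below is a minimal, standalone sketch of that step, not part of the commit: the deep-learning-analytics/GrammarCorrector checkpoint and the two-beam generation come from the diff above, while the helper name, padding="longest", and the max_length=256 cap are illustrative assumptions rather than the app's exact settings.

# Illustrative sketch (not part of the commit): exercising the grammar corrector
# that the updated Transcribe_V2 runs over each transcribed segment.
import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

def correct_grammar_sketch(text: str) -> str:
    # Same checkpoint as in the diff; device selection mirrors the app's cuda/cpu check.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    tokenizer = T5Tokenizer.from_pretrained("deep-learning-analytics/GrammarCorrector")
    model = T5ForConditionalGeneration.from_pretrained("deep-learning-analytics/GrammarCorrector").to(device)
    # Assumption: padding="longest" and max_length=256 for this sketch;
    # the app instead pads/caps using len(input_text).
    batch = tokenizer([text], truncation=True, padding="longest", return_tensors="pt").to(device)
    out = model.generate(**batch, max_length=256, num_beams=2)
    return tokenizer.decode(out[0], skip_special_tokens=True)

if __name__ == "__main__":
    print(correct_grammar_sketch("he go to school yesterday"))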