Spaces:
Runtime error
Runtime error
Commit
·
5da7484
1
Parent(s):
6fc22e6
Update app.py
Browse files
app.py
CHANGED
|
@@ -9,9 +9,23 @@ import ffmpeg
|
|
| 9 |
import subprocess
|
| 10 |
import gradio as gr
|
| 11 |
import traceback
|
|
|
|
| 12 |
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token="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")
|
|
|
|
| 13 |
|
|
|
|
|
|
|
|
|
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
def Transcribe(audio="temp_audio.wav"):
|
| 16 |
def millisec(timeStr):
|
| 17 |
spl = timeStr.split(":")
|
|
@@ -33,9 +47,9 @@ def Transcribe(audio="temp_audio.wav"):
|
|
| 33 |
as_audio = AudioSegment.from_wav(audio)
|
| 34 |
DEMO_FILE = {'uri': 'blabal', 'audio': audio}
|
| 35 |
dz = pipeline(DEMO_FILE)
|
| 36 |
-
with open(f"diarization_{audio}.txt", "w") as text_file:
|
| 37 |
text_file.write(str(dz))
|
| 38 |
-
dz = open(f"diarization_{audio}.txt").read().splitlines()
|
| 39 |
dzList = []
|
| 40 |
for l in dz:
|
| 41 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
|
@@ -45,7 +59,7 @@ def Transcribe(audio="temp_audio.wav"):
|
|
| 45 |
dzList.append([start, end, lex])
|
| 46 |
sounds = spacer
|
| 47 |
segments = []
|
| 48 |
-
dz = open(f"diarization_{audio}.txt").read().splitlines()
|
| 49 |
for l in dz:
|
| 50 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
| 51 |
start = millisec(start)
|
|
@@ -53,7 +67,7 @@ def Transcribe(audio="temp_audio.wav"):
|
|
| 53 |
segments.append(len(sounds))
|
| 54 |
sounds = sounds.append(as_audio[start:end], crossfade=0)
|
| 55 |
sounds = sounds.append(spacer, crossfade=0)
|
| 56 |
-
sounds.export(f"dz_{audio}.wav", format="wav")
|
| 57 |
return f"dz_{audio}.wav", dzList, segments
|
| 58 |
|
| 59 |
def transcribe(dz_audio):
|
|
@@ -82,11 +96,11 @@ def Transcribe(audio="temp_audio.wav"):
|
|
| 82 |
else:
|
| 83 |
conversation.append([dzList[i][2], c[2]])
|
| 84 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
| 85 |
-
return ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
| 86 |
|
| 87 |
spacermilli, spacer = preprocess(audio)
|
| 88 |
dz_audio, dzList, segments = diarization(audio)
|
| 89 |
-
t_text = transcribe(dz_audio)
|
| 90 |
try:
|
| 91 |
os.remove("temp_audio.wav")
|
| 92 |
except OSError:
|
|
@@ -99,9 +113,7 @@ def Transcribe(audio="temp_audio.wav"):
|
|
| 99 |
os.remove(f"diarization_{audio}.txt")
|
| 100 |
except OSError:
|
| 101 |
pass
|
| 102 |
-
return t_text
|
| 103 |
-
# subprocess.call(['ffmpeg', '-i', 'audio.mp3',
|
| 104 |
-
# 'audio.wav'])
|
| 105 |
|
| 106 |
def AudioTranscribe(audio, retries=5):
|
| 107 |
if retries:
|
|
@@ -116,9 +128,19 @@ def AudioTranscribe(audio, retries=5):
|
|
| 116 |
else:
|
| 117 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
| 118 |
|
| 119 |
-
def VideoTranscribe(video):
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 122 |
return Transcribe()
|
| 123 |
|
| 124 |
def YoutubeTranscribe(URL, retries = 5):
|
|
@@ -126,10 +148,7 @@ def YoutubeTranscribe(URL, retries = 5):
|
|
| 126 |
if "youtu" not in URL.lower():
|
| 127 |
raise gr.Error(f"{URL} is not a valid youtube URL.")
|
| 128 |
else:
|
| 129 |
-
|
| 130 |
-
os.remove("temp_audio.wav")
|
| 131 |
-
except OSError:
|
| 132 |
-
pass
|
| 133 |
ydl_opts = {
|
| 134 |
'format': 'bestaudio/best',
|
| 135 |
'outtmpl': 'temp_audio.%(ext)s',
|
|
@@ -145,10 +164,7 @@ def YoutubeTranscribe(URL, retries = 5):
|
|
| 145 |
return YoutubeTranscribe(URL, retries-1)
|
| 146 |
stream = ffmpeg.input('temp_audio.m4a')
|
| 147 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
| 148 |
-
|
| 149 |
-
os.remove("temp_audio.m4a")
|
| 150 |
-
except OSError:
|
| 151 |
-
pass
|
| 152 |
return Transcribe()
|
| 153 |
else:
|
| 154 |
raise gr.Error(f"Unable to get video from {URL}")
|
|
@@ -170,5 +186,4 @@ at = gr.Interface(
|
|
| 170 |
)
|
| 171 |
|
| 172 |
demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
|
| 173 |
-
demo.launch()
|
| 174 |
-
# YoutubeTranscribe('https://www.youtube.com/watch?v=GECcjrYHH8w')
|
|
|
|
| 9 |
import subprocess
|
| 10 |
import gradio as gr
|
| 11 |
import traceback
|
| 12 |
+
import json
|
| 13 |
# The Hugging Face access token is read from the HF_TOKEN environment variable
# (for example a Space secret) instead of being committed with the source,
# where anyone who can read the repository could copy and reuse it.
# NOTE(review): any token that was previously committed here should be revoked.
pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", use_auth_token=os.environ.get("HF_TOKEN"))
|
| 14 |
+
# Every path handed out by CreateFile is remembered here.
__FILES = set()


def CreateFile(filename):
    """Register ``filename`` in the module-level ``__FILES`` set.

    Args:
        filename: Path that the caller is about to create or open.

    Returns:
        The same ``filename``, unchanged, so the call can be used inline
        wherever a path is expected.
    """
    __FILES.add(filename)
    return filename
|
| 19 |
|
| 20 |
+
def RemoveFile(filename):
    """Delete ``filename`` if it is present; do nothing if it is already gone.

    Args:
        filename: Path of the file to delete.
    """
    # The previous check called ``os.path.exist``, which is not a real
    # function (the name is ``os.path.exists``) and raised AttributeError on
    # every call. Attempting the removal directly also avoids the gap between
    # checking for the file and deleting it.
    try:
        os.remove(filename)
    except FileNotFoundError:
        pass
|
| 23 |
+
|
| 24 |
+
def RemoveAllFiles():
    """Delete every file recorded in the module-level ``__FILES`` set.

    Paths that no longer exist on disk are skipped silently.
    """
    for file in __FILES:
        # ``os.path.exist`` (used previously) does not exist and raised
        # AttributeError; removing directly and ignoring a missing file keeps
        # the intended "delete if present" behaviour.
        try:
            os.remove(file)
        except FileNotFoundError:
            pass
|
| 28 |
+
|
| 29 |
def Transcribe(audio="temp_audio.wav"):
|
| 30 |
def millisec(timeStr):
|
| 31 |
spl = timeStr.split(":")
|
|
|
|
| 47 |
as_audio = AudioSegment.from_wav(audio)
|
| 48 |
DEMO_FILE = {'uri': 'blabal', 'audio': audio}
|
| 49 |
dz = pipeline(DEMO_FILE)
|
| 50 |
+
with open(CreateFile(f"diarization_{audio}.txt"), "w") as text_file:
|
| 51 |
text_file.write(str(dz))
|
| 52 |
+
dz = open(CreateFile(f"diarization_{audio}.txt")).read().splitlines()
|
| 53 |
dzList = []
|
| 54 |
for l in dz:
|
| 55 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
|
|
|
| 59 |
dzList.append([start, end, lex])
|
| 60 |
sounds = spacer
|
| 61 |
segments = []
|
| 62 |
+
dz = open(CreateFile(f"diarization_{audio}.txt")).read().splitlines()
|
| 63 |
for l in dz:
|
| 64 |
start, end = tuple(re.findall('[0-9]+:[0-9]+:[0-9]+\.[0-9]+', string=l))
|
| 65 |
start = millisec(start)
|
|
|
|
| 67 |
segments.append(len(sounds))
|
| 68 |
sounds = sounds.append(as_audio[start:end], crossfade=0)
|
| 69 |
sounds = sounds.append(spacer, crossfade=0)
|
| 70 |
+
sounds.export(CreateFile(f"dz_{audio}.wav"), format="wav")
|
| 71 |
return f"dz_{audio}.wav", dzList, segments
|
| 72 |
|
| 73 |
def transcribe(dz_audio):
|
|
|
|
| 96 |
else:
|
| 97 |
conversation.append([dzList[i][2], c[2]])
|
| 98 |
#print(f"[{dzList[i][2]}] {c[2]}")
|
| 99 |
+
return conversation, ("".join([f"{speaker} --> {text}\n" for speaker, text in conversation]))
|
| 100 |
|
| 101 |
spacermilli, spacer = preprocess(audio)
|
| 102 |
dz_audio, dzList, segments = diarization(audio)
|
| 103 |
+
conversation, t_text = transcribe(dz_audio)
|
| 104 |
try:
|
| 105 |
os.remove("temp_audio.wav")
|
| 106 |
except OSError:
|
|
|
|
| 113 |
os.remove(f"diarization_{audio}.txt")
|
| 114 |
except OSError:
|
| 115 |
pass
|
| 116 |
+
return t_text, json.dumps(conversation)
|
|
|
|
|
|
|
| 117 |
|
| 118 |
def AudioTranscribe(audio, retries=5):
|
| 119 |
if retries:
|
|
|
|
| 128 |
else:
|
| 129 |
raise gr.Error("There is some issue ith Audio Transcriber. Please try again later!")
|
| 130 |
|
| 131 |
+
def VideoTranscribe(video, retries=5):
    """Extract the audio track of ``video`` with ffmpeg and transcribe it.

    ffmpeg writes the audio to ``temp_audio.wav``, which is the default input
    of ``Transcribe()``. The extraction is attempted up to ``retries`` times.

    Args:
        video: Path of the video file handed to ffmpeg's ``-i`` option.
        retries: Maximum number of extraction attempts.

    Returns:
        Whatever ``Transcribe()`` returns.

    Raises:
        gr.Error: If no attempt produced ``temp_audio.wav``.
    """
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters from being split or interpreted. ``-y`` lets ffmpeg
    # overwrite a temp_audio.wav left behind by an earlier run instead of
    # refusing, which would otherwise leave the stale audio in place.
    command = [
        "ffmpeg", "-y", "-i", video,
        "-ab", "160k", "-ac", "2", "-ar", "44100", "-vn",
        "temp_audio.wav",
    ]
    for _ in range(retries):
        try:
            exit_code = subprocess.call(command)
        except Exception:
            # Retry boundary: report the failure and try again.
            traceback.print_exc()
            continue
        # ``os.path.exist`` (used previously) is not a real function and
        # raised AttributeError; also require a clean ffmpeg exit so a
        # leftover file from a failed run is not transcribed.
        if exit_code == 0 and os.path.exists("temp_audio.wav"):
            return Transcribe()
    raise gr.Error("There is some issue with Video Transcriber. Please try again later!")
|
| 145 |
|
| 146 |
def YoutubeTranscribe(URL, retries = 5):
|
|
|
|
| 148 |
if "youtu" not in URL.lower():
|
| 149 |
raise gr.Error(f"{URL} is not a valid youtube URL.")
|
| 150 |
else:
|
| 151 |
+
RemoveFile("temp_audio.wav")
|
|
|
|
|
|
|
|
|
|
| 152 |
ydl_opts = {
|
| 153 |
'format': 'bestaudio/best',
|
| 154 |
'outtmpl': 'temp_audio.%(ext)s',
|
|
|
|
| 164 |
return YoutubeTranscribe(URL, retries-1)
|
| 165 |
stream = ffmpeg.input('temp_audio.m4a')
|
| 166 |
stream = ffmpeg.output(stream, 'temp_audio.wav')
|
| 167 |
+
RemoveFile("temp_audio.m4a")
|
|
|
|
|
|
|
|
|
|
| 168 |
return Transcribe()
|
| 169 |
else:
|
| 170 |
raise gr.Error(f"Unable to get video from {URL}")
|
|
|
|
| 186 |
)
|
| 187 |
|
| 188 |
demo = gr.TabbedInterface([ut, vt, at], ["Youtube URL", "Video", "Audio"])
|
| 189 |
+
demo.launch()
|
|
|