Spaces:
Runtime error
Runtime error
Commit
·
628d0d9
1
Parent(s):
2f9b63d
Update app.py
Browse files
app.py
CHANGED
|
@@ -24,6 +24,7 @@ import wave
|
|
| 24 |
import contextlib
|
| 25 |
from sklearn.cluster import AgglomerativeClustering
|
| 26 |
import numpy as np
|
|
|
|
| 27 |
|
| 28 |
__FILES = set()
|
| 29 |
|
|
@@ -157,6 +158,32 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
|
|
| 157 |
return speaker
|
| 158 |
|
| 159 |
# audio = Audio()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 160 |
def get_output(segments):
|
| 161 |
# print(segments)
|
| 162 |
conversation=[]
|
|
@@ -209,6 +236,8 @@ def Transcribe_V2(num_speakers, speaker_names, audio="temp_audio.wav"):
|
|
| 209 |
return "Audio duration too long"
|
| 210 |
|
| 211 |
result = model.transcribe(audio)
|
|
|
|
|
|
|
| 212 |
|
| 213 |
segments = result["segments"]
|
| 214 |
|
|
|
|
| 24 |
import contextlib
|
| 25 |
from sklearn.cluster import AgglomerativeClustering
|
| 26 |
import numpy as np
|
| 27 |
+
import json
|
| 28 |
|
| 29 |
__FILES = set()
|
| 30 |
|
|
|
|
| 158 |
return speaker
|
| 159 |
|
| 160 |
# audio = Audio()
|
| 161 |
+
def diarization(audio):
    """Run the speaker-diarization pipeline on ``audio`` and parse its output.

    The pipeline result is rendered with ``str()``, saved to
    ``diarization_<audio>.txt`` (path obtained from ``CreateFile``), and each
    rendered line is parsed into a ``[start_ms, end_ms, speaker]`` triple.

    Args:
        audio: Path of the audio file handed to the pipeline under the
            ``'audio'`` key. It is also embedded in the output file name.

    Returns:
        A list of ``[start_ms, end_ms, speaker]`` lists, one per non-blank
        line of the rendered diarization. ``start_ms`` / ``end_ms`` are
        integer milliseconds; ``speaker`` is whatever ``GetSpeaker`` returns
        for the ``SPEAKER_NN`` label found on that line.

    Raises:
        ValueError: If a non-blank line does not contain exactly two
            ``H:MM:SS.fff`` timestamps.
        IndexError: If a non-blank line contains no ``SPEAKER_NN`` label.

    NOTE(review): ``num_speakers``, ``pipeline``, ``CreateFile`` and
    ``GetSpeaker`` are not defined in this block; they are presumably
    provided by the enclosing scope / module -- confirm against the full file.
    """
    def millisec(timeStr):
        """Convert an ``H:MM:SS.fff`` timestamp string to integer milliseconds."""
        hours, minutes, seconds = timeStr.split(":")
        total_seconds = int(hours) * 60 * 60 + int(minutes) * 60 + float(seconds)
        # round() instead of int(): binary floating point makes e.g.
        # 1.001 * 1000 == 1000.9999999999999, which truncation would turn
        # into 1000 ms instead of 1001 ms.
        return round(total_seconds * 1000)

    # Raw strings so that ``\.`` is a regex escape, not an (invalid) string
    # escape; compiled once here rather than looked up on every line.
    timestamp_re = re.compile(r"[0-9]+:[0-9]+:[0-9]+\.[0-9]+")
    speaker_re = re.compile(r"(SPEAKER_[0-9][0-9])")

    DEMO_FILE = {'uri': 'blabal', 'audio': audio}

    hparams = pipeline.parameters(instantiated=True)
    # NOTE(review): this reads the currently instantiated parameters and
    # re-instantiates with onset + 0.1, so repeated calls look like they
    # raise the onset cumulatively -- confirm whether that is intended.
    hparams["segmentation_onset"] += 0.1
    pipeline.instantiate(hparams)

    # Only pass a speaker count when one was actually supplied (truthy).
    if num_speakers:
        dz = pipeline(DEMO_FILE, num_speakers=num_speakers)
    else:
        dz = pipeline(DEMO_FILE)

    # Keep the on-disk copy of the diarization, but parse the in-memory text
    # instead of re-opening the file (the old re-read never closed its handle).
    dz_text = str(dz)
    with open(CreateFile(f"diarization_{audio}.txt"), "w") as text_file:
        text_file.write(dz_text)

    dzList = []
    for line in dz_text.splitlines():
        if not line.strip():
            # Nothing to parse on a blank line.
            continue
        start, end = timestamp_re.findall(line)
        start = millisec(start)
        end = millisec(end)
        lex = GetSpeaker(speaker_re.findall(line)[0])
        dzList.append([start, end, lex])
    return dzList
|
| 186 |
+
|
| 187 |
def get_output(segments):
|
| 188 |
# print(segments)
|
| 189 |
conversation=[]
|
|
|
|
| 236 |
return "Audio duration too long"
|
| 237 |
|
| 238 |
result = model.transcribe(audio)
|
| 239 |
+
json.dumps(result)
|
| 240 |
+
json.dumps(diarization(audio))
|
| 241 |
|
| 242 |
segments = result["segments"]
|
| 243 |
|