Spaces:
Runtime error
Runtime error
Commit
·
cfd7673
1
Parent(s):
16e9291
output all texts
Browse files
- .gitattributes +1 -0
- app.py +15 -5
- decode.py +12 -1
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.wav filter=lfs diff=lfs merge=lfs -text
|
app.py
CHANGED
|
@@ -106,8 +106,8 @@ def process_uploaded_video_file(
|
|
| 106 |
|
| 107 |
logging.info(f"Processing uploaded file: {in_filename}")
|
| 108 |
|
| 109 |
-
ans = process(language, repo_id, add_punctuation, in_filename)
|
| 110 |
-
return (in_filename, ans[0]), ans[0], ans[1], ans[2]
|
| 111 |
|
| 112 |
|
| 113 |
def process_uploaded_audio_file(
|
|
@@ -142,8 +142,9 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
|
|
| 142 |
else:
|
| 143 |
punct = None
|
| 144 |
|
| 145 |
-
result = decode(recognizer, vad, punct, in_filename)
|
| 146 |
logging.info(result)
|
|
|
|
| 147 |
|
| 148 |
srt_filename = Path(in_filename).with_suffix(".srt")
|
| 149 |
with open(srt_filename, "w", encoding="utf-8") as f:
|
|
@@ -156,6 +157,7 @@ def process(language: str, repo_id: str, add_punctuation: str, in_filename: str)
|
|
| 156 |
str(srt_filename),
|
| 157 |
build_html_output("Done! Please download the SRT file", "result_item_success"),
|
| 158 |
result,
|
|
|
|
| 159 |
)
|
| 160 |
|
| 161 |
|
|
@@ -205,7 +207,10 @@ with demo:
|
|
| 205 |
|
| 206 |
output_info_video = gr.HTML(label="Info")
|
| 207 |
output_textbox_video = gr.Textbox(
|
| 208 |
-
label="Recognized speech from uploaded video file"
|
|
|
|
|
|
|
|
|
|
| 209 |
)
|
| 210 |
|
| 211 |
with gr.TabItem("Upload audio from disk"):
|
|
@@ -222,7 +227,10 @@ with demo:
|
|
| 222 |
|
| 223 |
output_info_audio = gr.HTML(label="Info")
|
| 224 |
output_textbox_audio = gr.Textbox(
|
| 225 |
-
label="Recognized speech from uploaded audio file"
|
|
|
|
|
|
|
|
|
|
| 226 |
)
|
| 227 |
|
| 228 |
upload_video_button.click(
|
|
@@ -238,6 +246,7 @@ with demo:
|
|
| 238 |
output_srt_file_video,
|
| 239 |
output_info_video,
|
| 240 |
output_textbox_video,
|
|
|
|
| 241 |
],
|
| 242 |
)
|
| 243 |
|
|
@@ -253,6 +262,7 @@ with demo:
|
|
| 253 |
output_srt_file_audio,
|
| 254 |
output_info_audio,
|
| 255 |
output_textbox_audio,
|
|
|
|
| 256 |
],
|
| 257 |
)
|
| 258 |
|
|
|
|
| 106 |
|
| 107 |
logging.info(f"Processing uploaded file: {in_filename}")
|
| 108 |
|
| 109 |
+
ans, all_text = process(language, repo_id, add_punctuation, in_filename)
|
| 110 |
+
return (in_filename, ans[0]), ans[0], ans[1], ans[2], all_text
|
| 111 |
|
| 112 |
|
| 113 |
def process_uploaded_audio_file(
|
|
|
|
| 142 |
else:
|
| 143 |
punct = None
|
| 144 |
|
| 145 |
+
result, all_text = decode(recognizer, vad, punct, in_filename)
|
| 146 |
logging.info(result)
|
| 147 |
+
logging.info(all_text)
|
| 148 |
|
| 149 |
srt_filename = Path(in_filename).with_suffix(".srt")
|
| 150 |
with open(srt_filename, "w", encoding="utf-8") as f:
|
|
|
|
| 157 |
str(srt_filename),
|
| 158 |
build_html_output("Done! Please download the SRT file", "result_item_success"),
|
| 159 |
result,
|
| 160 |
+
all_text,
|
| 161 |
)
|
| 162 |
|
| 163 |
|
|
|
|
| 207 |
|
| 208 |
output_info_video = gr.HTML(label="Info")
|
| 209 |
output_textbox_video = gr.Textbox(
|
| 210 |
+
label="Recognized speech from uploaded video file (srt format)"
|
| 211 |
+
)
|
| 212 |
+
all_output_textbox_video = gr.Textbox(
|
| 213 |
+
label="Recognized speech from uploaded video file (all in one)"
|
| 214 |
)
|
| 215 |
|
| 216 |
with gr.TabItem("Upload audio from disk"):
|
|
|
|
| 227 |
|
| 228 |
output_info_audio = gr.HTML(label="Info")
|
| 229 |
output_textbox_audio = gr.Textbox(
|
| 230 |
+
label="Recognized speech from uploaded audio file (srt format)"
|
| 231 |
+
)
|
| 232 |
+
all_output_textbox_audio = gr.Textbox(
|
| 233 |
+
label="Recognized speech from uploaded audio file (all in one)"
|
| 234 |
)
|
| 235 |
|
| 236 |
upload_video_button.click(
|
|
|
|
| 246 |
output_srt_file_video,
|
| 247 |
output_info_video,
|
| 248 |
output_textbox_video,
|
| 249 |
+
all_output_textbox_video,
|
| 250 |
],
|
| 251 |
)
|
| 252 |
|
|
|
|
| 262 |
output_srt_file_audio,
|
| 263 |
output_info_audio,
|
| 264 |
output_textbox_audio,
|
| 265 |
+
all_output_textbox_audio,
|
| 266 |
],
|
| 267 |
)
|
| 268 |
|
decode.py
CHANGED
|
@@ -81,6 +81,8 @@ def decode(
|
|
| 81 |
|
| 82 |
logging.info("Started!")
|
| 83 |
|
|
|
|
|
|
|
| 84 |
while True:
|
| 85 |
# *2 because int16_t has two bytes
|
| 86 |
data = process.stdout.read(frames_per_read * 2)
|
|
@@ -116,8 +118,17 @@ def decode(
|
|
| 116 |
|
| 117 |
for seg, stream in zip(segments, streams):
|
| 118 |
seg.text = stream.result.text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 119 |
if punct is not None:
|
| 120 |
seg.text = punct.add_punctuation(seg.text)
|
| 121 |
segment_list.append(seg)
|
|
|
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
return "\n\n".join(f"{i}\n{seg}" for i, seg in enumerate(segment_list, 1))
|
|
|
|
| 81 |
|
| 82 |
logging.info("Started!")
|
| 83 |
|
| 84 |
+
all_text = []
|
| 85 |
+
|
| 86 |
while True:
|
| 87 |
# *2 because int16_t has two bytes
|
| 88 |
data = process.stdout.read(frames_per_read * 2)
|
|
|
|
| 118 |
|
| 119 |
for seg, stream in zip(segments, streams):
|
| 120 |
seg.text = stream.result.text.strip()
|
| 121 |
+
if not all_text:
|
| 122 |
+
all_text.append(seg.text)
|
| 123 |
+
elif len(all_text[-1][0].encode()) == 1 and len(seg.text[0].encode()) == 1:
|
| 124 |
+
all_text.append(" ")
|
| 125 |
+
all_text.append(seg.text)
|
| 126 |
+
|
| 127 |
if punct is not None:
|
| 128 |
seg.text = punct.add_punctuation(seg.text)
|
| 129 |
segment_list.append(seg)
|
| 130 |
+
all_text = " ".join(all_text)
|
| 131 |
+
if punct is not None:
|
| 132 |
+
all_text = punct.add_punctuation(all_text)
|
| 133 |
|
| 134 |
+
return "\n\n".join(f"{i}\n{seg}" for i, seg in enumerate(segment_list, 1)), all_text
|