Spaces:
Runtime error
Runtime error
drown0315
committed on
Commit
·
5e3d623
1
Parent(s):
ada1a34
feat: 增加双语字幕
Browse files
- decode.py +7 -3
- requirements.txt +3 -0
decode.py
CHANGED
|
@@ -32,6 +32,7 @@ class Segment:
|
|
| 32 |
start: float
|
| 33 |
duration: float
|
| 34 |
text: str = ""
|
|
|
|
| 35 |
|
| 36 |
@property
|
| 37 |
def end(self):
|
|
@@ -44,6 +45,8 @@ class Segment:
|
|
| 44 |
s = s.replace(".", ",")
|
| 45 |
s += "\n"
|
| 46 |
s += self.text
|
|
|
|
|
|
|
| 47 |
return s
|
| 48 |
|
| 49 |
|
|
@@ -124,12 +127,13 @@ def decode(
|
|
| 124 |
|
| 125 |
for seg, stream in zip(segments, streams):
|
| 126 |
en_text = stream.result.text.strip()
|
| 127 |
-
|
| 128 |
-
seg.text = en_text +"\n"+cn_text
|
| 129 |
if len(seg.text) == 0:
|
| 130 |
logging.info("Skip empty segment")
|
| 131 |
continue
|
| 132 |
|
|
|
|
|
|
|
| 133 |
if len(all_text) == 0:
|
| 134 |
all_text.append(seg.text)
|
| 135 |
elif len(all_text[-1][0].encode()) == 1 and len(seg.text[0].encode()) == 1:
|
|
@@ -171,7 +175,7 @@ class LLMTranslator:
|
|
| 171 |
def translate(self, src_text: str) -> str:
|
| 172 |
translated = self._model.generate(**self._tokenizer(src_text, return_tensors="pt", padding=True))
|
| 173 |
res = [self._tokenizer.decode(t, skip_special_tokens=True) for t in translated]
|
| 174 |
-
return res
|
| 175 |
|
| 176 |
|
| 177 |
_llm_translator = LLMTranslator()
|
|
|
|
| 32 |
start: float
|
| 33 |
duration: float
|
| 34 |
text: str = ""
|
| 35 |
+
cn_text: str = ""
|
| 36 |
|
| 37 |
@property
|
| 38 |
def end(self):
|
|
|
|
| 45 |
s = s.replace(".", ",")
|
| 46 |
s += "\n"
|
| 47 |
s += self.text
|
| 48 |
+
s += "\n"
|
| 49 |
+
s += self.cn_text
|
| 50 |
return s
|
| 51 |
|
| 52 |
|
|
|
|
| 127 |
|
| 128 |
for seg, stream in zip(segments, streams):
|
| 129 |
en_text = stream.result.text.strip()
|
| 130 |
+
seg.text = en_text
|
|
|
|
| 131 |
if len(seg.text) == 0:
|
| 132 |
logging.info("Skip empty segment")
|
| 133 |
continue
|
| 134 |
|
| 135 |
+
seg.cn_text = _llm_translator.translate(en_text)
|
| 136 |
+
|
| 137 |
if len(all_text) == 0:
|
| 138 |
all_text.append(seg.text)
|
| 139 |
elif len(all_text[-1][0].encode()) == 1 and len(seg.text[0].encode()) == 1:
|
|
|
|
| 175 |
def translate(self, src_text: str) -> str:
|
| 176 |
translated = self._model.generate(**self._tokenizer(src_text, return_tensors="pt", padding=True))
|
| 177 |
res = [self._tokenizer.decode(t, skip_special_tokens=True) for t in translated]
|
| 178 |
+
return "".join(str(itemText) for itemText in res)
|
| 179 |
|
| 180 |
|
| 181 |
_llm_translator = LLMTranslator()
|
requirements.txt
CHANGED
|
@@ -1,3 +1,6 @@
|
|
| 1 |
#https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.10.29/sherpa_onnx-1.10.29-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
| 2 |
sherpa-onnx>=1.10.35
|
| 3 |
ffmpeg-python
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
#https://huggingface.co/csukuangfj/sherpa-onnx-wheels/resolve/main/cpu/1.10.29/sherpa_onnx-1.10.29-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
| 2 |
sherpa-onnx>=1.10.35
|
| 3 |
ffmpeg-python
|
| 4 |
+
transformers
|
| 5 |
+
sentencepiece
|
| 6 |
+
torch
|