use run inference from Space
- app.py +16 -12
- lid.176.ftz +3 -0
- lid218e.bin +3 -0
- requirements.txt +1 -0
app.py CHANGED

@@ -1,6 +1,7 @@
 import requests
 import os
 
+import fasttext
 import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
 import torch
@@ -11,11 +12,8 @@ When comments are created in the community tab, detect the language of the conte
 Then, if the detected language is different from the user's language, display an option to translate it.
 """
 
-
-TRANSLATION_API_URL = "https://api-inference.huggingface.co/models/t5-base"
-LANG_ID_API_URL = "https://noe30ht5sav83xm1.us-east-1.aws.endpoints.huggingface.cloud"
+LANG_ID_API_URL = "https://q5esh83u7boq5qwd.us-east-1.aws.endpoints.huggingface.cloud"
 ACCESS_TOKEN = os.environ.get("ACCESS_TOKEN")
-ACCESS_TOKEN = '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'
 headers = {"Authorization": f"Bearer {ACCESS_TOKEN}"}
 
 model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
@@ -32,18 +30,22 @@ language_code_map = {
     "Japanese": "jpn_Jpan"
 }
 
-
-
-
-
-
-
+def identify_language(text):
+    model_file = "lid218e.bin"
+    model_full_path = os.path.join(os.path.dirname(__file__), model_file)
+    model = fasttext.load_model(model_full_path)
+    predictions = model.predict(text, k=1)  # e.g., (('__label__eng_Latn',), array([0.81148803]))
+
+    PREFIX_LENGTH = 7  # To strip away '__label__' from language code
+    language_code = predictions[0][0][PREFIX_LENGTH:]
+    return language_code
+
 
 
 def translate(text, src_lang, tgt_lang):
     src_lang_code = language_code_map[src_lang]
     tgt_lang_code = language_code_map[tgt_lang]
-
+
     translation_pipeline = pipeline(
         "translation", model=model, tokenizer=tokenizer, src_lang=src_lang_code, tgt_lang=tgt_lang_code, device=device)
     result = translation_pipeline(text)
@@ -55,8 +57,10 @@ def query(text, src_lang, tgt_lang):
     lang_id_response = requests.post(LANG_ID_API_URL, headers=headers, json={
         "inputs": text, "wait_for_model": True, "use_cache": True})
     lang_id = lang_id_response.json()[0]
+
+    language_code = identify_language(text)
 
-    return [
+    return [language_code, translation]
 
 
 examples = [
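For reference, here is a minimal standalone sketch of the new identify_language helper, assuming lid218e.bin sits next to the script as in this commit. One caveat: the '__label__' prefix that fastText puts on predicted labels is 9 characters long, so the sketch strips it by its actual length instead of the hard-coded 7 used above, which would leave '__eng_Latn' rather than 'eng_Latn'.

import os
import fasttext

def identify_language(text):
    # Load the fastText LID model that was added to the repo in this commit.
    model_path = os.path.join(os.path.dirname(__file__), "lid218e.bin")
    model = fasttext.load_model(model_path)

    # predict() expects a single line and returns e.g.
    # (('__label__eng_Latn',), array([0.81148803]))
    labels, _scores = model.predict(text.replace("\n", " "), k=1)

    # '__label__' is 9 characters; strip it by length rather than a fixed constant.
    prefix = "__label__"
    label = labels[0]
    return label[len(prefix):] if label.startswith(prefix) else label

Called on English input this returns a code like 'eng_Latn', which lines up with the values used in language_code_map.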
lid.176.ftz ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8f3472cfe8738a7b6099e8e999c3cbfae0dcd15696aac7d7738a8039db603e83
+size 938013
lid218e.bin ADDED

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a
+size 1176355829
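Both new model files are committed as Git LFS pointer files, so only the spec version, the sha256 oid, and the byte size live in the repo while the binaries are stored in LFS. As a rough illustration (a hypothetical helper, not part of the Space), a locally downloaded copy can be checked against the pointer recorded above:

import hashlib
import os

def matches_lfs_pointer(path, expected_oid, expected_size):
    # Compare the local file's size and sha256 digest with the LFS pointer fields.
    if os.path.getsize(path) != expected_size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == expected_oid

# oid and size copied from the lid218e.bin pointer above.
print(matches_lfs_pointer(
    "lid218e.bin",
    "8ded5749a2ad79ae9ab7c9190c7c8b97ff20d54ad8b9527ffa50107238fc7f6a",
    1176355829,
))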
requirements.txt CHANGED

@@ -1,2 +1,3 @@
+fasttext
 torch
 transformers
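With fasttext added alongside torch and transformers, the detect-then-translate flow from app.py can be tried locally. The sketch below works under the same assumptions as the commit (lid218e.bin downloaded next to the script, NLLB-style codes such as eng_Latn and jpn_Jpan as in language_code_map); it runs on CPU and is illustrative rather than a copy of the Space's final code.

import fasttext
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, pipeline

text = "Where is the nearest train station?"

# Language identification, mirroring identify_language() in app.py.
lid_model = fasttext.load_model("lid218e.bin")
labels, _ = lid_model.predict(text.replace("\n", " "), k=1)
src_lang_code = labels[0][len("__label__"):]  # e.g. 'eng_Latn'

# Translation with NLLB, mirroring translate(); target code taken from language_code_map.
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-600M")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-600M")
translator = pipeline(
    "translation", model=model, tokenizer=tokenizer,
    src_lang=src_lang_code, tgt_lang="jpn_Jpan",
)
print(src_lang_code, translator(text)[0]["translation_text"])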