Update app.py
Browse files
app.py
CHANGED
|
@@ -22,7 +22,7 @@ iso1_to_name = {iso[1]: iso[0] for iso in non_empty_isos} # {'ro': 'Romanian', '
|
|
| 22 |
models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "t5-base", "t5-small", "t5-large",
|
| 23 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 24 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
| 25 |
-
"HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2", "tencent/Hunyuan-MT-7B
|
| 26 |
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6"]
|
| 27 |
allmodels = ["Helsinki-NLP",
|
| 28 |
"Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
|
|
@@ -144,22 +144,39 @@ class Translators:
|
|
| 144 |
else:
|
| 145 |
prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
|
| 146 |
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
| 147 |
-
model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto")
|
| 148 |
-
model.tie_weights() # fp8
|
| 149 |
messages = [{"role": "user", "content": prompt}]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
tokenized_chat = tokenizer.apply_chat_template(
|
| 151 |
messages,
|
| 152 |
tokenize=True,
|
| 153 |
-
add_generation_prompt=
|
| 154 |
-
return_tensors="pt"
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
|
| 164 |
def HelsinkiNLP_mulroa(self):
|
| 165 |
try:
|
|
|
|
| 22 |
models = ["Helsinki-NLP", "QUICKMT", "Argos", "Google", "HPLT", "t5-base", "t5-small", "t5-large",
|
| 23 |
"utter-project/EuroLLM-1.7B", "utter-project/EuroLLM-1.7B-Instruct",
|
| 24 |
"Unbabel/Tower-Plus-2B", "Unbabel/TowerInstruct-7B-v0.2", "Unbabel/TowerInstruct-Mistral-7B-v0.2",
|
| 25 |
+
"HuggingFaceTB/SmolLM3-3B", "winninghealth/WiNGPT-Babel-2", "tencent/Hunyuan-MT-7B",
|
| 26 |
"openGPT-X/Teuken-7B-instruct-commercial-v0.4", "openGPT-X/Teuken-7B-instruct-v0.6"]
|
| 27 |
allmodels = ["Helsinki-NLP",
|
| 28 |
"Helsinki-NLP/opus-mt-tc-bible-big-mul-mul", "Helsinki-NLP/opus-mt-tc-bible-big-mul-deu_eng_nld",
|
|
|
|
| 144 |
else:
|
| 145 |
prompt = f"Translate the following segment into {self.tl}, without additional explanation.\n\n{self.input_text}."
|
| 146 |
tokenizer = AutoTokenizer.from_pretrained(self.model_name)
|
| 147 |
+
model = AutoModelForCausalLM.from_pretrained(self.model_name, device_map="auto", torch_dtype=torch.bfloat16)
|
|
|
|
| 148 |
messages = [{"role": "user", "content": prompt}]
|
| 149 |
+
# tokenized_chat = tokenizer.apply_chat_template(
|
| 150 |
+
# messages,
|
| 151 |
+
# tokenize=True,
|
| 152 |
+
# add_generation_prompt=True,
|
| 153 |
+
# return_tensors="pt"
|
| 154 |
+
# )
|
| 155 |
+
# outputs = model.generate(tokenized_chat.to(model.device), max_new_tokens=512, top_k=20, top_p=0.6, repetition_penalty=1.05, temperature=0.7)
|
| 156 |
+
# output_text = tokenizer.decode(outputs[0])
|
| 157 |
+
# return output_text
|
| 158 |
+
# Tokenize the conversation
|
| 159 |
tokenized_chat = tokenizer.apply_chat_template(
|
| 160 |
messages,
|
| 161 |
tokenize=True,
|
| 162 |
+
add_generation_prompt=True,
|
| 163 |
+
return_tensors="pt"
|
| 164 |
+
)
|
| 165 |
+
# Generate response
|
| 166 |
+
temperature = 0.7
|
| 167 |
+
with torch.no_grad():
|
| 168 |
+
outputs = model.generate(
|
| 169 |
+
tokenized_chat.to(model.device),
|
| 170 |
+
max_new_tokens=512,
|
| 171 |
+
temperature=temperature,
|
| 172 |
+
top_p=0.6,
|
| 173 |
+
do_sample=True if temperature > 0 else False,
|
| 174 |
+
pad_token_id=tokenizer.eos_token_id
|
| 175 |
+
)
|
| 176 |
+
|
| 177 |
+
# Decode only the new tokens
|
| 178 |
+
response = tokenizer.decode(outputs[0][tokenized_chat.shape[-1]:], skip_special_tokens=True)
|
| 179 |
+
return response
|
| 180 |
|
| 181 |
def HelsinkiNLP_mulroa(self):
|
| 182 |
try:
|