Update app.py
app.py CHANGED
@@ -178,6 +178,7 @@ Your response should adapt to the norms and customs of the respective language a
 # ============ CONSTANT ============
 # https://github.com/gradio-app/gradio/issues/884
 MODEL_NAME = "SeaLLM-13B"
+MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
 
 MODEL_TITLE = """
 <div class="container" style="
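The added line lets the Space override the default model name at runtime through an environment variable. A minimal sketch of the same pattern, where `MODEL_PATH` and `PORT` are hypothetical names used only for illustration:

```python
import os

# Read configuration from the environment, falling back to defaults.
# MODEL_PATH and PORT are hypothetical names used only for this sketch.
MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
MODEL_PATH = os.environ.get("MODEL_PATH", "SeaLLMs/SeaLLM-Chat-13b")
PORT = int(os.environ.get("PORT", "7860"))

print(f"Serving {MODEL_NAME} ({MODEL_PATH}) on port {PORT}")
```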
@@ -231,21 +232,24 @@ MODEL_TITLE = """
 # </span>
 # """.strip()
 
-
+# <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a helpful chatbot assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
+
+
+MODEL_DESC = f"""
 <div style='display:flex; gap: 0.25rem; '>
 <a href='https://github.com/SeaLLMs/SeaLLMs'><img src='https://img.shields.io/badge/Github-Code-success'></a>
 <a href='https://huggingface.co/spaces/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
 <a href='https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
 </div>
 <span style="font-size: larger">
-
-Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more
+<a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">{MODEL_NAME}</a> - a helpful assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
+Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more.
 </span>
 <br>
 <span>
-<span style="color: red">NOTE
-By using our service, you are required to
-not to use our service to generate any harmful, inappropriate or
+<span style="color: red">NOTE: The chatbot may produce false and harmful content and does not have up-to-date knowledge.</span>
+By using our service, you are required to agree to our <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b/blob/main/LICENSE" target="_blank" style="color: red">Terms Of Use</a>, which includes
+not to use our service to generate any harmful, inappropriate or illegal content that violates local and international laws.
 The service collects user dialogue data for testing and performance improvement, and reserves the right to distribute it under
 <a href="https://creativecommons.org/licenses/by/4.0/">(CC-BY)</a> or similar license. So do not enter any personal information!
 </span>
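Since MODEL_DESC is an f-string, the {MODEL_NAME} placeholder is substituted once, when the module is loaded, which is why the environment-variable override of MODEL_NAME added earlier in this commit sits above it. A minimal sketch of that ordering, with the HTML trimmed down (not the full template from app.py):

```python
import os

# The override must run before MODEL_DESC is defined: the f-string is
# evaluated once, at definition time, not when the description is rendered.
MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))

MODEL_DESC = f"""
<span style="font-size: larger">{MODEL_NAME} - a helpful assistant for Southeast Asian Languages.</span>
"""

print(MODEL_DESC)
```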
@@ -731,17 +735,6 @@ def llama_chat_sys_input_seq_constructor(text, sys_prompt=SYSTEM_PROMPT_1, bos_t
     return f"{bos_token}{B_INST} {B_SYS} {sys_prompt} {E_SYS} {text} {E_INST}"
 
 
-def few_shot_prompt(
-    message: str,
-    history: List[Tuple[str, str]],
-    sys_prompt=SYSTEM_PROMPT_1,
-    bos_token=BOS_TOKEN,
-    eos_token=EOS_TOKEN,
-    include_end_instruct=True,
-):
-    return f"{bos_token} {message}"
-
-
 def llama_chat_multiturn_sys_input_seq_constructor(
     message: str,
     history: List[Tuple[str, str]],
@@ -1572,10 +1565,9 @@ def batch_inference(
         prompt_format_fn = llama_chat_multiturn_sys_input_seq_constructor
     elif prompt_mode == 'few-shot':
         from functools import partial
-
-
-
-        prompt_format_fn = few_shot_prompt
+        prompt_format_fn = partial(
+            llama_chat_multiturn_sys_input_seq_constructor, include_end_instruct=False
+        )
     else:
         raise gr.Error(f'Wrong mode {prompt_mode}')
 
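Together with the removal of few_shot_prompt above, this hunk makes few-shot mode reuse the multi-turn constructor with the trailing instruction marker disabled, by pre-binding the keyword with functools.partial. A minimal, self-contained sketch of that pattern; format_prompt below is an illustrative stand-in, not the function in app.py:

```python
from functools import partial

def format_prompt(message: str, sys_prompt: str = "You are a helpful assistant.",
                  include_end_instruct: bool = True) -> str:
    """Illustrative stand-in for the real Llama-style prompt constructor."""
    end = " [/INST]" if include_end_instruct else ""
    return f"<s>[INST] <<SYS>>\n{sys_prompt}\n<</SYS>>\n\n{message}{end}"

# partial() pre-binds include_end_instruct=False, so few-shot mode can call
# the same function chat mode uses, just without closing the instruction.
few_shot_format = partial(format_prompt, include_end_instruct=False)

print(format_prompt("Hello"))     # ends with " [/INST]"
print(few_shot_format("Hello"))   # leaves the prompt open for completion
```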
@@ -1607,7 +1599,6 @@ def batch_inference(
     for res, item in zip(responses, all_items):
         item['response'] = res
 
-    # save_path = "/mnt/workspace/workgroup/phi/test.json"
     save_path = BATCH_INFER_SAVE_TMP_FILE
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     with open(save_path, 'w', encoding='utf-8') as f:
@@ -1629,6 +1620,15 @@ each item has `prompt` key. We put guardrails to enhance safety, so do not input
 ```
 """
 
+CHAT_EXAMPLES = [
+    ["Hãy giải thích thuyết tương đối rộng."],
+    ["Tolong bantu saya menulis email ke lembaga pemerintah untuk mencari dukungan finansial untuk penelitian AI."],
+    ["ຂໍແຈ້ງ 5 ສະຖານທີ່ທ່ອງທ່ຽວໃນນະຄອນຫຼວງວຽງຈັນ"],
+]
+
+
+# performance items
+
 
 def launch():
     global demo, llm, DEBUG, LOG_FILE
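CHAT_EXAMPLES (Vietnamese, Indonesian and Lao prompts) is later passed to gr.ChatInterface as examples with cache_examples=False, so clicking an example sends it through the live model rather than replaying a pre-computed answer. A minimal sketch of that wiring, assuming a trivial echo function in place of the real response_fn:

```python
import gradio as gr

def echo_fn(message, history):
    # Stand-in for the real response_fn; it simply echoes the user message.
    return f"You said: {message}"

demo = gr.ChatInterface(
    echo_fn,
    # app.py wraps each example in a list (CHAT_EXAMPLES) because the real
    # interface also has additional numeric inputs; plain strings work too.
    examples=["Hãy giải thích thuyết tương đối rộng."],
    cache_examples=False,  # run the model live instead of caching example replies
)

if __name__ == "__main__":
    demo.launch()
```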
@@ -1701,7 +1701,7 @@ def launch():
 
     if QUANTIZATION == 'awq':
         print(F'Load model in int4 quantization')
-        llm = LLM(model=model_path, dtype=
+        llm = LLM(model=model_path, dtype="float16", tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq")
     else:
         llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization)
 
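The AWQ branch now pins dtype="float16" and passes quantization="awq" to vLLM's LLM constructor, since the AWQ kernels run with fp16 activations. A minimal sketch of loading and querying a model this way with vLLM; the model id below is an assumption for illustration, not necessarily an AWQ checkpoint:

```python
from vllm import LLM, SamplingParams

# Hypothetical AWQ checkpoint name, used only for illustration.
llm = LLM(
    model="SeaLLMs/SeaLLM-Chat-13b",  # substitute an actual AWQ-quantized model
    quantization="awq",               # load int4 AWQ weights
    dtype="float16",                  # AWQ kernels expect fp16 activations
    gpu_memory_utilization=0.9,
)

params = SamplingParams(temperature=0.2, max_tokens=128)
outputs = llm.generate(["Hello, how are you?"], params)
print(outputs[0].outputs[0].text)
```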
@@ -1751,7 +1751,7 @@ def launch():
                 ["upload_chat.json", "chat", 0.2, 1024, 0.5, 0, "[STOP],[END],<s>,</s>"],
                 ["upload_few_shot.json", "few-shot", 0.2, 128, 0.5, 0, "[STOP],[END],<s>,</s>,\\n"]
             ],
-            cache_examples=
+            # cache_examples=True,
         )
 
         demo_chat = gr.ChatInterface(
@@ -1765,7 +1765,7 @@ def launch():
                 ],
                 show_copy_button=True,
             ),
-            textbox=gr.Textbox(placeholder='Type message', lines=
+            textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
             submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
             # ! consider preventing the stop button
             # stop_btn=None,
@@ -1780,26 +1780,42 @@ def launch():
                 # ! Remove the system prompt textbox to avoid jailbreaking
                 # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
             ],
+            examples=CHAT_EXAMPLES,
+            cache_examples=False
         )
+        descriptions = model_desc
+        if DISPLAY_MODEL_PATH:
+            descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
+
         demo = CustomTabbedInterface(
             interface_list=[demo_chat, demo_file_upload],
             tab_names=["Chat Interface", "Batch Inference"],
             title=f"{model_title}",
-            description=
+            description=descriptions,
         )
         demo.title = MODEL_NAME
+        callback = None
         with demo:
+            if DATA_SET_REPO_PATH != "":
+                try:
+                    from performance_plot import attach_plot_to_demo
+                    attach_plot_to_demo(demo)
+                except Exception as e:
+                    print(f'Fail to load DEMO plot: {str(e)}')
+
             gr.Markdown(cite_markdown)
-            if DISPLAY_MODEL_PATH:
-
+            # if DISPLAY_MODEL_PATH:
+            #     gr.Markdown(path_markdown.format(model_path=model_path))
 
             if ENABLE_AGREE_POPUP:
                 demo.load(None, None, None, _js=AGREE_POP_SCRIPTS)
 
-
         demo.queue()
         demo.launch(server_port=PORT)
     else:
+        descriptions = model_desc
+        if DISPLAY_MODEL_PATH:
+            descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
         demo = gr.ChatInterface(
             response_fn,
             chatbot=ChatBot(
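The new block inside with demo: attaches an optional performance plot but keeps the import inside try/except, so a missing or broken performance_plot module only logs a warning instead of taking the whole Space down. The same defensive pattern in isolation, where optional_extras is a hypothetical module name:

```python
def attach_optional_features(demo):
    # Lazy, guarded import: if the optional module is absent or raises,
    # the demo still launches. "optional_extras" is a hypothetical name.
    try:
        from optional_extras import attach_plot_to_demo
        attach_plot_to_demo(demo)
    except Exception as e:
        print(f'Fail to load DEMO plot: {e}')
```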
@@ -1811,12 +1827,12 @@ def launch():
                 ],
                 show_copy_button=True,
             ),
-            textbox=gr.Textbox(placeholder='Type message', lines=
+            textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
             submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
             # ! consider preventing the stop button
             # stop_btn=None,
             title=f"{model_title}",
-            description=
+            description=descriptions,
             additional_inputs=[
                 gr.Number(value=temperature, label='Temperature (higher -> more random)'),
                 gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
@@ -1826,6 +1842,8 @@ def launch():
                 # ! Remove the system prompt textbox to avoid jailbreaking
                 # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
             ],
+            examples=CHAT_EXAMPLES,
+            cache_examples=False
         )
         demo.title = MODEL_NAME
         with demo: