Update app.py
app.py CHANGED
@@ -178,6 +178,7 @@ Your response should adapt to the norms and customs of the respective language a
 # ============ CONSTANT ============
 # https://github.com/gradio-app/gradio/issues/884
 MODEL_NAME = "SeaLLM-13B"
+MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
 
 MODEL_TITLE = """
 <div class="container" style="
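The added line lets the Space override the default model name at runtime through an environment variable. A minimal sketch of the same pattern, where `MODEL_PATH` and `PORT` are hypothetical names used only for illustration:

```python
import os

# Read configuration from the environment, falling back to defaults.
# MODEL_PATH and PORT are hypothetical names used only for this sketch.
MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))
MODEL_PATH = os.environ.get("MODEL_PATH", "SeaLLMs/SeaLLM-Chat-13b")
PORT = int(os.environ.get("PORT", "7860"))

print(f"Serving {MODEL_NAME} ({MODEL_PATH}) on port {PORT}")
```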
@@ -231,21 +232,24 @@ MODEL_TITLE = """
 # </span>
 # """.strip()
 
-
+# <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">SeaLLM-13B-Chat</a> - a helpful chatbot assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
+
+
+MODEL_DESC = f"""
 <div style='display:flex; gap: 0.25rem; '>
 <a href='https://github.com/SeaLLMs/SeaLLMs'><img src='https://img.shields.io/badge/Github-Code-success'></a>
 <a href='https://huggingface.co/spaces/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Spaces-blue'></a>
 <a href='https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b'><img src='https://img.shields.io/badge/%F0%9F%A4%97%20Hugging%20Face-Model-blue'></a>
 </div>
 <span style="font-size: larger">
-
-Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more
+<a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">{MODEL_NAME}</a> - a helpful assistant for Southeast Asian Languages. It supports English 🇬🇧, Vietnamese 🇻🇳, Indonesian 🇮🇩, Thai 🇹🇭, Malay 🇲🇾, Khmer🇰🇭, Lao🇱🇦, Tagalog🇵🇭 and Burmese🇲🇲.
+Explore <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b" target="_blank">our article</a> for more.
 </span>
 <br>
 <span>
-<span style="color: red">NOTE
-By using our service, you are required to
-not to use our service to generate any harmful, inappropriate or
+<span style="color: red">NOTE: The chatbot may produce false and harmful content and does not have up-to-date knowledge.</span>
+By using our service, you are required to agree to our <a href="https://huggingface.co/SeaLLMs/SeaLLM-Chat-13b/blob/main/LICENSE" target="_blank" style="color: red">Terms Of Use</a>, which includes
+not to use our service to generate any harmful, inappropriate or illegal content that violates local and international laws.
 The service collects user dialogue data for testing and performance improvement, and reserves the right to distribute it under
 <a href="https://creativecommons.org/licenses/by/4.0/">(CC-BY)</a> or similar license. So do not enter any personal information!
 </span>
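Since MODEL_DESC is an f-string, the {MODEL_NAME} placeholder is substituted once, when the module is loaded, which is why the environment-variable override of MODEL_NAME added earlier in this commit sits above it. A minimal sketch of that ordering, with the HTML trimmed down (not the full template from app.py):

```python
import os

# The override must run before MODEL_DESC is defined: the f-string is
# evaluated once, at definition time, not when the description is rendered.
MODEL_NAME = str(os.environ.get("MODEL_NAME", "SeaLLM-13B"))

MODEL_DESC = f"""
<span style="font-size: larger">{MODEL_NAME} - a helpful assistant for Southeast Asian Languages.</span>
"""

print(MODEL_DESC)
```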
@@ -731,17 +735,6 @@ def llama_chat_sys_input_seq_constructor(text, sys_prompt=SYSTEM_PROMPT_1, bos_t
     return f"{bos_token}{B_INST} {B_SYS} {sys_prompt} {E_SYS} {text} {E_INST}"
 
 
-def few_shot_prompt(
-    message: str,
-    history: List[Tuple[str, str]],
-    sys_prompt=SYSTEM_PROMPT_1,
-    bos_token=BOS_TOKEN,
-    eos_token=EOS_TOKEN,
-    include_end_instruct=True,
-):
-    return f"{bos_token} {message}"
-
-
 def llama_chat_multiturn_sys_input_seq_constructor(
     message: str,
     history: List[Tuple[str, str]],
@@ -1572,10 +1565,9 @@ def batch_inference(
         prompt_format_fn = llama_chat_multiturn_sys_input_seq_constructor
     elif prompt_mode == 'few-shot':
         from functools import partial
-
-
-
-        prompt_format_fn = few_shot_prompt
+        prompt_format_fn = partial(
+            llama_chat_multiturn_sys_input_seq_constructor, include_end_instruct=False
+        )
     else:
         raise gr.Error(f'Wrong mode {prompt_mode}')
 
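Together with the removal of few_shot_prompt above, this hunk makes few-shot mode reuse the multi-turn constructor with the trailing instruction marker disabled, by pre-binding the keyword with functools.partial. A minimal, self-contained sketch of that pattern; format_prompt below is an illustrative stand-in, not the function in app.py:

```python
from functools import partial

def format_prompt(message: str, sys_prompt: str = "You are a helpful assistant.",
                  include_end_instruct: bool = True) -> str:
    """Illustrative stand-in for the real Llama-style prompt constructor."""
    end = " [/INST]" if include_end_instruct else ""
    return f"<s>[INST] <<SYS>>\n{sys_prompt}\n<</SYS>>\n\n{message}{end}"

# partial() pre-binds include_end_instruct=False, so few-shot mode can call
# the same function chat mode uses, just without closing the instruction.
few_shot_format = partial(format_prompt, include_end_instruct=False)

print(format_prompt("Hello"))     # ends with " [/INST]"
print(few_shot_format("Hello"))   # leaves the prompt open for completion
```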
@@ -1607,7 +1599,6 @@ def batch_inference(
     for res, item in zip(responses, all_items):
         item['response'] = res
 
-    # save_path = "/mnt/workspace/workgroup/phi/test.json"
     save_path = BATCH_INFER_SAVE_TMP_FILE
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
     with open(save_path, 'w', encoding='utf-8') as f:
@@ -1629,6 +1620,15 @@ each item has `prompt` key. We put guardrails to enhance safety, so do not input
 ```
 """
 
+CHAT_EXAMPLES = [
+    ["Hãy giải thích thuyết tương đối rộng."],
+    ["Tolong bantu saya menulis email ke lembaga pemerintah untuk mencari dukungan finansial untuk penelitian AI."],
+    ["ຂໍແຈ້ງ 5 ສະຖານທີ່ທ່ອງທ່ຽວໃນນະຄອນຫຼວງວຽງຈັນ"],
+]
+
+
+# performance items
+
 
 def launch():
     global demo, llm, DEBUG, LOG_FILE
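CHAT_EXAMPLES (Vietnamese, Indonesian and Lao prompts) is later passed to gr.ChatInterface as examples with cache_examples=False, so clicking an example sends it through the live model rather than replaying a pre-computed answer. A minimal sketch of that wiring, assuming a trivial echo function in place of the real response_fn:

```python
import gradio as gr

def echo_fn(message, history):
    # Stand-in for the real response_fn; it simply echoes the user message.
    return f"You said: {message}"

demo = gr.ChatInterface(
    echo_fn,
    # app.py wraps each example in a list (CHAT_EXAMPLES) because the real
    # interface also has additional numeric inputs; plain strings work too.
    examples=["Hãy giải thích thuyết tương đối rộng."],
    cache_examples=False,  # run the model live instead of caching example replies
)

if __name__ == "__main__":
    demo.launch()
```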
@@ -1701,7 +1701,7 @@ def launch():
 
     if QUANTIZATION == 'awq':
         print(F'Load model in int4 quantization')
-        llm = LLM(model=model_path, dtype=
+        llm = LLM(model=model_path, dtype="float16", tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization, quantization="awq")
     else:
         llm = LLM(model=model_path, dtype=dtype, tensor_parallel_size=tensor_parallel, gpu_memory_utilization=gpu_memory_utilization)
 
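The AWQ branch now pins dtype="float16" and passes quantization="awq" to vLLM's LLM constructor, since the AWQ kernels run with fp16 activations. A minimal sketch of loading and querying a model this way with vLLM; the model id below is an assumption for illustration, not necessarily an AWQ checkpoint:

```python
from vllm import LLM, SamplingParams

# Hypothetical AWQ checkpoint name, used only for illustration.
llm = LLM(
    model="SeaLLMs/SeaLLM-Chat-13b",  # substitute an actual AWQ-quantized model
    quantization="awq",               # load int4 AWQ weights
    dtype="float16",                  # AWQ kernels expect fp16 activations
    gpu_memory_utilization=0.9,
)

params = SamplingParams(temperature=0.2, max_tokens=128)
outputs = llm.generate(["Hello, how are you?"], params)
print(outputs[0].outputs[0].text)
```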
@@ -1751,7 +1751,7 @@ def launch():
                 ["upload_chat.json", "chat", 0.2, 1024, 0.5, 0, "[STOP],[END],<s>,</s>"],
                 ["upload_few_shot.json", "few-shot", 0.2, 128, 0.5, 0, "[STOP],[END],<s>,</s>,\\n"]
             ],
-            cache_examples=
+            # cache_examples=True,
         )
 
         demo_chat = gr.ChatInterface(
@@ -1765,7 +1765,7 @@ def launch():
                 ],
                 show_copy_button=True,
             ),
-            textbox=gr.Textbox(placeholder='Type message', lines=
+            textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
             submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
             # ! consider preventing the stop button
             # stop_btn=None,
@@ -1780,26 +1780,42 @@ def launch():
                 # ! Remove the system prompt textbox to avoid jailbreaking
                 # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
             ],
+            examples=CHAT_EXAMPLES,
+            cache_examples=False
         )
+        descriptions = model_desc
+        if DISPLAY_MODEL_PATH:
+            descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
+
         demo = CustomTabbedInterface(
             interface_list=[demo_chat, demo_file_upload],
             tab_names=["Chat Interface", "Batch Inference"],
             title=f"{model_title}",
-            description=
+            description=descriptions,
         )
         demo.title = MODEL_NAME
+        callback = None
         with demo:
+            if DATA_SET_REPO_PATH != "":
+                try:
+                    from performance_plot import attach_plot_to_demo
+                    attach_plot_to_demo(demo)
+                except Exception as e:
+                    print(f'Fail to load DEMO plot: {str(e)}')
+
             gr.Markdown(cite_markdown)
-            if DISPLAY_MODEL_PATH:
-
+            # if DISPLAY_MODEL_PATH:
+            #     gr.Markdown(path_markdown.format(model_path=model_path))
 
             if ENABLE_AGREE_POPUP:
                 demo.load(None, None, None, _js=AGREE_POP_SCRIPTS)
 
-
         demo.queue()
         demo.launch(server_port=PORT)
     else:
+        descriptions = model_desc
+        if DISPLAY_MODEL_PATH:
+            descriptions += f"<br> {path_markdown.format(model_path=model_path)}"
         demo = gr.ChatInterface(
             response_fn,
             chatbot=ChatBot(
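The new block inside with demo: attaches an optional performance plot but keeps the import inside try/except, so a missing or broken performance_plot module only logs a warning instead of taking the whole Space down. The same defensive pattern in isolation, where optional_extras is a hypothetical module name:

```python
def attach_optional_features(demo):
    # Lazy, guarded import: if the optional module is absent or raises,
    # the demo still launches. "optional_extras" is a hypothetical name.
    try:
        from optional_extras import attach_plot_to_demo
        attach_plot_to_demo(demo)
    except Exception as e:
        print(f'Fail to load DEMO plot: {e}')
```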
@@ -1811,12 +1827,12 @@ def launch():
                 ],
                 show_copy_button=True,
             ),
-            textbox=gr.Textbox(placeholder='Type message', lines=
+            textbox=gr.Textbox(placeholder='Type message', lines=4, max_lines=128, min_width=200),
             submit_btn=gr.Button(value='Submit', variant="primary", scale=0),
             # ! consider preventing the stop button
             # stop_btn=None,
             title=f"{model_title}",
-            description=
+            description=descriptions,
             additional_inputs=[
                 gr.Number(value=temperature, label='Temperature (higher -> more random)'),
                 gr.Number(value=max_tokens, label='Max generated tokens (increase if want more generation)'),
@@ -1826,6 +1842,8 @@ def launch():
                 # ! Remove the system prompt textbox to avoid jailbreaking
                 # gr.Textbox(value=sys_prompt, label='System prompt', lines=8)
             ],
+            examples=CHAT_EXAMPLES,
+            cache_examples=False
         )
         demo.title = MODEL_NAME
         with demo: