Upload 3 files
- app.py +1 -1
- llmdolphin.py +195 -184
app.py
CHANGED

@@ -221,7 +221,7 @@ with gr.Blocks(fill_width=True, elem_id="container", css=css, delete_cache=(60,
     ).success(
         fn=dolphin_respond_auto,
         inputs=[prompt, chatbot],
-        outputs=[chatbot, result],
+        outputs=[chatbot, result, prompt],
         queue=True,
         show_progress="full",
         show_api=False,
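The app.py hunk only adds prompt to the outputs of the .success() step, which has to match the number of values dolphin_respond_auto now yields (see the llmdolphin.py hunks below). A minimal sketch of that wiring, with a hypothetical stub in place of the real handler:

import gradio as gr

# Hypothetical stub standing in for dolphin_respond_auto: it yields one value
# per wired output component (chatbot, result, prompt).
def dolphin_respond_auto_stub(message, history):
    history = (history or []) + [(message, "...")]
    yield history, gr.update(), gr.update(value="")  # e.g. clear the prompt box when done

with gr.Blocks() as demo:
    prompt = gr.Textbox()
    chatbot = gr.Chatbot()
    result = gr.Textbox()
    prompt.submit(
        fn=lambda m, h: (m, h), inputs=[prompt, chatbot], outputs=[prompt, chatbot], queue=False
    ).success(
        fn=dolphin_respond_auto_stub,
        inputs=[prompt, chatbot],
        outputs=[chatbot, result, prompt],  # prompt added so the third yielded value has a target
        queue=True,
        show_progress="full",
        show_api=False,
    )

If the wired outputs and the yielded values disagree in count, Gradio reports a return-value mismatch, which is why the two files change together in this commit.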
llmdolphin.py
CHANGED

@@ -976,19 +976,19 @@ def add_dolphin_models(query, format_name):
         if s and "" in s: s.remove("")
         if len(s) == 1:
             repo = s[0]
-            if not api.repo_exists(repo_id = repo): return gr.update(
+            if not api.repo_exists(repo_id = repo): return gr.update()
             files = api.list_repo_files(repo_id = repo)
             for file in files:
                 if str(file).endswith(".gguf"): add_models[filename] = [repo, format]
         elif len(s) >= 2:
             repo = s[0]
             filename = s[1]
-            if not api.repo_exists(repo_id = repo) or not api.file_exists(repo_id = repo, filename = filename): return gr.update(
+            if not api.repo_exists(repo_id = repo) or not api.file_exists(repo_id = repo, filename = filename): return gr.update()
             add_models[filename] = [repo, format]
-        else: return gr.update(
+        else: return gr.update()
     except Exception as e:
         print(e)
-        return gr.update(
+        return gr.update()
     llm_models = (llm_models | add_models).copy()
     update_llm_model_tupled_list()
     choices = get_dolphin_models()
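The four edits in this hunk all replace an unterminated gr.update( with a complete gr.update() call, i.e. a no-op component update, so a failed lookup leaves the model dropdown untouched. A hedged sketch of the validation pattern, using only the HfApi calls that appear in the diff (the surrounding globals of add_dolphin_models are assumed):

import gradio as gr
from huggingface_hub import HfApi

def gguf_source_exists(repo: str, filename: str | None = None) -> bool:
    """Return True only if the Hub repo (and optionally one file in it) exists."""
    api = HfApi()
    if not api.repo_exists(repo_id=repo):
        return False
    if filename and not api.file_exists(repo_id=repo, filename=filename):
        return False
    return True

# Inside the handler, returning a bare gr.update() keeps the dropdown as-is:
# if not gguf_source_exists(repo, filename): return gr.update()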
@@ -1235,84 +1235,89 @@ def dolphin_respond(
     repeat_penalty: float = 1.1,
     progress=gr.Progress(track_tqdm=True),
 ):
-    progress(0, desc="Processing...")
-
-    if override_llm_format:
-        chat_template = override_llm_format
-    else:
-        chat_template = llm_models[model][1]
-
-    llm = Llama(
-        model_path=str(Path(f"{llm_models_dir}/{model}")),
-        flash_attn=True,
-        n_gpu_layers=81, # 81
-        n_batch=1024,
-        n_ctx=8192, #8192
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
-        custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
-        debug_output=False
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    stream = agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False
-    )
-
-    progress(0.5, desc="Processing...")
-
-    outputs = ""
-    for output in stream:
-        outputs += output
-        yield [(outputs, None)]
+    try:
+        progress(0, desc="Processing...")
+
+        if override_llm_format:
+            chat_template = override_llm_format
+        else:
+            chat_template = llm_models[model][1]
+
+        llm = Llama(
+            model_path=str(Path(f"{llm_models_dir}/{model}")),
+            flash_attn=True,
+            n_gpu_layers=81, # 81
+            n_batch=1024,
+            n_ctx=8192, #8192
+        )
+        provider = LlamaCppPythonProvider(llm)
+
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
+            custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
+            debug_output=False
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        for msn in history:
+            user = {
+                'role': Roles.user,
+                'content': msn[0]
+            }
+            assistant = {
+                'role': Roles.assistant,
+                'content': msn[1]
+            }
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False
+        )
+
+        progress(0.5, desc="Processing...")
+
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield [(outputs, None)]
+    except Exception as e:
+        print(e)
+        yield [("", None)]


 def dolphin_parse(
     history: list[tuple[str, str]],
 ):
-    if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
-        return "", gr.update(visible=True), gr.update(visible=True)
     try:
+        if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1:
+            return "", gr.update(), gr.update()
         msg = history[-1][0]
         raw_prompt = get_raw_prompt(msg)
-
-
-
-
-
-
-
-
+        prompts = []
+        if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
+            prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit"])
+        else:
+            prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit"])
+        return ", ".join(prompts), gr.update(interactive=True), gr.update(interactive=True)
+    except Exception as e:
+        print(e)
+        return "", gr.update(), gr.update()


 @torch.inference_mode()
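This hunk wraps the entire model setup and streaming loop of dolphin_respond in try/except; the except branch prints the error and still yields a chatbot-shaped value, so a failed model load ends the event cleanly instead of leaving the generator to raise. A minimal sketch of the pattern, with a fake token stream standing in for agent.get_chat_response(..., returns_streaming_generator=True):

# Minimal sketch: a streaming Gradio handler whose error path yields the same
# shape as its success path.
def fake_token_stream(message: str):
    yield from f"echo: {message}"

def stream_reply(message: str, history: list[tuple[str, str]]):
    try:
        outputs = ""
        for token in fake_token_stream(message):
            outputs += token
            yield [(outputs, None)]   # partial chatbot update
    except Exception as e:
        print(e)
        yield [("", None)]            # same shape, empty reply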
@@ -1329,87 +1334,92 @@ def dolphin_respond_auto(
     repeat_penalty: float = 1.1,
     progress=gr.Progress(track_tqdm=True),
 ):
-    #if not is_japanese(message): return [(None, None)]
-    progress(0, desc="Processing...")
-
-    if override_llm_format:
-        chat_template = override_llm_format
-    else:
-        chat_template = llm_models[model][1]
-
-    llm = Llama(
-        model_path=str(Path(f"{llm_models_dir}/{model}")),
-        flash_attn=True,
-        n_gpu_layers=81, # 81
-        n_batch=1024,
-        n_ctx=8192, #8192
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
-        custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
-        debug_output=False
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    for msn in history:
-        user = {
-            'role': Roles.user,
-            'content': msn[0]
-        }
-        assistant = {
-            'role': Roles.assistant,
-            'content': msn[1]
-        }
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    progress(0, desc="Translating...")
-    stream = agent.get_chat_response(
-        message,
-        llm_sampling_settings=settings,
-        chat_history=messages,
-        returns_streaming_generator=True,
-        print_output=False
-    )
-
-    progress(0.5, desc="Processing...")
-
-    outputs = ""
-    for output in stream:
-        outputs += output
-        yield [(outputs, None)], gr.update()
+    try:
+        #if not is_japanese(message): return [(None, None)]
+        progress(0, desc="Processing...")
+
+        if override_llm_format:
+            chat_template = override_llm_format
+        else:
+            chat_template = llm_models[model][1]
+
+        llm = Llama(
+            model_path=str(Path(f"{llm_models_dir}/{model}")),
+            flash_attn=True,
+            n_gpu_layers=81, # 81
+            n_batch=1024,
+            n_ctx=8192, #8192
+        )
+        provider = LlamaCppPythonProvider(llm)
+
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
+            custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
+            debug_output=False
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        for msn in history:
+            user = {
+                'role': Roles.user,
+                'content': msn[0]
+            }
+            assistant = {
+                'role': Roles.assistant,
+                'content': msn[1]
+            }
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        progress(0, desc="Translating...")
+        stream = agent.get_chat_response(
+            message,
+            llm_sampling_settings=settings,
+            chat_history=messages,
+            returns_streaming_generator=True,
+            print_output=False
+        )
+
+        progress(0.5, desc="Processing...")
+
+        outputs = ""
+        for output in stream:
+            outputs += output
+            yield [(outputs, None)], gr.update(), gr.update()
+    except Exception as e:
+        print(e)
+        yield [("", None)], gr.update(), gr.update()


 def dolphin_parse_simple(
     message: str,
     history: list[tuple[str, str]],
 ):
-    #if not is_japanese(message): return message
-    if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
     try:
+        #if not is_japanese(message): return message
+        if dolphin_sysprompt_mode == "Chat with LLM" or not history or len(history) < 1: return message
         msg = history[-1][0]
         raw_prompt = get_raw_prompt(msg)
-
+        prompts = []
+        if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
+            prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit", "rating_explicit"])
+        else:
+            prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit", "rating_explicit"])
+        return ", ".join(prompts)
+    except Exception as e:
+        print(e)
         return ""
-    prompts = []
-    if dolphin_sysprompt_mode == "Japanese to Danbooru Dictionary" and is_japanese(raw_prompt):
-        prompts = list_uniq(jatags_to_danbooru_tags(to_list_ja(raw_prompt)) + ["nsfw", "explicit", "rating_explicit"])
-    else:
-        prompts = list_uniq(to_list(raw_prompt) + ["nsfw", "explicit", "rating_explicit"])
-    return ", ".join(prompts)


 # https://huggingface.co/spaces/CaioXapelaum/GGUF-Playground
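dolphin_respond_auto gets the same try/except treatment, and every yield now carries three values so it lines up with the [chatbot, result, prompt] outputs wired in app.py; the bare gr.update() calls leave result and the prompt box untouched while streaming. A small sketch of that shape (component wiring as in app.py, the body is a hypothetical stand-in):

import gradio as gr

def auto_respond_stub(message, history, progress=gr.Progress(track_tqdm=True)):
    try:
        progress(0, desc="Translating...")
        text = ""
        for ch in "translated prompt":
            text += ch
            # one value per output: chatbot pairs, untouched result, untouched prompt
            yield [(text, None)], gr.update(), gr.update()
        progress(0.5, desc="Processing...")
    except Exception as e:
        print(e)
        yield [("", None)], gr.update(), gr.update()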
@@ -1430,47 +1440,47 @@ def respond_playground(
     top_k,
     repeat_penalty,
 ):
-    if override_llm_format:
-        chat_template = override_llm_format
-    else:
-        chat_template = llm_models[model][1]
-
-    llm = Llama(
-        model_path=str(Path(f"{llm_models_dir}/{model}")),
-        flash_attn=True,
-        n_gpu_layers=81, # 81
-        n_batch=1024,
-        n_ctx=8192, #8192
-    )
-    provider = LlamaCppPythonProvider(llm)
-
-    agent = LlamaCppAgent(
-        provider,
-        system_prompt=f"{system_message}",
-        predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
-        custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
-        debug_output=False
-    )
-
-    settings = provider.get_provider_default_settings()
-    settings.temperature = temperature
-    settings.top_k = top_k
-    settings.top_p = top_p
-    settings.max_tokens = max_tokens
-    settings.repeat_penalty = repeat_penalty
-    settings.stream = True
-
-    messages = BasicChatHistory()
-
-    # Add user and assistant messages to the history
-    for msn in history:
-        user = {'role': Roles.user, 'content': msn[0]}
-        assistant = {'role': Roles.assistant, 'content': msn[1]}
-        messages.add_message(user)
-        messages.add_message(assistant)
-
-    # Stream the response
     try:
+        if override_llm_format:
+            chat_template = override_llm_format
+        else:
+            chat_template = llm_models[model][1]
+
+        llm = Llama(
+            model_path=str(Path(f"{llm_models_dir}/{model}")),
+            flash_attn=True,
+            n_gpu_layers=81, # 81
+            n_batch=1024,
+            n_ctx=8192, #8192
+        )
+        provider = LlamaCppPythonProvider(llm)
+
+        agent = LlamaCppAgent(
+            provider,
+            system_prompt=f"{system_message}",
+            predefined_messages_formatter_type=chat_template if not isinstance(chat_template, MessagesFormatter) else None,
+            custom_messages_formatter=chat_template if isinstance(chat_template, MessagesFormatter) else None,
+            debug_output=False
+        )
+
+        settings = provider.get_provider_default_settings()
+        settings.temperature = temperature
+        settings.top_k = top_k
+        settings.top_p = top_p
+        settings.max_tokens = max_tokens
+        settings.repeat_penalty = repeat_penalty
+        settings.stream = True
+
+        messages = BasicChatHistory()
+
+        # Add user and assistant messages to the history
+        for msn in history:
+            user = {'role': Roles.user, 'content': msn[0]}
+            assistant = {'role': Roles.assistant, 'content': msn[1]}
+            messages.add_message(user)
+            messages.add_message(assistant)
+
+        # Stream the response
         stream = agent.get_chat_response(
             message,
             llm_sampling_settings=settings,

@@ -1484,4 +1494,5 @@ def respond_playground(
             outputs += output
             yield outputs
     except Exception as e:
-
+        print(e)
+        yield ""
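For respond_playground the change is purely structural: the Llama/provider/agent setup that used to run before try now runs inside it, and the except branch (second hunk) prints the error and yields an empty string so the playground chat still gets a final value. A hedged sketch of that ordering, reusing only the constructor arguments visible in the diff; the import paths follow the usual llama-cpp-python and llama-cpp-agent layout and are an assumption here:

from pathlib import Path
from llama_cpp import Llama                                    # assumed import path
from llama_cpp_agent import LlamaCppAgent                      # assumed import path
from llama_cpp_agent.providers import LlamaCppPythonProvider   # assumed import path

def playground_reply(message, history, model, system_message, llm_models_dir="llm_models"):
    # history handling omitted for brevity; the real handler replays it into a BasicChatHistory
    try:
        # Everything that can fail (file lookup, model load, agent wiring) now
        # sits inside the try, mirroring the hunks above.
        llm = Llama(
            model_path=str(Path(f"{llm_models_dir}/{model}")),
            flash_attn=True,
            n_gpu_layers=81,
            n_batch=1024,
            n_ctx=8192,
        )
        provider = LlamaCppPythonProvider(llm)
        agent = LlamaCppAgent(provider, system_prompt=f"{system_message}", debug_output=False)
        settings = provider.get_provider_default_settings()
        settings.stream = True
        stream = agent.get_chat_response(
            message,
            llm_sampling_settings=settings,
            returns_streaming_generator=True,
            print_output=False,
        )
        outputs = ""
        for output in stream:
            outputs += output
            yield outputs
    except Exception as e:
        print(e)
        yield ""   # keep the UI responsive even when setup fails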