Spaces:
				
			
			
	
			
			
		Runtime error
		
	
	
	
			
			
	
	
	
	
		
		
		Runtime error
		
	app.py
Browse files
    	
        app.py
    ADDED
    
    | @@ -0,0 +1,97 @@ | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | |
|  | 
|  | |
| 1 | 
            +
            from huggingface_hub import InferenceClient
         | 
| 2 | 
            +
            import gradio as gr
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            # Inference client bound to the "BioMistral/BioMistral-7B" model id;
            # ``generate`` sends every text-generation request through it.
            client = InferenceClient("BioMistral/BioMistral-7B")
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            def format_prompt(message, history):
                """Build an ``[INST]``-tagged prompt string from the conversation.

                Args:
                    message: The new user message that the model should answer.
                    history: Iterable of ``(user_prompt, bot_response)`` pairs for
                        the earlier turns, in the order they happened.

                Returns:
                    One string that opens with ``<s>``, wraps each user turn in
                    ``[INST] ... [/INST]``, terminates each earlier bot reply with
                    ``</s> `` and ends with the new message wrapped the same way.
                """
                # Collect the pieces and join once rather than growing a string
                # with ``+=`` on every turn of the history.
                segments = ["<s>"]
                for user_prompt, bot_response in history:
                    segments.append(f"[INST] {user_prompt} [/INST]")
                    segments.append(f" {bot_response}</s> ")
                segments.append(f"[INST] {message} [/INST]")
                return "".join(segments)
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            def generate(
                prompt, history, temperature=0.9, max_new_tokens=256, top_p=0.95, repetition_penalty=1.0,
            ):
                """Stream a model reply for ``prompt``, yielding the text produced so far.

                Args:
                    prompt: The new user message.
                    history: Earlier conversation turns, forwarded to ``format_prompt``.
                    temperature: Sampling temperature; anything below 0.01 is raised
                        to 0.01.
                    max_new_tokens: Upper bound on the number of generated tokens.
                    top_p: Nucleus-sampling threshold.
                    repetition_penalty: Penalty factor for repeated tokens.

                Yields:
                    The accumulated response text after each streamed token.
                """
                # Normalise every numeric setting, not only temperature and top_p:
                # the values come from UI widgets, so coerce them to the types the
                # generation call expects.
                temperature = max(float(temperature), 1e-2)
                top_p = float(top_p)
                max_new_tokens = int(max_new_tokens)
                repetition_penalty = float(repetition_penalty)

                generate_kwargs = dict(
                    temperature=temperature,
                    max_new_tokens=max_new_tokens,
                    top_p=top_p,
                    repetition_penalty=repetition_penalty,
                    do_sample=True,
                    seed=42,  # fixed seed on every request
                )

                formatted_prompt = format_prompt(prompt, history)

                # stream=True with details=True: the loop below reads the text of
                # each streamed item from ``.token.text``.
                stream = client.text_generation(
                    formatted_prompt,
                    **generate_kwargs,
                    stream=True,
                    details=True,
                    return_full_text=False,
                )

                output = ""
                for response in stream:
                    output += response.token.text
                    # Emit the whole text so far, not just the newest token.
                    yield output
                # Only visible to callers that drive the generator manually
                # (e.g. via ``yield from``); kept for backward compatibility.
                return output
         | 
| 42 | 
            +
             | 
| 43 | 
            +
             | 
| 44 | 
            +
            # Extra sampling controls for the chat UI. They are listed in the same
            # order as the parameters of ``generate`` that follow ``prompt`` and
            # ``history``: temperature, max_new_tokens, top_p, repetition_penalty.
            additional_inputs = [
                gr.Slider(
                    label="Temperature",
                    value=0.9,
                    minimum=0.0,
                    maximum=1.0,
                    step=0.05,
                    interactive=True,
                    info="Higher values produce more diverse outputs",
                ),
                gr.Slider(
                    label="Max new tokens",
                    value=512,
                    # Start at one step instead of 0 so a zero-token generation
                    # can never be requested, and end on a multiple of the step
                    # (the previous maximum of 1048 was not reachable in steps of 64).
                    minimum=64,
                    maximum=1024,
                    step=64,
                    interactive=True,
                    info="The maximum numbers of new tokens",
                ),
                gr.Slider(
                    label="Top-p (nucleus sampling)",
                    value=0.90,
                    # NOTE(review): some backends may reject the 0.0 and 1.0
                    # endpoints for top_p; confirm against the serving API.
                    minimum=0.0,
                    maximum=1,
                    step=0.05,
                    interactive=True,
                    info="Higher values sample more low-probability tokens",
                ),
                gr.Slider(
                    label="Repetition penalty",
                    value=1.2,
                    minimum=1.0,
                    maximum=2.0,
                    step=0.05,
                    interactive=True,
                    info="Penalize repeated tokens",
                ),
            ]
         | 
| 82 | 
            +
             | 
| 83 | 
            +
            # Chat display widget, built separately so its height and layout can be
            # set before it is handed to the ChatInterface below.
            chatbot = gr.Chatbot(height=450, layout="bubble")

            with gr.Blocks() as demo:
                # Heading markup: the closing tags were previously written as a
                # second pair of opening tags, leaving both elements unclosed.
                # NOTE(review): the title says "Mistral-7B-Chat" while the client
                # in this file targets "BioMistral/BioMistral-7B"; confirm the
                # intended wording.
                gr.HTML("<center><h1>🤖 Mistral-7B-Chat 💬</h1></center>")
                gr.ChatInterface(
                    generate,
                    chatbot=chatbot,  # reuse the pre-configured Chatbot widget
                    additional_inputs=additional_inputs,
                    examples=[
                        ["Give me the code for Binary Search in C++"],
                        ["Explain the chapter of The Grand Inquistor from The Brothers Karmazov"],
                    ],
                )

            # Enable request queuing, then start the app with debug output on.
            demo.queue().launch(debug=True)
         | 
