Spaces:

shukdevdattaEX
/

Gemma-3n-Multi-modal-chatbot

Sleeping

App Files Files Community

shukdevdattaEX committed on Jul 19

Commit

49c9e15

verified ·

1 Parent(s): 38b2ece

Update app.py

Browse files

Files changed (1) hide show

app.py +74 -16

app.py CHANGED Viewed

@@ -213,9 +213,8 @@ class MultimodalChatbot:
 def create_interface():
     """Create the Gradio interface"""
-    # Initialize chatbot (you'll need to set your API key)
-    api_key = os.getenv("OPENROUTER_API_KEY", "your_api_key_here")
-    chatbot = MultimodalChatbot(api_key)
     with gr.Blocks(title="Multimodal Chatbot with Gemma 3n", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
@@ -228,9 +227,24 @@ def create_interface():
         - **Images**: Analyze visual content
         - **Video**: Extract frames and analyze video content
-        **Setup**: Set your OpenRouter API key as an environment variable `OPENROUTER_API_KEY`
         """)
         with gr.Row():
             with gr.Column(scale=1):
                 # Input components
@@ -263,7 +277,7 @@ def create_interface():
                     type="filepath"
                 )
-                submit_btn = gr.Button("🚀 Send", variant="primary", size="lg")
                 clear_btn = gr.Button("🗑️ Clear", variant="secondary")
             with gr.Column(scale=2):
@@ -275,16 +289,45 @@ def create_interface():
                 )
         # Event handlers
-        def process_input(text, pdf, audio, image, video, history):
             return chatbot.chat(text, pdf, audio, image, video, history)
         def clear_all():
             return [], "", None, None, None, None
         # Button events
         submit_btn.click(
             process_input,
-            inputs=[text_input, pdf_input, audio_input, image_input, video_input, chatbot_interface],
             outputs=[chatbot_interface, text_input]
         )
@@ -296,18 +339,27 @@ def create_interface():
         # Enter key support
         text_input.submit(
             process_input,
-            inputs=[text_input, pdf_input, audio_input, image_input, video_input, chatbot_interface],
             outputs=[chatbot_interface, text_input]
         )
         # Examples
         gr.Markdown("""
         ### 🎯 Example Usage:
-        - Upload a PDF and ask "Summarize this document"
-        - Upload an image and ask "What do you see in this image?"
-        - Record audio and ask "What did I say?"
-        - Upload a video and ask "Describe what's happening"
-        - Combine multiple inputs: "Compare this image with the PDF content"
         """)
     return demo
@@ -324,11 +376,17 @@ if __name__ == "__main__":
         "numpy"
     ]
     print("Required packages:", ", ".join(required_packages))
-    print("\nTo install: pip install " + " ".join(required_packages))
-    print("\nDon't forget to set your OPENROUTER_API_KEY environment variable!")
     demo = create_interface()
     demo.launch(
-        share=True
     )

 def create_interface():
     """Create the Gradio interface"""
+    # Chatbot will be initialized when API key is provided
+    chatbot = None
     with gr.Blocks(title="Multimodal Chatbot with Gemma 3n", theme=gr.themes.Soft()) as demo:
         gr.Markdown("""
         - **Images**: Analyze visual content
         - **Video**: Extract frames and analyze video content
+        **Setup**: Enter your OpenRouter API key below to get started
         """)
+        # API Key Input Section
+        with gr.Row():
+            with gr.Column():
+                api_key_input = gr.Textbox(
+                    label="🔑 OpenRouter API Key",
+                    placeholder="Enter your OpenRouter API key here...",
+                    type="password",
+                    info="Your API key is not stored and only used for this session"
+                )
+                api_status = gr.Textbox(
+                    label="Connection Status",
+                    value="❌ API Key not provided",
+                    interactive=False
+                )
         with gr.Row():
             with gr.Column(scale=1):
                 # Input components
                     type="filepath"
                 )
+                submit_btn = gr.Button("🚀 Send", variant="primary", size="lg", interactive=False)
                 clear_btn = gr.Button("🗑️ Clear", variant="secondary")
             with gr.Column(scale=2):
                 )
         # Event handlers
+        def validate_api_key(api_key):
+            if not api_key or len(api_key.strip()) == 0:
+                return "❌ API Key not provided", gr.update(interactive=False)
+            try:
+                # Test the API key by creating a client
+                test_client = OpenAI(
+                    base_url="https://openrouter.ai/api/v1",
+                    api_key=api_key.strip(),
+                )
+                return "✅ API Key validated successfully", gr.update(interactive=True)
+            except Exception as e:
+                return f"❌ API Key validation failed: {str(e)}", gr.update(interactive=False)
+        def process_input(api_key, text, pdf, audio, image, video, history):
+            if not api_key or len(api_key.strip()) == 0:
+                if history is None:
+                    history = []
+                history.append(("Error", "❌ Please provide a valid API key first"))
+                return history, ""
+            # Initialize chatbot with the provided API key
+            chatbot = MultimodalChatbot(api_key.strip())
             return chatbot.chat(text, pdf, audio, image, video, history)
         def clear_all():
             return [], "", None, None, None, None
+        # API Key validation
+        api_key_input.change(
+            validate_api_key,
+            inputs=[api_key_input],
+            outputs=[api_status, submit_btn]
+        )
         # Button events
         submit_btn.click(
             process_input,
+            inputs=[api_key_input, text_input, pdf_input, audio_input, image_input, video_input, chatbot_interface],
             outputs=[chatbot_interface, text_input]
         )
         # Enter key support
         text_input.submit(
             process_input,
+            inputs=[api_key_input, text_input, pdf_input, audio_input, image_input, video_input, chatbot_interface],
             outputs=[chatbot_interface, text_input]
         )
         # Examples
         gr.Markdown("""
         ### 🎯 Example Usage:
+        1. **First**: Enter your OpenRouter API key in the field above
+        2. **Then try these examples**:
+           - Upload a PDF and ask "Summarize this document"
+           - Upload an image and ask "What do you see in this image?"
+           - Record audio and ask "What did I say?"
+           - Upload a video and ask "Describe what's happening"
+           - Combine multiple inputs: "Compare this image with the PDF content"
+        ### 🔑 Getting an API Key:
+        1. Go to [OpenRouter.ai](https://openrouter.ai)
+        2. Sign up for an account
+        3. Navigate to the API Keys section
+        4. Create a new API key
+        5. Copy and paste it in the field above
         """)
     return demo
         "numpy"
     ]
+    print("🚀 Multimodal Chatbot with Gemma 3n")
+    print("=" * 50)
     print("Required packages:", ", ".join(required_packages))
+    print("\n📦 To install: pip install " + " ".join(required_packages))
+    print("\n🔑 Get your API key from: https://openrouter.ai")
+    print("💡 Enter your API key in the web interface when it loads")
     demo = create_interface()
     demo.launch(
+        share=True,
+        server_name="0.0.0.0",
+        server_port=7860,
+        debug=True
     )