Spaces:

bigcode
/

arena

Running

App Files Files Community

terryyz committed on Sep 24

Commit

48a3a23

1 Parent(s): c833d3f

fix

Browse files

Files changed (1) hide show

app.py +144 -11

app.py CHANGED Viewed

@@ -594,7 +594,11 @@ def add_text_and_generate(state0, state1, text, temperature, max_tokens, model_a
     chat_stats_a = f"**Conversation:** {turn_count_a} turns | **Total Messages:** {len(state0['messages']) if state0 else 0}"
     chat_stats_b = f"**Conversation:** {turn_count_b} turns | **Total Messages:** {len(state1['messages']) if state1 else 0}"
-    return state0, state1, chat0, chat1, response0, response1, code0, code1, env0, env1, sandbox_state0, sandbox_state1, sandbox_output0, sandbox_output1, sandbox_component_update0, sandbox_component_update1, chat_stats_a, chat_stats_b, sandbox_view_a, sandbox_view_b
 def format_chat_history(messages):
     """Format messages for chat display with turn numbers"""
@@ -649,6 +653,8 @@ def clear_chat(state0, state1):
         "**Conversation:** 0 turns | **Total Messages:** 0",  # chat_stats_b
         "",    # sandbox_view_a (duplicate)
         "",    # sandbox_view_b (duplicate)
         f"**Model A:** {model_a}",  # model_display_a
         f"**Model B:** {model_b}",  # model_display_b
         "",    # text_input
@@ -828,6 +834,8 @@ def send_to_left_only(state0, state1, text, temperature, max_tokens, model_a, mo
         "",  # sandbox_view_b (empty)
         state0,  # state0_var
         state1,  # state1_var
         text,  # Keep original text input
         f"**Model A:** {model_a}",  # Update model display A
         f"**Model B:** {model_b}",  # Update model display B
@@ -926,6 +934,8 @@ def send_to_right_only(state0, state1, text, temperature, max_tokens, model_a, m
         sandbox_view_b,  # sandbox_view_b
         state0,  # state0_var
         state1,  # state1_var
         text,  # Keep original text input
         f"**Model A:** {model_a}",  # Update model display A
         f"**Model B:** {model_b}",  # Update model display B
@@ -939,6 +949,42 @@ def send_to_right_only(state0, state1, text, temperature, max_tokens, model_a, m
     )
 def run_sandbox_code(sandbox_state: dict, code: str, install_command: str) -> tuple[str, str, str]:
     """Run code in the appropriate sandbox environment"""
     if not code.strip():
@@ -972,7 +1018,6 @@ def run_sandbox_code(sandbox_state: dict, code: str, install_command: str) -> tu
             return result['sandbox_url'], "", result['stderr']
         elif env == SandboxEnvironment.GRADIO:
-            print(f"DEBUG: running gradio sandbox")
             sandbox_url, sandbox_id, stderr = run_gradio_sandbox(code, install_command, sandbox_state.get('sandbox_id'))
             sandbox_state['sandbox_id'] = sandbox_id
             return sandbox_url, "", stderr
@@ -990,7 +1035,6 @@ def run_sandbox_code(sandbox_state: dict, code: str, install_command: str) -> tu
             return sandbox_url, "", stderr
         elif env == SandboxEnvironment.PYTHON_RUNNER:
-            print(f"DEBUG: running python runner")
             output, stderr = run_code_interpreter(code, 'python', install_command)
             return "", output, stderr
@@ -1186,8 +1230,8 @@ def build_ui():
                         # Model A Sandbox
                         with gr.Column():
                             gr.Markdown("### Model A Sandbox")
-                            with gr.Tabs():
-                                with gr.Tab("View"):
                                     sandbox_view_a = gr.Markdown(
                                         "**Sandbox output will appear here automatically**"
                                     )
@@ -1196,19 +1240,29 @@ def build_ui():
                                         label="Model A Sandbox",
                                         visible=False,
                                     )
-                                with gr.Tab("Code"):
                                     code_a = gr.Code(
                                         label="Extracted Code",
                                         language="python",
                                         lines=8,
-                                        interactive=False,
                                     )
                         # Model B Sandbox
                         with gr.Column():
                             gr.Markdown("### Model B Sandbox")
-                            with gr.Tabs():
-                                with gr.Tab("View"):
                                     sandbox_view_b = gr.Markdown(
                                         "**Sandbox output will appear here automatically**"
                                     )
@@ -1217,12 +1271,22 @@ def build_ui():
                                         label="Model B Sandbox",
                                         visible=False,
                                     )
-                                with gr.Tab("Code"):
                                     code_b = gr.Code(
                                         label="Extracted Code",
                                         language="python",
                                         lines=8,
-                                        interactive=False,
                                     )
                 # Vote UI components
@@ -1519,6 +1583,8 @@ def build_ui():
                 result[19] if len(result) > 19 else "",  # sandbox_view_b
                 new_state0,  # state0_var
                 new_state1,  # state1_var
                 text,  # Keep original text input
                 f"**Model A:** {model_a}",  # Update model display A
                 f"**Model B:** {model_b}",  # Update model display B
@@ -1563,6 +1629,8 @@ def build_ui():
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
@@ -1609,6 +1677,8 @@ def build_ui():
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
@@ -1652,6 +1722,8 @@ def build_ui():
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
@@ -1698,6 +1770,8 @@ def build_ui():
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
@@ -1744,6 +1818,8 @@ def build_ui():
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
@@ -1779,6 +1855,10 @@ def build_ui():
                 chat_stats_b,    # Reset conversation statistics for model B
                 sandbox_view_a,  # Reset sandbox view for model A
                 sandbox_view_b,  # Reset sandbox view for model B
                 model_display_a, # Reset model display A
                 model_display_b, # Reset model display B
                 text_input,      # Clear text input
@@ -1906,6 +1986,59 @@ def build_ui():
                 "",  # Clear text_input to preserve examples
             )
         # Vote button click handlers
         for vote_btn, vote_type in [
             (vote_left_btn, "left"),

     chat_stats_a = f"**Conversation:** {turn_count_a} turns | **Total Messages:** {len(state0['messages']) if state0 else 0}"
     chat_stats_b = f"**Conversation:** {turn_count_b} turns | **Total Messages:** {len(state1['messages']) if state1 else 0}"
+    # Get install commands from sandbox states
+    install_command0 = sandbox_state0.get('install_command', '') if sandbox_state0 else ''
+    install_command1 = sandbox_state1.get('install_command', '') if sandbox_state1 else ''
+    return state0, state1, chat0, chat1, response0, response1, code0, code1, env0, env1, sandbox_state0, sandbox_state1, sandbox_output0, sandbox_output1, sandbox_component_update0, sandbox_component_update1, chat_stats_a, chat_stats_b, sandbox_view_a, sandbox_view_b, install_command0, install_command1
 def format_chat_history(messages):
     """Format messages for chat display with turn numbers"""
         "**Conversation:** 0 turns | **Total Messages:** 0",  # chat_stats_b
         "",    # sandbox_view_a (duplicate)
         "",    # sandbox_view_b (duplicate)
+        "",    # install_command_a
+        "",    # install_command_b
         f"**Model A:** {model_a}",  # model_display_a
         f"**Model B:** {model_b}",  # model_display_b
         "",    # text_input
         "",  # sandbox_view_b (empty)
         state0,  # state0_var
         state1,  # state1_var
+        state0.get('install_command', ''),  # state0_install_command
+        state1.get('install_command', ''),  # state1_install_command
         text,  # Keep original text input
         f"**Model A:** {model_a}",  # Update model display A
         f"**Model B:** {model_b}",  # Update model display B
         sandbox_view_b,  # sandbox_view_b
         state0,  # state0_var
         state1,  # state1_var
+        state0.get('install_command', ''),  # state0_install_command
+        state1.get('install_command', ''),  # state1_install_command
         text,  # Keep original text input
         f"**Model A:** {model_a}",  # Update model display A
         f"**Model B:** {model_b}",  # Update model display B
     )
+def rerun_code_execution(state, current_code: str, current_install_command: str, model_name: str) -> tuple[dict, str, str, str]:
+    """Re-run code execution for a specific model using the current code and install command from the UI components"""
+    if not state or not state.get("sandbox_state"):
+        return state, "", "", ""
+    sandbox_state = state["sandbox_state"]
+    if not current_code.strip():
+        return state, "", "", "No code to re-run"
+    # Update the sandbox state with the current code and install command
+    sandbox_state['code_to_execute'] = current_code
+    sandbox_state['install_command'] = current_install_command
+    # Re-run the code execution with the updated code and install command
+    sandbox_url, sandbox_output, sandbox_error = run_sandbox_code(sandbox_state, current_code, current_install_command)
+    # Update sandbox view with new output
+    sandbox_view = ""
+    if sandbox_output:
+        sandbox_view += sandbox_output
+    if sandbox_error:
+        sandbox_view = f"<details closed><summary><strong>🚨 Errors/Warnings</strong></summary>\n\n```\n{sandbox_error}\n```\n\n</details>\n\n" + sandbox_view
+    # Update sandbox component if we have a URL
+    sandbox_component_update = None
+    if sandbox_url:
+        sandbox_component_update = gr.update(
+            value=(sandbox_url, True, []),
+            visible=True,
+            key=f"sandbox_{model_name.lower()}_{int(time.time() * 1000)}"  # Unique key to force refresh
+        )
+    return state, sandbox_view, sandbox_component_update, sandbox_error
 def run_sandbox_code(sandbox_state: dict, code: str, install_command: str) -> tuple[str, str, str]:
     """Run code in the appropriate sandbox environment"""
     if not code.strip():
             return result['sandbox_url'], "", result['stderr']
         elif env == SandboxEnvironment.GRADIO:
             sandbox_url, sandbox_id, stderr = run_gradio_sandbox(code, install_command, sandbox_state.get('sandbox_id'))
             sandbox_state['sandbox_id'] = sandbox_id
             return sandbox_url, "", stderr
             return sandbox_url, "", stderr
         elif env == SandboxEnvironment.PYTHON_RUNNER:
             output, stderr = run_code_interpreter(code, 'python', install_command)
             return "", output, stderr
                         # Model A Sandbox
                         with gr.Column():
                             gr.Markdown("### Model A Sandbox")
+                            with gr.Tabs() as tabs_a:
+                                with gr.Tab("View", id=0):
                                     sandbox_view_a = gr.Markdown(
                                         "**Sandbox output will appear here automatically**"
                                     )
                                         label="Model A Sandbox",
                                         visible=False,
                                     )
+                                with gr.Tab("Code", id=1):
                                     code_a = gr.Code(
                                         label="Extracted Code",
                                         language="python",
                                         lines=8,
+                                        interactive=True,
+                                    )
+                                    install_command_a = gr.Textbox(
+                                        label="Install Command",
+                                        placeholder="bash command to install dependencies",
+                                        interactive=True,
+                                        lines=1,
+                                    )
+                                    rerun_code_a_btn = gr.Button(
+                                        "🔄 Re-run Code",
+                                        variant="huggingface",
                                     )
                         # Model B Sandbox
                         with gr.Column():
                             gr.Markdown("### Model B Sandbox")
+                            with gr.Tabs() as tabs_b:
+                                with gr.Tab("View", id=2):
                                     sandbox_view_b = gr.Markdown(
                                         "**Sandbox output will appear here automatically**"
                                     )
                                         label="Model B Sandbox",
                                         visible=False,
                                     )
+                                with gr.Tab("Code", id=3):
                                     code_b = gr.Code(
                                         label="Extracted Code",
                                         language="python",
                                         lines=8,
+                                        interactive=True,
+                                    )
+                                    install_command_b = gr.Textbox(
+                                        label="Install Command",
+                                        placeholder="bash command to install dependencies",
+                                        interactive=True,
+                                        lines=1,
+                                    )
+                                    rerun_code_b_btn = gr.Button(
+                                        "🔄 Re-run Code",
+                                        size="huggingface"
                                     )
                 # Vote UI components
                 result[19] if len(result) > 19 else "",  # sandbox_view_b
                 new_state0,  # state0_var
                 new_state1,  # state1_var
+                new_state0.get('install_command', ''),  # state0_install_command
+                new_state1.get('install_command', ''),  # state1_install_command
                 text,  # Keep original text input
                 f"**Model A:** {model_a}",  # Update model display A
                 f"**Model B:** {model_b}",  # Update model display B
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
+                install_command_a,  # Install command for model A
+                install_command_b,  # Install command for model B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
+                install_command_a,  # Install command for model A
+                install_command_b,  # Install command for model B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
+                install_command_a,  # Install command for model A
+                install_command_b,  # Install command for model B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
+                install_command_a,  # Install command for model A
+                install_command_b,  # Install command for model B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
                 sandbox_view_b,  # Sandbox view for model B
                 state0_var,  # Updated state for run button A
                 state1_var,  # Updated state for run button B
+                install_command_a,  # Install command for model A
+                install_command_b,  # Install command for model B
                 text_input,  # Clear the text input after sending
                 model_display_a,  # Update model display A
                 model_display_b,  # Update model display B
                 chat_stats_b,    # Reset conversation statistics for model B
                 sandbox_view_a,  # Reset sandbox view for model A
                 sandbox_view_b,  # Reset sandbox view for model B
+                state0_var,  # Updated state for run button A
+                state1_var,  # Updated state for run button B
+                install_command_a,  # Clear install command for model A
+                install_command_b,  # Clear install command for model B
                 model_display_a, # Reset model display A
                 model_display_b, # Reset model display B
                 text_input,      # Clear text input
                 "",  # Clear text_input to preserve examples
             )
+        # Re-run code button handlers
+        def rerun_code_a(state0, current_code_a, current_install_command_a):
+            """Re-run code execution for Model A"""
+            updated_state, sandbox_view, sandbox_component_update, error = rerun_code_execution(state0, current_code_a, current_install_command_a, "A")
+            return (
+                updated_state,  # state0_var
+                sandbox_view,   # sandbox_view_a
+                sandbox_component_update if sandbox_component_update else gr.skip(),  # sandbox_component_a
+            )
+        def rerun_code_b(state1, current_code_b, current_install_command_b):
+            """Re-run code execution for Model B"""
+            updated_state, sandbox_view, sandbox_component_update, error = rerun_code_execution(state1, current_code_b, current_install_command_b, "B")
+            return (
+                updated_state,  # state1_var
+                sandbox_view,   # sandbox_view_b
+                sandbox_component_update if sandbox_component_update else gr.skip(),  # sandbox_component_b
+            )
+        def change_to_view_a():
+            return gr.Tabs(selected=0)
+        def change_to_view_b():
+            return gr.Tabs(selected=2)
+        # Model A re-run button: two chained steps.
+        # Step 1 switches Model A's tab group to the "View" tab; step 2 re-runs the
+        # code using the current contents of the code editor and install-command box.
+        rerun_code_a_btn.click(
+            fn=change_to_view_a,
+            inputs=[],
+            outputs=[tabs_a]
+        ).then(
+            fn=rerun_code_a,
+            inputs=[state0_var, code_a, install_command_a],
+            outputs=[
+                state0_var,           # state returned by the re-run
+                sandbox_view_a,       # Markdown view (errors block + output)
+                sandbox_component_a,  # sandbox component update, or skipped when there is none
+            ]
+        )
+        # Model B re-run button: two chained steps.
+        # Step 1 switches Model B's tab group to the "View" tab; step 2 re-runs the
+        # code using the current contents of the code editor and install-command box.
+        rerun_code_b_btn.click(
+            fn=change_to_view_b,
+            inputs=[],
+            outputs=[tabs_b]
+        ).then(
+            fn=rerun_code_b,
+            inputs=[state1_var, code_b, install_command_b],
+            outputs=[
+                state1_var,           # state returned by the re-run
+                sandbox_view_b,       # Markdown view (errors block + output)
+                sandbox_component_b,  # sandbox component update, or skipped when there is none
+            ]
+        )
         # Vote button click handlers
         for vote_btn, vote_type in [
             (vote_left_btn, "left"),