# Application Configuration
app:
  title: "DataEngEval"
  description: "A config-driven evaluation platform for English → SQL tasks across Presto, BigQuery, and Snowflake."
  theme: "soft"

# Server Configuration
server:
  host: "0.0.0.0"
  port: 7860
  share: true

# Leaderboard Configuration
leaderboard:
  path: "tasks/leaderboard.parquet"
  columns:
    - "timestamp"
    - "dataset_name"
    - "case_id"
    - "dialect"
    - "model_name"
    - "question"
    - "reference_sql"
    - "candidate_sql"
    - "correctness_exact"
    - "result_match_f1"
    - "exec_success"
    - "latency_ms"
    - "readability"
    - "dialect_ok"
    - "composite_score"
  display:
    top_results: 50
    results_table_headers:
      - "Rank"
      - "Model"
      - "Composite Score"
      - "Correctness"
      - "Result F1"
      - "Exec Success"
      - "Latency"
      - "Dataset"
      - "Case ID"
      - "Question"
      - "Reference SQL"
      - "Generated SQL"
      - "Dialect OK"

# Available SQL Dialects
dialects:
  - "presto"
  - "bigquery"
  - "snowflake"

# Available Use Cases
use_cases:
  - "sql_generation"
  - "code_generation"
  - "documentation"

# Visible Datasets (control which datasets appear in UI)
visible_datasets:
  - "sql_generation/nyc_taxi_small"
  # - "code_generation/python_algorithms"  # Disabled
  # - "code_generation/go_algorithms"  # Disabled
  # - "documentation/technical_docs"  # Disabled
  # - "documentation/api_documentation"  # Disabled

# Available Programming Languages (for code generation)
languages:
  - "python"
  - "go"
  - "javascript"
  - "java"

# Available Documentation Formats
doc_formats:
  - "markdown"
  - "html"
  - "json"
  - "yaml"

# Prompt Template Configuration
# NOTE(review): the template mixes single-brace ({dialect}) and double-brace
# ({{schema}}, {{question}}) placeholders — presumably two templating passes;
# preserved verbatim, confirm against the prompt renderer.
prompts:
  template_path: "prompts/"
  fallback_template: |
    You are an expert SQL developer specializing in {dialect} SQL dialect.
    Given the following database schema and a natural language question, generate a correct SQL query in {dialect} syntax.

    Database Schema:
    {{schema}}

    Question: {{question}}

    Requirements:
    - Use proper {dialect} SQL syntax
    - Ensure the query is syntactically correct
    - Return only the SQL query, no explanations

    SQL Query:

# Environment Configuration
environment:
  mock_mode_env: "MOCK_MODE"
  hf_token_env: "HF_TOKEN"
  mock_mode_default: false

# UI Configuration
ui:
  tabs:
    - name: "Evaluate"
      label: "Evaluate"
    - name: "Leaderboard"
      label: "Leaderboard"
    - name: "Info"
      label: "Info"
  buttons:
    refresh:
      text: "Refresh Leaderboard"
      variant: "secondary"
      size: "sm"
    run_evaluation:
      text: "Run Evaluation"
      variant: "primary"
  inputs:
    dataset:
      label: "Dataset"
    dialect:
      label: "SQL Dialect"
      default: "presto"
    case:
      label: "Test Case"
    models:
      label: "Models to Evaluate"
  outputs:
    status:
      label: "Status"
    results:
      label: "Results"
      headers:
        - "Model"
        - "Composite Score"
        - "Correctness"
        - "Exec Success"
        - "Result F1"
        - "Latency"
    detailed:
      label: "Detailed Results"
    leaderboard:
      label: "Global Leaderboard (Top 50)"