---
# Application configuration for the DataEngEval evaluation UI.
#
# Layout: one top-level key per concern (app, leaderboard, dialects,
# use_cases, visible_datasets, languages, doc_formats, prompts,
# environment, ui). Indentation is 2 spaces; all strings are double-quoted.

# Application Configuration
app:
  title: "DataEngEval"
  description: "A config-driven evaluation platform for English → SQL tasks across Presto, BigQuery, and Snowflake."
  theme: "soft"
  server:
    host: "0.0.0.0"
    port: 7860
    # NOTE(review): `share` is grouped with host/port here; confirm the
    # loader reads it as app.server.share rather than app.share.
    share: true

# Leaderboard Configuration
leaderboard:
  path: "tasks/leaderboard.parquet"
  # Column names of the leaderboard table stored at `path`.
  columns:
    - "timestamp"
    - "dataset_name"
    - "case_id"
    - "dialect"
    - "model_name"
    - "question"
    - "reference_sql"
    - "candidate_sql"
    - "correctness_exact"
    - "result_match_f1"
    - "exec_success"
    - "latency_ms"
    - "readability"
    - "dialect_ok"
    - "composite_score"
  display:
    # Keep in sync with the "(Top 50)" text in ui.outputs.leaderboard.label.
    top_results: 50
    results_table_headers:
      - "Rank"
      - "Model"
      - "Composite Score"
      - "Correctness"
      - "Result F1"
      - "Exec Success"
      - "Latency"
      - "Dataset"
      - "Case ID"
      - "Question"
      - "Reference SQL"
      - "Generated SQL"
      - "Dialect OK"

# Available SQL Dialects
dialects:
  - "presto"
  - "bigquery"
  - "snowflake"

# Available Use Cases
use_cases:
  - "sql_generation"
  - "code_generation"
  - "documentation"

# Visible Datasets (control which datasets appear in UI)
visible_datasets:
  - "sql_generation/nyc_taxi_small"
  # - "code_generation/python_algorithms"  # Disabled
  # - "code_generation/go_algorithms"  # Disabled
  # - "documentation/technical_docs"  # Disabled
  # - "documentation/api_documentation"  # Disabled

# Available Programming Languages (for code generation)
languages:
  - "python"
  - "go"
  - "javascript"
  - "java"

# Available Documentation Formats
doc_formats:
  - "markdown"
  - "html"
  - "json"
  - "yaml"

# Prompt Template Configuration
prompts:
  template_path: "prompts/"
  # NOTE(review): the placeholders below mix `{dialect}` with
  # `{{schema}}` / `{{question}}`; confirm both brace styles are what the
  # template renderer expects. Blank lines inside the template, if any
  # existed, are not recoverable from the recovered text — verify against
  # the intended prompt.
  fallback_template: |
    You are an expert SQL developer specializing in {dialect} SQL dialect.
    Given the following database schema and a natural language question, generate a correct SQL query in {dialect} syntax.
    Database Schema:
    {{schema}}
    Question: {{question}}
    Requirements:
    - Use proper {dialect} SQL syntax
    - Ensure the query is syntactically correct
    - Return only the SQL query, no explanations
    SQL Query:

# Environment Configuration
environment:
  # Values ending in `_env` are names of environment variables, not the
  # settings themselves.
  mock_mode_env: "MOCK_MODE"
  hf_token_env: "HF_TOKEN"
  mock_mode_default: false

# UI Configuration
ui:
  tabs:
    - name: "Evaluate"
      label: "Evaluate"
    - name: "Leaderboard"
      label: "Leaderboard"
    - name: "Info"
      label: "Info"
  buttons:
    refresh:
      text: "Refresh Leaderboard"
      variant: "secondary"
      size: "sm"
    run_evaluation:
      text: "Run Evaluation"
      variant: "primary"
  inputs:
    dataset:
      label: "Dataset"
    dialect:
      label: "SQL Dialect"
      # Must be one of the entries in the top-level `dialects` list.
      default: "presto"
    case:
      label: "Test Case"
    models:
      label: "Models to Evaluate"
  outputs:
    status:
      label: "Status"
    results:
      label: "Results"
      headers:
        - "Model"
        - "Composite Score"
        - "Correctness"
        - "Exec Success"
        - "Result F1"
        - "Latency"
    detailed:
      label: "Detailed Results"
    leaderboard:
      label: "Global Leaderboard (Top 50)"