DataEngEval

Running

App Files Community

DataEngEval / config /app.yaml

uparekh01151

Add dataset visibility configuration - only show nyc_taxi_small by default

a026fe5 about 1 month ago

raw

history blame contribute delete

3.22 kB

	# Application Configuration
	app:
	title: "DataEngEval"
	description: "A config-driven evaluation platform for English → SQL tasks across Presto, BigQuery, and Snowflake."
	theme: "soft"
	server:
	host: "0.0.0.0"
	port: 7860
	share: true

	# Leaderboard Configuration
	leaderboard:
	path: "tasks/leaderboard.parquet"
	columns:
	- "timestamp"
	- "dataset_name"
	- "case_id"
	- "dialect"
	- "model_name"
	- "question"
	- "reference_sql"
	- "candidate_sql"
	- "correctness_exact"
	- "result_match_f1"
	- "exec_success"
	- "latency_ms"
	- "readability"
	- "dialect_ok"
	- "composite_score"
	display:
	top_results: 50
	results_table_headers:
	- "Rank"
	- "Model"
	- "Composite Score"
	- "Correctness"
	- "Result F1"
	- "Exec Success"
	- "Latency"
	- "Dataset"
	- "Case ID"
	- "Question"
	- "Reference SQL"
	- "Generated SQL"
	- "Dialect OK"

	# Available SQL Dialects
	dialects:
	- "presto"
	- "bigquery"
	- "snowflake"

	# Available Use Cases
	use_cases:
	- "sql_generation"
	- "code_generation"
	- "documentation"

	# Visible Datasets (control which datasets appear in UI)
	visible_datasets:
	- "sql_generation/nyc_taxi_small"
	# - "code_generation/python_algorithms" # Disabled
	# - "code_generation/go_algorithms" # Disabled
	# - "documentation/technical_docs" # Disabled
	# - "documentation/api_documentation" # Disabled

	# Available Programming Languages (for code generation)
	languages:
	- "python"
	- "go"
	- "javascript"
	- "java"

	# Available Documentation Formats
	doc_formats:
	- "markdown"
	- "html"
	- "json"
	- "yaml"

	# Prompt Template Configuration
	prompts:
	template_path: "prompts/"
	fallback_template: \|
	You are an expert SQL developer specializing in {dialect} SQL dialect.

	Given the following database schema and a natural language question, generate a correct SQL query in {dialect} syntax.

	Database Schema:
	{{schema}}

	Question: {{question}}

	Requirements:
	- Use proper {dialect} SQL syntax
	- Ensure the query is syntactically correct
	- Return only the SQL query, no explanations

	SQL Query:

	# Environment Configuration
	environment:
	mock_mode_env: "MOCK_MODE"
	hf_token_env: "HF_TOKEN"
	mock_mode_default: false

	# UI Configuration
	ui:
	tabs:
	- name: "Evaluate"
	label: "Evaluate"
	- name: "Leaderboard"
	label: "Leaderboard"
	- name: "Info"
	label: "Info"

	buttons:
	refresh:
	text: "Refresh Leaderboard"
	variant: "secondary"
	size: "sm"
	run_evaluation:
	text: "Run Evaluation"
	variant: "primary"

	inputs:
	dataset:
	label: "Dataset"
	dialect:
	label: "SQL Dialect"
	default: "presto"
	case:
	label: "Test Case"
	models:
	label: "Models to Evaluate"

	outputs:
	status:
	label: "Status"
	results:
	label: "Results"
	headers:
	- "Model"
	- "Composite Score"
	- "Correctness"
	- "Exec Success"
	- "Result F1"
	- "Latency"
	detailed:
	label: "Detailed Results"
	leaderboard:
	label: "Global Leaderboard (Top 50)"