# Report tool: renders analysis results through a Jinja2 template and saves them as an HTML file.

import logging
import os
from datetime import datetime, timezone
from typing import Any, Dict, Optional

import pandas as pd
from jinja2 import Environment, FileSystemLoader, TemplateNotFound

from utils.config import AppConfig
from utils.tracing import Tracer

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default cap on the number of DataFrame rows rendered into a preview table.
MAX_PREVIEW_ROWS = 100

# Size threshold in megabytes: embedded explanation images above this total
# are rejected during validation, and a saved report above it logs a warning.
MAX_REPORT_SIZE_MB = 50


class ReportToolError(Exception):
    """Raised when report initialisation, validation, rendering or saving fails."""


class ReportTool:
    """Generate HTML report files from analysis results.

    A Jinja2 template (``report_template.md`` in the ``templates`` directory
    one level above this module's directory) is rendered with the user query,
    the execution plan, DataFrame previews and explanation plots. The rendered
    text is wrapped in an HTML page and written to the directory named by the
    ``REPORT_OUTPUT_DIR`` environment variable (current working directory when
    unset).

    NOTE(review): the template is Markdown and its rendered text is placed in
    ``<body>`` as-is; this class performs no Markdown-to-HTML conversion.
    Confirm whether a conversion step is expected elsewhere.
    """

    _TEMPLATE_NAME = "report_template.md"
    _STYLESHEET_NAME = "report_styles.css"

    # Written to the templates directory when ``report_template.md`` is absent.
    _DEFAULT_TEMPLATE = """# Analysis Report

**Generated:** {{ timestamp }}

## User Query
{{ user_query }}

## Execution Plan
**Steps:** {{ plan.steps | join(', ') }}

**Rationale:** {{ plan.rationale }}

{% if sql_preview %}
## Data Query Results
{{ sql_preview }}
{% endif %}

{% if predict_preview %}
## Predictions
{{ predict_preview }}
{% endif %}

{% if explain_images %}
## Model Explanations

{% if explain_images.global_bar %}
### Feature Importance

{% endif %}

{% if explain_images.beeswarm %}
### Feature Effects

{% endif %}
{% endif %}

---
*Report generated by Tabular Agentic XAI*
"""

    # Built-in CSS rules used when no ``report_styles.css`` can be read.
    _DEFAULT_CSS = """\
body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
h2 { color: #34495e; margin-top: 30px; }
table { border-collapse: collapse; width: 100%; margin: 20px 0; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #3498db; color: white; }
tr:nth-child(even) { background-color: #f2f2f2; }
img { max-width: 100%; height: auto; margin: 20px 0; }
code { background-color: #f4f4f4; padding: 2px 6px; border-radius: 3px; }
pre { background-color: #f4f4f4; padding: 15px; border-radius: 5px; overflow-x: auto; }"""

    def __init__(self, cfg: AppConfig, tracer: Tracer):
        """Set up the Jinja2 environment, creating the templates directory if needed.

        Args:
            cfg: Application configuration; stored on the instance.
            tracer: Tracer used to record report events; may be falsy to
                disable tracing.

        Raises:
            ReportToolError: If the templates directory or the Jinja2
                environment cannot be set up.
        """
        self.cfg = cfg
        self.tracer = tracer

        try:
            templates_dir = os.path.abspath(
                os.path.join(os.path.dirname(__file__), "..", "templates")
            )

            if not os.path.exists(templates_dir):
                logger.warning(
                    "Templates directory not found: %s. Creating it.", templates_dir
                )
                os.makedirs(templates_dir, exist_ok=True)

            self.env = Environment(
                loader=FileSystemLoader(templates_dir),
                # The rendered text is written into an .html file, so values
                # such as the user query and table cells must be HTML-escaped.
                autoescape=True,
                trim_blocks=True,
                lstrip_blocks=True,
            )

            logger.info(
                "Report tool initialized with templates from: %s", templates_dir
            )

        except Exception as e:
            raise ReportToolError(f"Failed to initialize report tool: {e}") from e

    def _templates_dir(self) -> str:
        """Return the directory the Jinja2 loader reads templates from."""
        return self.env.loader.searchpath[0]

    def _validate_inputs(
        self,
        user_query: str,
        sql_preview: Optional[pd.DataFrame],
        predict_preview: Optional[pd.DataFrame],
        explain_images: Dict[str, str],
        plan: Dict[str, Any]
    ) -> tuple[bool, str]:
        """Validate report generation inputs.

        ``sql_preview`` and ``predict_preview`` are accepted for signature
        compatibility but are not inspected here.

        Returns:
            ``(is_valid, error_message)``; the message is empty when valid.
        """
        if not isinstance(user_query, str) or not user_query.strip():
            return False, "User query is empty"

        if not plan or not isinstance(plan, dict):
            return False, "Plan is invalid"

        if explain_images:
            if not isinstance(explain_images, dict):
                return False, "Explain images must be a dictionary"

            # Entries may be None/empty when a plot was not produced; they
            # contribute nothing to the embedded size.
            total_size = sum(
                len(img)
                for img in explain_images.values()
                if isinstance(img, (str, bytes))
            )
            size_mb = total_size / (1024 * 1024)
            if size_mb > MAX_REPORT_SIZE_MB:
                return False, f"Embedded images too large: {size_mb:.2f} MB (max {MAX_REPORT_SIZE_MB} MB)"

        return True, ""

    def _prepare_dataframe_preview(self, df: Optional[pd.DataFrame], max_rows: int = MAX_PREVIEW_ROWS) -> str:
        """Convert a DataFrame to a GitHub-style Markdown table with a row limit.

        Args:
            df: Data to preview; ``None`` or an empty frame yields ``""``.
            max_rows: Maximum number of rows included in the table.

        Returns:
            The Markdown table, followed by a "more rows" note when truncated,
            or an italic error note if the conversion fails.
        """
        if df is None or df.empty:
            return ""

        try:
            total_rows = len(df)
            if total_rows > max_rows:
                preview_df = df.head(max_rows)
                suffix = f"\n\n*... and {total_rows - max_rows} more rows*"
            else:
                preview_df = df
                suffix = ""

            markdown = preview_df.to_markdown(index=False, tablefmt="github")
            return markdown + suffix

        except Exception as e:
            # Best effort: a preview failure must not abort the whole report.
            logger.warning("Failed to convert dataframe to markdown: %s", e)
            return f"*Error displaying data: {str(e)}*"

    def _get_template_name(self) -> str:
        """Return the report template name, creating a default file if it is missing."""
        template_name = self._TEMPLATE_NAME

        try:
            self.env.get_template(template_name)
        except TemplateNotFound:
            logger.warning(
                "Template '%s' not found. Creating default template.", template_name
            )
            self._create_default_template()

        return template_name

    def _create_default_template(self):
        """Write the built-in default template into the templates directory.

        A write failure is logged rather than raised; the subsequent template
        lookup by the caller then reports the missing template.
        """
        template_path = os.path.join(self._templates_dir(), self._TEMPLATE_NAME)

        try:
            with open(template_path, 'w', encoding='utf-8') as f:
                f.write(self._DEFAULT_TEMPLATE)
            logger.info("Created default template at: %s", template_path)
        except OSError as e:
            logger.error("Failed to create default template: %s", e)

    def _render_template(
        self,
        user_query: str,
        sql_preview_md: str,
        predict_preview_md: str,
        explain_images: Dict[str, str],
        plan: Dict[str, Any]
    ) -> str:
        """Render the report template with the provided data.

        Returns:
            The rendered template text.

        Raises:
            ReportToolError: If the template cannot be loaded or rendered.
        """
        try:
            template_name = self._get_template_name()
            template = self.env.get_template(template_name)

            context = {
                "timestamp": datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC"),
                "user_query": user_query,
                "plan": plan,
                "sql_preview": sql_preview_md,
                "predict_preview": predict_preview_md,
                "explain_images": explain_images or {},
            }

            html_body = template.render(**context)
            logger.info(
                "Template rendered successfully: %d characters", len(html_body)
            )
            return html_body

        except Exception as e:
            raise ReportToolError(f"Template rendering failed: {e}") from e

    def _build_style_block(self) -> str:
        """Return a ``<style>`` element for the report.

        The contents of ``report_styles.css`` from the templates directory are
        inlined when the file can be read, so the saved report does not depend
        on a filesystem path; otherwise the built-in default rules are used.
        """
        css_path = os.path.join(self._templates_dir(), self._STYLESHEET_NAME)
        css = self._DEFAULT_CSS

        try:
            with open(css_path, encoding='utf-8') as f:
                css = f.read()
        except FileNotFoundError:
            pass  # No custom stylesheet: the default rules apply.
        except (OSError, UnicodeDecodeError) as e:
            logger.warning(
                "Could not read stylesheet %s, using default styles: %s", css_path, e
            )

        return f"<style>\n{css}\n</style>"

    def _save_report(self, html_content: str) -> str:
        """Wrap the rendered content in an HTML page and write it to disk.

        The file is named ``report_<UTC timestamp>.html`` and placed in
        ``REPORT_OUTPUT_DIR`` (current working directory when unset).

        Returns:
            The file name (not the full path) of the saved report.

        Raises:
            ReportToolError: If the report cannot be written.
        """
        try:
            timestamp = datetime.now(timezone.utc).strftime('%Y%m%d_%H%M%S')
            filename = f"report_{timestamp}.html"

            output_dir = os.getenv("REPORT_OUTPUT_DIR", os.getcwd())
            os.makedirs(output_dir, exist_ok=True)
            filepath = os.path.abspath(os.path.join(output_dir, filename))

            full_html = "\n".join([
                "<!DOCTYPE html>",
                '<html lang="en">',
                "<head>",
                '<meta charset="UTF-8">',
                '<meta name="viewport" content="width=device-width, initial-scale=1.0">',
                f"<title>Analysis Report - {timestamp}</title>",
                self._build_style_block(),
                "</head>",
                "<body>",
                html_content,
                "</body>",
                "</html>",
                "",
            ])

            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(full_html)

            file_size_mb = os.path.getsize(filepath) / (1024 * 1024)
            logger.info("Report saved: %s (%.2f MB)", filepath, file_size_mb)

            if file_size_mb > MAX_REPORT_SIZE_MB:
                logger.warning("Report file is large: %.2f MB", file_size_mb)

            return filename

        except Exception as e:
            raise ReportToolError(f"Failed to save report: {e}") from e

    def render_and_save(
        self,
        user_query: str,
        sql_preview: Optional[pd.DataFrame],
        predict_preview: Optional[pd.DataFrame],
        explain_images: Dict[str, str],
        plan: Dict[str, Any],
    ) -> str:
        """Render and save an analysis report.

        Args:
            user_query: Original user query.
            sql_preview: SQL query results (optional).
            predict_preview: Prediction results (optional).
            explain_images: Dictionary of explanation plots (name -> data URI).
            plan: Execution plan dictionary.

        Returns:
            Filename of the saved report.

        Raises:
            ReportToolError: If validation, rendering or saving fails.
        """
        try:
            logger.info("Generating analysis report...")

            is_valid, error_msg = self._validate_inputs(
                user_query, sql_preview, predict_preview, explain_images, plan
            )
            if not is_valid:
                raise ReportToolError(f"Invalid inputs: {error_msg}")

            sql_preview_md = self._prepare_dataframe_preview(sql_preview)
            predict_preview_md = self._prepare_dataframe_preview(predict_preview)

            html_content = self._render_template(
                user_query=user_query,
                sql_preview_md=sql_preview_md,
                predict_preview_md=predict_preview_md,
                explain_images=explain_images,
                plan=plan,
            )

            filename = self._save_report(html_content)

            if self.tracer:
                self.tracer.trace_event("report", {
                    "filename": filename,
                    "has_sql": bool(sql_preview_md),
                    "has_predictions": bool(predict_preview_md),
                    "num_images": len(explain_images) if explain_images else 0,
                })

            logger.info("Report generation successful: %s", filename)
            return filename

        except ReportToolError:
            # Already carries a descriptive message from the failing stage.
            raise
        except Exception as e:
            error_msg = f"Report generation failed: {str(e)}"
            logger.error(error_msg)
            if self.tracer:
                self.tracer.trace_event("report_error", {"error": error_msg})
            raise ReportToolError(error_msg) from e

    def list_available_templates(self) -> list:
        """List template file names (``.md``, ``.html``, ``.jinja2``) in sorted order.

        Returns an empty list if the templates directory cannot be read.
        """
        try:
            return sorted(
                name for name in os.listdir(self._templates_dir())
                if name.endswith(('.md', '.html', '.jinja2'))
            )
        except OSError as e:
            logger.warning("Failed to list templates: %s", e)
            return []