File size: 12,102 Bytes
7caa980
 
6860773
7caa980
6860773
7caa980
 
6860773
7caa980
 
 
 
6860773
 
 
 
 
 
 
 
 
 
 
7caa980
 
6860773
 
 
 
 
7caa980
 
 
6860773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7caa980
6860773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7caa980
 
 
 
 
 
 
 
6860773
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# space/tools/report_tool.py
import os
import logging
from typing import Optional, Dict, Any
from datetime import datetime

import pandas as pd
from jinja2 import Environment, FileSystemLoader, TemplateNotFound

from utils.tracing import Tracer
from utils.config import AppConfig

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Constants
# Default cap on the number of DataFrame rows rendered into a preview table.
MAX_PREVIEW_ROWS = 100
# Size threshold in megabytes: embedded explanation images whose combined
# length exceeds it are rejected during input validation, and a saved report
# file larger than it triggers a warning in the log.
MAX_REPORT_SIZE_MB = 50


class ReportToolError(Exception):
    """Raised when the report tool cannot be set up or a report cannot be produced."""


class ReportTool:
    """
    Generates HTML reports from analysis results.
    Includes error handling, size limits, and proper template management.
    """
    
    def __init__(self, cfg: AppConfig, tracer: Tracer):
        """
        Store the collaborators and build the Jinja2 environment.

        Templates are loaded from the "templates" directory located one level
        above this file's directory; the directory is created when missing.

        Args:
            cfg: Application configuration, kept on the instance as ``cfg``.
            tracer: Tracer kept on the instance as ``tracer``.

        Raises:
            ReportToolError: If any part of the setup fails.
        """
        self.cfg = cfg
        self.tracer = tracer

        try:
            module_dir = os.path.dirname(__file__)
            templates_dir = os.path.abspath(os.path.join(module_dir, "..", "templates"))

            directory_missing = not os.path.exists(templates_dir)
            if directory_missing:
                logger.warning(f"Templates directory not found: {templates_dir}. Creating it.")
                os.makedirs(templates_dir, exist_ok=True)

            # NOTE(review): autoescape is off, so every context value (including
            # the user query) is emitted verbatim into the rendered output.
            environment_options = {
                "loader": FileSystemLoader(templates_dir),
                "autoescape": False,
                "trim_blocks": True,
                "lstrip_blocks": True,
            }
            self.env = Environment(**environment_options)

            logger.info(f"Report tool initialized with templates from: {templates_dir}")
        except Exception as exc:
            raise ReportToolError(f"Failed to initialize report tool: {exc}") from exc
    
    def _validate_inputs(
        self,
        user_query: str,
        sql_preview: Optional[pd.DataFrame],
        predict_preview: Optional[pd.DataFrame],
        explain_images: Dict[str, str],
        plan: Dict[str, Any]
    ) -> tuple[bool, str]:
        """
        Validate report generation inputs.
        Returns (is_valid, error_message).
        """
        if not user_query or not user_query.strip():
            return False, "User query is empty"
        
        if not plan or not isinstance(plan, dict):
            return False, "Plan is invalid"
        
        # Check explain_images size
        if explain_images:
            total_size = sum(len(img) for img in explain_images.values())
            size_mb = total_size / (1024 * 1024)
            if size_mb > MAX_REPORT_SIZE_MB:
                return False, f"Embedded images too large: {size_mb:.2f} MB (max {MAX_REPORT_SIZE_MB} MB)"
        
        return True, ""
    
    def _prepare_dataframe_preview(self, df: Optional[pd.DataFrame], max_rows: int = MAX_PREVIEW_ROWS) -> str:
        """
        Convert dataframe to markdown table with row limit.
        Returns empty string if no data.
        """
        if df is None or df.empty:
            return ""
        
        try:
            # Limit rows
            if len(df) > max_rows:
                preview_df = df.head(max_rows)
                suffix = f"\n\n*... and {len(df) - max_rows} more rows*"
            else:
                preview_df = df
                suffix = ""
            
            # Convert to markdown
            markdown = preview_df.to_markdown(index=False, tablefmt="github")
            return markdown + suffix
            
        except Exception as e:
            logger.warning(f"Failed to convert dataframe to markdown: {e}")
            return f"*Error displaying data: {str(e)}*"
    
    def _get_template_name(self) -> str:
        """
        Determine which template to use.
        Falls back to creating a default if none exists.
        """
        template_name = "report_template.md"
        
        try:
            # Check if template exists
            self.env.get_template(template_name)
            return template_name
        except TemplateNotFound:
            logger.warning(f"Template '{template_name}' not found. Creating default template.")
            self._create_default_template()
            return template_name
    
    def _create_default_template(self):
        """Create a default report template if none exists."""
        default_template = """# Analysis Report

**Generated:** {{ timestamp }}

## User Query
{{ user_query }}

## Execution Plan
**Steps:** {{ plan.steps | join(', ') }}

**Rationale:** {{ plan.rationale }}

{% if sql_preview %}
## Data Query Results
{{ sql_preview }}
{% endif %}

{% if predict_preview %}
## Predictions
{{ predict_preview }}
{% endif %}

{% if explain_images %}
## Model Explanations

{% if explain_images.global_bar %}
### Feature Importance
![Feature Importance]({{ explain_images.global_bar }})
{% endif %}

{% if explain_images.beeswarm %}
### Feature Effects
![Feature Effects]({{ explain_images.beeswarm }})
{% endif %}
{% endif %}

---
*Report generated by Tabular Agentic XAI*
"""
        
        templates_dir = self.env.loader.searchpath[0]
        template_path = os.path.join(templates_dir, "report_template.md")
        
        try:
            with open(template_path, 'w', encoding='utf-8') as f:
                f.write(default_template)
            logger.info(f"Created default template at: {template_path}")
        except Exception as e:
            logger.error(f"Failed to create default template: {e}")
    
    def _render_template(
        self,
        user_query: str,
        sql_preview_md: str,
        predict_preview_md: str,
        explain_images: Dict[str, str],
        plan: Dict[str, Any]
    ) -> str:
        """
        Render the report template with provided data.

        Args:
            user_query: Original user query, exposed to the template as ``user_query``.
            sql_preview_md: Pre-rendered table for the query results (may be empty).
            predict_preview_md: Pre-rendered table for the predictions (may be empty).
            explain_images: Mapping of plot name to image reference; ``None``
                or an empty mapping is passed to the template as ``{}``.
            plan: Execution plan dictionary, exposed to the template as ``plan``.

        Returns:
            The text produced by the template.

        Raises:
            ReportToolError: If the template cannot be loaded or rendered.
        """
        # Local import keeps this method self-contained.
        from datetime import timezone

        try:
            template = self.env.get_template(self._get_template_name())

            # datetime.utcnow() is deprecated and returns a naive value; an
            # aware UTC timestamp formats to exactly the same string.
            generated_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S UTC")

            context = {
                "timestamp": generated_at,
                "user_query": user_query,
                "plan": plan,
                "sql_preview": sql_preview_md,
                "predict_preview": predict_preview_md,
                "explain_images": explain_images or {}
            }

            html_body = template.render(**context)
            logger.info(f"Template rendered successfully: {len(html_body)} characters")

            return html_body

        except Exception as e:
            raise ReportToolError(f"Template rendering failed: {e}") from e
    
    def _save_report(self, html_content: str) -> str:
        """
        Save HTML report to file.
        Returns the filename.
        """
        try:
            # Generate unique filename
            timestamp = datetime.utcnow().strftime('%Y%m%d_%H%M%S')
            filename = f"report_{timestamp}.html"
            
            # Determine output path
            output_dir = os.getenv("REPORT_OUTPUT_DIR", os.getcwd())
            os.makedirs(output_dir, exist_ok=True)
            
            filepath = os.path.abspath(os.path.join(output_dir, filename))
            
            # Add CSS styling
            css_path = os.path.join(
                os.path.dirname(__file__), "..", "templates", "report_styles.css"
            )
            
            if os.path.exists(css_path):
                css_link = f'<link rel="stylesheet" href="{css_path}">'
            else:
                # Inline basic CSS if external file not found
                css_link = """
<style>
body { font-family: Arial, sans-serif; max-width: 1200px; margin: 0 auto; padding: 20px; }
h1 { color: #2c3e50; border-bottom: 3px solid #3498db; padding-bottom: 10px; }
h2 { color: #34495e; margin-top: 30px; }
table { border-collapse: collapse; width: 100%; margin: 20px 0; }
th, td { border: 1px solid #ddd; padding: 8px; text-align: left; }
th { background-color: #3498db; color: white; }
tr:nth-child(even) { background-color: #f2f2f2; }
img { max-width: 100%; height: auto; margin: 20px 0; }
code { background-color: #f4f4f4; padding: 2px 6px; border-radius: 3px; }
pre { background-color: #f4f4f4; padding: 15px; border-radius: 5px; overflow-x: auto; }
</style>
"""
            
            # Construct full HTML
            full_html = f"""<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Analysis Report - {timestamp}</title>
    {css_link}
</head>
<body>
{html_content}
</body>
</html>
"""
            
            # Write to file
            with open(filepath, 'w', encoding='utf-8') as f:
                f.write(full_html)
            
            # Check file size
            file_size_mb = os.path.getsize(filepath) / (1024 * 1024)
            logger.info(f"Report saved: {filepath} ({file_size_mb:.2f} MB)")
            
            if file_size_mb > MAX_REPORT_SIZE_MB:
                logger.warning(f"Report file is large: {file_size_mb:.2f} MB")
            
            return filename
            
        except Exception as e:
            raise ReportToolError(f"Failed to save report: {e}") from e
    
    def render_and_save(
        self,
        user_query: str,
        sql_preview: Optional[pd.DataFrame],
        predict_preview: Optional[pd.DataFrame],
        explain_images: Dict[str, str],
        plan: Dict[str, Any],
    ) -> str:
        """
        Render and save analysis report.

        On success a "report" trace event is emitted. On any failure the
        error is logged and a "report_error" trace event is emitted before
        the exception propagates.

        Args:
            user_query: Original user query
            sql_preview: SQL query results (optional)
            predict_preview: Prediction results (optional)
            explain_images: Dictionary of explanation plots (name -> data URI)
            plan: Execution plan dictionary

        Returns:
            Filename of saved report

        Raises:
            ReportToolError: If report generation fails
        """
        def record_failure(message: str) -> None:
            # Log and trace a failure; tracing is best effort so that a broken
            # tracer can never replace the error being reported.
            logger.error(message)
            if self.tracer:
                try:
                    self.tracer.trace_event("report_error", {"error": message})
                except Exception:
                    logger.exception("Failed to trace report error")

        try:
            logger.info("Generating analysis report...")

            # Validate inputs
            is_valid, error_msg = self._validate_inputs(
                user_query, sql_preview, predict_preview, explain_images, plan
            )
            if not is_valid:
                raise ReportToolError(f"Invalid inputs: {error_msg}")

            # Prepare dataframe previews
            sql_preview_md = self._prepare_dataframe_preview(sql_preview)
            predict_preview_md = self._prepare_dataframe_preview(predict_preview)

            # Render template
            html_content = self._render_template(
                user_query=user_query,
                sql_preview_md=sql_preview_md,
                predict_preview_md=predict_preview_md,
                explain_images=explain_images,
                plan=plan
            )

            # Save report
            filename = self._save_report(html_content)

            # Trace event
            if self.tracer:
                self.tracer.trace_event("report", {
                    "filename": filename,
                    "has_sql": bool(sql_preview_md),
                    "has_predictions": bool(predict_preview_md),
                    "num_images": len(explain_images) if explain_images else 0
                })

            logger.info(f"Report generation successful: {filename}")
            return filename

        except ReportToolError as e:
            # Already in its final form: record it like any other failure and
            # re-raise unchanged rather than wrapping it a second time.
            record_failure(str(e))
            raise
        except Exception as e:
            error_msg = f"Report generation failed: {str(e)}"
            record_failure(error_msg)
            raise ReportToolError(error_msg) from e
    
    def list_available_templates(self) -> list:
        """List all available report templates."""
        try:
            templates_dir = self.env.loader.searchpath[0]
            templates = [
                f for f in os.listdir(templates_dir)
                if f.endswith(('.md', '.html', '.jinja2'))
            ]
            return templates
        except Exception as e:
            logger.warning(f"Failed to list templates: {e}")
            return []