Spaces:
Running
Running
| """ | |
| Simple PDF generation fallback using reportlab (if available) | |
| or browser-based PDF conversion instructions | |
| """ | |
| import io | |
| from typing import Dict, Any | |
class SimplePDFGenerator:
    """Text-only HTML-to-PDF fallback built on reportlab.

    No HTML/CSS rendering is attempted. The document title, the h1-h3
    headings and the text of p/div elements are extracted with BeautifulSoup
    and laid out as a plain flowing A4 document.

    Attributes:
        available: True when ``reportlab`` could be imported at construction
            time. ``generate_pdf`` raises ``ImportError`` when it is False.
    """

    # Maps heading tag names to the reportlab sample-stylesheet style used.
    _HEADING_STYLES = {"h1": "Heading1", "h2": "Heading2", "h3": "Heading3"}
    # Body text must be strictly longer than this to be kept.
    _MIN_BODY_CHARS = 20
    # Body paragraphs are truncated to this many characters.
    _MAX_BODY_CHARS = 500

    def __init__(self):
        try:
            import reportlab  # noqa: F401 -- imported only to probe availability
        except ImportError:
            self.available = False
        else:
            self.available = True

    def generate_pdf(self, html_content: str) -> bytes:
        """Generate a PDF from HTML content using a simple text-based approach.

        ``style`` and ``script`` elements are discarded. The ``<title>`` text
        (or "SEO Report" when it is missing or empty) becomes the document
        title. Every h1/h2/h3 is rendered with the matching heading style, and
        every p/div whose stripped text is longer than 20 characters is
        rendered as a normal paragraph, truncated to 500 characters.

        Args:
            html_content: HTML markup of the report.

        Returns:
            The bytes of the generated PDF document.

        Raises:
            ImportError: If reportlab was not importable when this instance
                was created, or if ``bs4`` cannot be imported.
        """
        if not self.available:
            raise ImportError("PDF generation requires reportlab: pip install reportlab")

        import logging
        from xml.sax.saxutils import escape

        from bs4 import BeautifulSoup
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.platypus import Paragraph, SimpleDocTemplate, Spacer

        log = logging.getLogger(__name__)

        # Parse the HTML and drop elements whose text is not report content.
        soup = BeautifulSoup(html_content, "html.parser")
        for tag in soup(["style", "script"]):
            tag.decompose()

        buffer = io.BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)
        styles = getSampleStyleSheet()

        title_tag = soup.find("title")
        title = title_tag.get_text().strip() if title_tag else ""

        # Paragraph parses its text as inline XML markup, so plain text taken
        # from the HTML must be escaped first. Otherwise a literal "<title>"
        # or "&" in the report is read as markup instead of being printed.
        story = [
            Paragraph(escape(title or "SEO Report"), styles["Title"]),
            Spacer(1, 12),
        ]

        # NOTE(review): find_all also returns container elements, so text
        # nested inside several levels of div/p is emitted once per enclosing
        # level (each copy truncated separately). Left unchanged here to keep
        # the existing output; consider de-duplicating.
        for section in soup.find_all(["h1", "h2", "h3", "p", "div"]):
            text = section.get_text().strip()
            if not text:
                continue

            heading_style = self._HEADING_STYLES.get(section.name)
            if heading_style is not None:
                story.append(Paragraph(escape(text), styles[heading_style]))
                story.append(Spacer(1, 6))
                continue

            if len(text) <= self._MIN_BODY_CHARS:
                continue  # Skip very short text

            try:
                # Truncate before escaping so an entity such as "&amp;" is
                # never cut in half.
                body = Paragraph(
                    escape(text[: self._MAX_BODY_CHARS]), styles["Normal"]
                )
            except Exception:
                # Best effort: one bad paragraph must not abort the whole
                # document, but the failure is recorded instead of hidden.
                log.warning("Skipping paragraph that could not be rendered", exc_info=True)
                continue
            story.append(body)
            story.append(Spacer(1, 6))

        doc.build(story)
        return buffer.getvalue()
def create_browser_pdf_instructions() -> str:
    """Return Markdown instructions for producing the PDF manually.

    The text walks the user through downloading the HTML report, opening it
    in a web browser and using the browser's print dialog to save it as PDF.

    Returns:
        A Markdown-formatted, multi-line string of numbered steps.
    """
    instructions = """
## How to Create PDF from HTML Report:
1. **Download the HTML report** using the button above
2. **Open the HTML file** in your web browser (Chrome, Firefox, Edge)
3. **Print the page**: Press Ctrl+P (Windows) or Cmd+P (Mac)
4. **Select destination**: Choose "Save as PDF" or "Microsoft Print to PDF"
5. **Adjust settings**: Select A4 size, include background graphics
6. **Save**: Click Save and choose your location
This will create a high-quality PDF with all charts and formatting preserved.
"""
    return instructions