ThinklySEO / simple_pdf_generator.py
yashgori20's picture
Initial commit: SEO Report Generator
c0caea8
raw
history blame
3.71 kB
"""
Simple PDF generation fallback using reportlab (if available)
or browser-based PDF conversion instructions
"""
import io
from typing import Dict, Any
class SimplePDFGenerator:
    """Render a text-only PDF summary of an HTML report using reportlab.

    Only text is carried over: the document title, ``h1``-``h3`` headings and
    the text of ``p``/``div`` elements. ``style`` and ``script`` elements are
    removed before extraction; no layout, images or charts are reproduced.

    Attributes:
        available: ``True`` when ``reportlab`` could be imported at
            construction time, ``False`` otherwise.
    """

    # Heading tag name -> style name in reportlab's sample style sheet.
    _HEADING_STYLES = {'h1': 'Heading1', 'h2': 'Heading2', 'h3': 'Heading3'}
    # Body text must be strictly longer than this to be included.
    _MIN_PARAGRAPH_CHARS = 20
    # Body text is cut to this many characters before rendering.
    _MAX_PARAGRAPH_CHARS = 500
    # Used when the HTML has no <title> or the title is blank.
    _DEFAULT_TITLE = "SEO Report"

    def __init__(self):
        """Probe for reportlab and record the result in ``self.available``."""
        try:
            import reportlab  # noqa: F401 - imported only to probe availability
        except ImportError:
            self.available = False
        else:
            self.available = True

    @staticmethod
    def _to_markup(text, max_chars=None):
        """Return *text* truncated to *max_chars* and escaped for ``Paragraph``.

        reportlab's ``Paragraph`` parses its input as XML-like markup, so raw
        ``&``, ``<`` and ``>`` coming from page text must be escaped or the
        parser may reject the paragraph. Truncation is applied before
        escaping so that an entity such as ``&amp;`` is never cut in half.

        Args:
            text: Plain text extracted from the HTML.
            max_chars: Optional maximum number of source characters to keep;
                ``None`` keeps the whole text.

        Returns:
            The escaped (and possibly truncated) text.
        """
        from xml.sax.saxutils import escape

        if max_chars is not None:
            text = text[:max_chars]
        return escape(text)

    def generate_pdf(self, html_content: str) -> bytes:
        """Generate a PDF from HTML content using a simple text-based approach.

        Args:
            html_content: The HTML document to summarise.

        Returns:
            The bytes of the generated A4 PDF.

        Raises:
            ImportError: If reportlab was not importable when this instance
                was created, or if ``bs4`` cannot be imported.
        """
        if not self.available:
            raise ImportError("PDF generation requires reportlab: pip install reportlab")

        # Imported lazily so that merely importing this module never requires
        # the optional PDF dependencies.
        import logging

        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
        from bs4 import BeautifulSoup

        logger = logging.getLogger(__name__)

        # Parse the HTML and drop elements whose text is not report content.
        soup = BeautifulSoup(html_content, 'html.parser')
        for tag in soup(["style", "script"]):
            tag.decompose()

        buffer = io.BytesIO()
        doc = SimpleDocTemplate(buffer, pagesize=A4)
        styles = getSampleStyleSheet()

        # Fall back to the default when <title> is missing or blank.
        title_tag = soup.find('title')
        title = (title_tag.get_text().strip() if title_tag else "") or self._DEFAULT_TITLE
        story = [Paragraph(self._to_markup(title), styles['Title']), Spacer(1, 12)]

        # NOTE(review): find_all also returns nested elements, so the text of
        # a <p>/<div> inside another <div> is emitted once per enclosing
        # level. Left unchanged here to avoid dropping any content.
        for section in soup.find_all(['h1', 'h2', 'h3', 'p', 'div']):
            text = section.get_text().strip()
            if not text:
                continue

            heading_style = self._HEADING_STYLES.get(section.name)
            if heading_style is not None:
                story.append(Paragraph(self._to_markup(text), styles[heading_style]))
                story.append(Spacer(1, 6))
                continue

            # Body text: skip very short fragments (labels, icons, etc.).
            if len(text) <= self._MIN_PARAGRAPH_CHARS:
                continue
            try:
                paragraph = Paragraph(
                    self._to_markup(text, self._MAX_PARAGRAPH_CHARS),
                    styles['Normal'],
                )
            except Exception:
                # Best effort: one unrenderable paragraph must not abort the
                # whole report, but record it instead of hiding it.
                logger.warning("Skipping paragraph that could not be rendered", exc_info=True)
                continue
            story.append(paragraph)
            story.append(Spacer(1, 6))

        doc.build(story)
        # getvalue() returns the full buffer regardless of stream position.
        return buffer.getvalue()
def create_browser_pdf_instructions() -> str:
    """Build the Markdown guide for saving the HTML report as a PDF by hand.

    The guide walks the user through printing the downloaded HTML report to
    PDF from a web browser.

    Returns:
        The instructions as a Markdown string with a leading and a trailing
        newline.
    """
    guide_lines = (
        "## How to Create PDF from HTML Report:",
        "1. **Download the HTML report** using the button above",
        "2. **Open the HTML file** in your web browser (Chrome, Firefox, Edge)",
        "3. **Print the page**: Press Ctrl+P (Windows) or Cmd+P (Mac)",
        '4. **Select destination**: Choose "Save as PDF" or "Microsoft Print to PDF"',
        "5. **Adjust settings**: Select A4 size, include background graphics",
        "6. **Save**: Click Save and choose your location",
        "This will create a high-quality PDF with all charts and formatting preserved.",
    )
    # The text is wrapped in a newline on each side.
    return "\n" + "\n".join(guide_lines) + "\n"