Spaces:

yashgori20
/

ThinklySEO

Running

App Files Files Community

ThinklySEO / simple_pdf_generator.py

yashgori20

Initial commit: SEO Report Generator

c0caea8 3 months ago

raw

history blame

3.71 kB

	"""
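	Simple PDF generation fallback: converts report HTML to a text-only PDF with
	reportlab (and BeautifulSoup for HTML parsing) when they are installed, and
	otherwise offers instructions for browser-based PDF conversion.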
	"""

	import html
	import io
	import logging
	from typing import Dict, Any

	class SimplePDFGenerator:
	    """Best-effort, text-only HTML-to-PDF converter built on reportlab.

	    Only the text of the document title, headings (h1-h3), paragraphs and
	    divs is carried over into the PDF; ``<style>`` and ``<script>`` content
	    is discarded.

	    Attributes:
	        available: True when the ``reportlab`` package could be imported at
	            construction time, False otherwise.
	    """

	    # HTML heading tag -> name of the reportlab sample style used for it.
	    _HEADING_STYLES = {'h1': 'Heading1', 'h2': 'Heading2', 'h3': 'Heading3'}
	    # Title used when the document has no (or an empty) <title> element.
	    _DEFAULT_TITLE = "SEO Report"
	    # Body text of this many characters or fewer is skipped as noise.
	    _MIN_BODY_CHARS = 20
	    # Body text is truncated to this many characters.
	    _MAX_BODY_CHARS = 500

	    def __init__(self):
	        """Probe whether reportlab is installed and record it in ``available``."""
	        try:
	            import reportlab  # noqa: F401 -- imported only to probe availability
	        except ImportError:
	            self.available = False
	        else:
	            self.available = True

	    @staticmethod
	    def _escape_markup(text: str, limit=None) -> str:
	        """Return *text* made safe for use as reportlab Paragraph markup.

	        Paragraph parses its input as XML-like markup, so literal ``&``,
	        ``<`` and ``>`` must be written as entities.

	        Args:
	            text: Plain text extracted from the HTML document.
	            limit: Optional maximum number of characters of *text* to keep.
	                Truncation happens before escaping so an entity is never
	                cut in half.

	        Returns:
	            The (optionally truncated) text with ``&``, ``<`` and ``>``
	            replaced by ``&amp;``, ``&lt;`` and ``&gt;``.
	        """
	        if limit is not None:
	            text = text[:limit]
	        return html.escape(text, quote=False)

	    def generate_pdf(self, html_content: str) -> bytes:
	        """Generate a PDF from HTML content using a simple text-based approach.

	        Args:
	            html_content: HTML source of the report.

	        Returns:
	            The bytes of an A4 PDF containing the title, headings and body
	            text found in *html_content*.

	        Raises:
	            ImportError: If reportlab was not importable when this object
	                was created, or if ``bs4`` cannot be imported.
	        """
	        if not self.available:
	            raise ImportError("PDF generation requires reportlab: pip install reportlab")

	        # Imported lazily so this module loads without the optional packages.
	        from reportlab.lib.pagesizes import A4
	        from reportlab.lib.styles import getSampleStyleSheet
	        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
	        from bs4 import BeautifulSoup

	        log = logging.getLogger(__name__)

	        # Parse HTML and drop content that must never be rendered as text.
	        soup = BeautifulSoup(html_content, 'html.parser')
	        for tag in soup(["style", "script"]):
	            tag.decompose()

	        buffer = io.BytesIO()
	        doc = SimpleDocTemplate(buffer, pagesize=A4)
	        styles = getSampleStyleSheet()
	        story = []

	        # Title: fall back to the default when <title> is missing or blank.
	        title_tag = soup.find('title')
	        title = title_tag.get_text().strip() if title_tag else ""
	        if not title:
	            title = self._DEFAULT_TITLE
	        story.append(Paragraph(self._escape_markup(title), styles['Title']))
	        story.append(Spacer(1, 12))

	        # NOTE(review): find_all also returns container divs, and get_text()
	        # on a container includes the text of nested matches, so the same
	        # text can appear more than once in the PDF. Left unchanged here.
	        for section in soup.find_all(['h1', 'h2', 'h3', 'p', 'div']):
	            text = section.get_text().strip()
	            if not text:
	                continue

	            heading_style = self._HEADING_STYLES.get(section.name)
	            if heading_style is not None:
	                story.append(Paragraph(self._escape_markup(text), styles[heading_style]))
	                story.append(Spacer(1, 6))
	                continue

	            # Paragraph / div body text: skip very short fragments.
	            if len(text) <= self._MIN_BODY_CHARS:
	                continue
	            try:
	                paragraph = Paragraph(
	                    self._escape_markup(text, self._MAX_BODY_CHARS),
	                    styles['Normal'],
	                )
	            except Exception:
	                # Stay best-effort: one bad fragment must not abort the
	                # report, but leave a trace instead of failing silently.
	                log.warning("Skipping <%s> content that could not be rendered",
	                            section.name, exc_info=True)
	                continue
	            story.append(paragraph)
	            story.append(Spacer(1, 6))

	        doc.build(story)

	        # getvalue() returns the whole buffer regardless of stream position.
	        return buffer.getvalue()

	def create_browser_pdf_instructions() -> str:
	    """Build the Markdown guide for producing a PDF via the browser's print dialog.

	    Returns:
	        A multi-line Markdown string: a heading, six numbered steps and a
	        closing note.
	    """
	    instructions = """
	## How to Create PDF from HTML Report:

	1. Download the HTML report using the button above
	2. Open the HTML file in your web browser (Chrome, Firefox, Edge)
	3. Print the page: Press Ctrl+P (Windows) or Cmd+P (Mac)
	4. Select destination: Choose "Save as PDF" or "Microsoft Print to PDF"
	5. Adjust settings: Select A4 size, include background graphics
	6. Save: Click Save and choose your location

	This will create a high-quality PDF with all charts and formatting preserved.
	"""
	    return instructions