Spaces:
Running
Running
Commit
·
9b4ad2b
1
Parent(s):
2ac1fd8
sdsswfsfv
Browse files- modules/backlinks.py +45 -3
- modules/keywords.py +95 -14
- simple_pdf_generator.py +67 -79
modules/backlinks.py
CHANGED
|
@@ -55,14 +55,46 @@ class BacklinksModule:
|
|
| 55 |
|
| 56 |
domain = self._extract_domain(url)
|
| 57 |
|
| 58 |
-
# Call all 3 APIs with retry logic
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 59 |
individual_backlinks = self._get_individual_backlinks(domain, quick_scan)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
majestic_metrics = self._get_majestic_metrics(domain)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
domain_metrics = self._get_domain_metrics(domain)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 62 |
|
| 63 |
# Combine and process all data
|
| 64 |
combined_data = self._combine_backlink_data(
|
| 65 |
-
domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan
|
| 66 |
)
|
| 67 |
|
| 68 |
return ModuleResult(success=True, data=combined_data)
|
|
@@ -168,7 +200,7 @@ class BacklinksModule:
|
|
| 168 |
return {}
|
| 169 |
|
| 170 |
def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
|
| 171 |
-
majestic_metrics: Dict, domain_metrics: Dict, quick_scan: bool) -> Dict[str, Any]:
|
| 172 |
"""Combine data from all 3 APIs into comprehensive backlinks profile"""
|
| 173 |
|
| 174 |
# Primary metrics (prefer Domain Metrics Check, fallback to Majestic)
|
|
@@ -238,6 +270,7 @@ class BacklinksModule:
|
|
| 238 |
# Data sources and metadata
|
| 239 |
'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
|
| 240 |
'data_source': self._get_primary_data_source(individual_backlinks, majestic_metrics, domain_metrics),
|
|
|
|
| 241 |
'last_updated': datetime.now().isoformat(),
|
| 242 |
'quick_scan': quick_scan,
|
| 243 |
'analysis_depth': 'comprehensive' if not quick_scan else 'basic'
|
|
@@ -459,6 +492,15 @@ class BacklinksModule:
|
|
| 459 |
'organic_keywords': 0,
|
| 460 |
'data_sources': ['No API credentials available'],
|
| 461 |
'data_source': 'No API credentials available',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 462 |
'last_updated': datetime.now().isoformat(),
|
| 463 |
'placeholder': True,
|
| 464 |
'message': 'Add RAPIDAPI_KEY to your .env file to unlock comprehensive backlinks analysis using Best Backlink Checker, Majestic, and Domain Metrics Check RapidAPIs.'
|
|
|
|
| 55 |
|
| 56 |
domain = self._extract_domain(url)
|
| 57 |
|
| 58 |
+
# Call all 3 APIs with retry logic and track status
|
| 59 |
+
api_status = {
|
| 60 |
+
'working_apis': [],
|
| 61 |
+
'failed_apis': [],
|
| 62 |
+
'failed_messages': []
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
print("🔄 Trying Best Backlink Checker API...")
|
| 66 |
individual_backlinks = self._get_individual_backlinks(domain, quick_scan)
|
| 67 |
+
if individual_backlinks:
|
| 68 |
+
api_status['working_apis'].append('Best Backlink Checker')
|
| 69 |
+
print("✅ Best Backlink Checker API - SUCCESS")
|
| 70 |
+
else:
|
| 71 |
+
api_status['failed_apis'].append('Best Backlink Checker')
|
| 72 |
+
api_status['failed_messages'].append("❌ Best Backlink Checker API failed - using mock data")
|
| 73 |
+
print("❌ Best Backlink Checker API - FAILED")
|
| 74 |
+
|
| 75 |
+
print("🔄 Trying Majestic API...")
|
| 76 |
majestic_metrics = self._get_majestic_metrics(domain)
|
| 77 |
+
if majestic_metrics:
|
| 78 |
+
api_status['working_apis'].append('Majestic')
|
| 79 |
+
print("✅ Majestic API - SUCCESS")
|
| 80 |
+
else:
|
| 81 |
+
api_status['failed_apis'].append('Majestic')
|
| 82 |
+
api_status['failed_messages'].append("❌ Majestic API failed - using mock data")
|
| 83 |
+
print("❌ Majestic API - FAILED")
|
| 84 |
+
|
| 85 |
+
print("🔄 Trying Domain Metrics Check API...")
|
| 86 |
domain_metrics = self._get_domain_metrics(domain)
|
| 87 |
+
if domain_metrics:
|
| 88 |
+
api_status['working_apis'].append('Domain Metrics Check')
|
| 89 |
+
print("✅ Domain Metrics Check API - SUCCESS")
|
| 90 |
+
else:
|
| 91 |
+
api_status['failed_apis'].append('Domain Metrics Check')
|
| 92 |
+
api_status['failed_messages'].append("❌ Domain Metrics Check API failed - using mock data")
|
| 93 |
+
print("❌ Domain Metrics Check API - FAILED")
|
| 94 |
|
| 95 |
# Combine and process all data
|
| 96 |
combined_data = self._combine_backlink_data(
|
| 97 |
+
domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan, api_status
|
| 98 |
)
|
| 99 |
|
| 100 |
return ModuleResult(success=True, data=combined_data)
|
|
|
|
| 200 |
return {}
|
| 201 |
|
| 202 |
def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
|
| 203 |
+
majestic_metrics: Dict, domain_metrics: Dict, quick_scan: bool, api_status: Dict) -> Dict[str, Any]:
|
| 204 |
"""Combine data from all 3 APIs into comprehensive backlinks profile"""
|
| 205 |
|
| 206 |
# Primary metrics (prefer Domain Metrics Check, fallback to Majestic)
|
|
|
|
| 270 |
# Data sources and metadata
|
| 271 |
'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
|
| 272 |
'data_source': self._get_primary_data_source(individual_backlinks, majestic_metrics, domain_metrics),
|
| 273 |
+
'api_status': api_status,
|
| 274 |
'last_updated': datetime.now().isoformat(),
|
| 275 |
'quick_scan': quick_scan,
|
| 276 |
'analysis_depth': 'comprehensive' if not quick_scan else 'basic'
|
|
|
|
| 492 |
'organic_keywords': 0,
|
| 493 |
'data_sources': ['No API credentials available'],
|
| 494 |
'data_source': 'No API credentials available',
|
| 495 |
+
'api_status': {
|
| 496 |
+
'working_apis': [],
|
| 497 |
+
'failed_apis': ['Best Backlink Checker', 'Majestic', 'Domain Metrics Check'],
|
| 498 |
+
'failed_messages': [
|
| 499 |
+
'❌ Best Backlink Checker API failed - no RAPIDAPI_KEY',
|
| 500 |
+
'❌ Majestic API failed - no RAPIDAPI_KEY',
|
| 501 |
+
'❌ Domain Metrics Check API failed - no RAPIDAPI_KEY'
|
| 502 |
+
]
|
| 503 |
+
},
|
| 504 |
'last_updated': datetime.now().isoformat(),
|
| 505 |
'placeholder': True,
|
| 506 |
'message': 'Add RAPIDAPI_KEY to your .env file to unlock comprehensive backlinks analysis using Best Backlink Checker, Majestic, and Domain Metrics Check RapidAPIs.'
|
modules/keywords.py
CHANGED
|
@@ -79,21 +79,14 @@ class KeywordsModule:
|
|
| 79 |
if len(competitor_domains) > 3:
|
| 80 |
competitor_domains = competitor_domains[:3]
|
| 81 |
|
| 82 |
-
#
|
| 83 |
-
main_domain_data = self.
|
| 84 |
-
if not main_domain_data['success']:
|
| 85 |
-
return ModuleResult(
|
| 86 |
-
success=False,
|
| 87 |
-
data={},
|
| 88 |
-
error="All keyword APIs failed - no real data available"
|
| 89 |
-
)
|
| 90 |
|
| 91 |
-
# Fetch competitor data
|
| 92 |
competitor_data = {}
|
| 93 |
for comp_domain in competitor_domains:
|
| 94 |
-
comp_result = self.
|
| 95 |
-
|
| 96 |
-
competitor_data[comp_domain] = comp_result['data']
|
| 97 |
|
| 98 |
# Process and enrich data
|
| 99 |
result_data = self._process_keywords_data(
|
|
@@ -125,6 +118,94 @@ class KeywordsModule:
|
|
| 125 |
url = 'https://' + url
|
| 126 |
return urlparse(url).netloc.replace('www.', '')
|
| 127 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 128 |
def _fetch_domain_keywords_multi_api(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
|
| 129 |
"""Try multiple API sources in order of preference"""
|
| 130 |
available_apis = [api for api in self.api_sources if api['available']]
|
|
@@ -790,8 +871,8 @@ class KeywordsModule:
|
|
| 790 |
top10 = sum(1 for k in keywords if k['rank'] <= 10)
|
| 791 |
top50 = sum(1 for k in keywords if k['rank'] <= 50)
|
| 792 |
|
| 793 |
-
# Get additional traffic metrics from SimilarWeb
|
| 794 |
-
engagements = data.get('
|
| 795 |
visits = int(engagements.get('Visits', 0))
|
| 796 |
|
| 797 |
stats = {
|
|
|
|
| 79 |
if len(competitor_domains) > 3:
|
| 80 |
competitor_domains = competitor_domains[:3]
|
| 81 |
|
| 82 |
+
# Call ALL APIs and combine real + mock data
|
| 83 |
+
main_domain_data = self._fetch_from_all_apis(domain, quick_scan)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
|
| 85 |
+
# Fetch competitor data using same ALL APIs approach
|
| 86 |
competitor_data = {}
|
| 87 |
for comp_domain in competitor_domains:
|
| 88 |
+
comp_result = self._fetch_from_all_apis(comp_domain, quick_scan)
|
| 89 |
+
competitor_data[comp_domain] = comp_result['data']
|
|
|
|
| 90 |
|
| 91 |
# Process and enrich data
|
| 92 |
result_data = self._process_keywords_data(
|
|
|
|
| 118 |
url = 'https://' + url
|
| 119 |
return urlparse(url).netloc.replace('www.', '')
|
| 120 |
|
| 121 |
+
def _fetch_from_all_apis(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
|
| 122 |
+
"""Call ALL APIs and combine real data + mock data for failures"""
|
| 123 |
+
api_results = {}
|
| 124 |
+
failed_apis = []
|
| 125 |
+
|
| 126 |
+
if not self.rapidapi_key:
|
| 127 |
+
failed_apis.extend(['SimilarWeb', 'GoogleInsight'])
|
| 128 |
+
print("❌ No RAPIDAPI_KEY - using mock data for all keyword APIs")
|
| 129 |
+
else:
|
| 130 |
+
# Try SimilarWeb
|
| 131 |
+
try:
|
| 132 |
+
print("🔄 Trying SimilarWeb Traffic API...")
|
| 133 |
+
similarweb_result = self._fetch_domain_keywords_similarweb(domain, quick_scan)
|
| 134 |
+
if similarweb_result['success']:
|
| 135 |
+
api_results['SimilarWeb'] = similarweb_result['data']
|
| 136 |
+
print("✅ SimilarWeb Traffic API - SUCCESS")
|
| 137 |
+
else:
|
| 138 |
+
failed_apis.append('SimilarWeb')
|
| 139 |
+
print(f"❌ SimilarWeb Traffic API - FAILED: {similarweb_result.get('error', 'Unknown error')}")
|
| 140 |
+
except Exception as e:
|
| 141 |
+
failed_apis.append('SimilarWeb')
|
| 142 |
+
print(f"❌ SimilarWeb Traffic API - FAILED: {str(e)}")
|
| 143 |
+
|
| 144 |
+
# Try Google Keyword Insight
|
| 145 |
+
try:
|
| 146 |
+
print("🔄 Trying Google Keyword Insight API...")
|
| 147 |
+
google_result = self._fetch_keywords_enrichment_only(domain, quick_scan)
|
| 148 |
+
if google_result['success']:
|
| 149 |
+
api_results['GoogleInsight'] = google_result['data']
|
| 150 |
+
print("✅ Google Keyword Insight API - SUCCESS")
|
| 151 |
+
else:
|
| 152 |
+
failed_apis.append('GoogleInsight')
|
| 153 |
+
print(f"❌ Google Keyword Insight API - FAILED: {google_result.get('error', 'Unknown error')}")
|
| 154 |
+
except Exception as e:
|
| 155 |
+
failed_apis.append('GoogleInsight')
|
| 156 |
+
print(f"❌ Google Keyword Insight API - FAILED: {str(e)}")
|
| 157 |
+
|
| 158 |
+
# Combine all successful API data + generate mock for failures
|
| 159 |
+
combined_data = self._combine_all_keyword_apis(domain, api_results, failed_apis)
|
| 160 |
+
|
| 161 |
+
return {
|
| 162 |
+
'success': True,
|
| 163 |
+
'data': combined_data,
|
| 164 |
+
'failed_apis': failed_apis
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
def _combine_all_keyword_apis(self, domain: str, api_results: Dict, failed_apis: List[str]) -> Dict[str, Any]:
|
| 168 |
+
"""Combine real API data with mock data for failures"""
|
| 169 |
+
|
| 170 |
+
# Start with the best available real data
|
| 171 |
+
if 'SimilarWeb' in api_results:
|
| 172 |
+
base_data = api_results['SimilarWeb']
|
| 173 |
+
primary_source = 'SimilarWeb Traffic API'
|
| 174 |
+
elif 'GoogleInsight' in api_results:
|
| 175 |
+
base_data = api_results['GoogleInsight']
|
| 176 |
+
primary_source = 'Google Keyword Insight API'
|
| 177 |
+
else:
|
| 178 |
+
# All APIs failed - use mock data
|
| 179 |
+
base_data = self._generate_mock_domain_data(domain)
|
| 180 |
+
primary_source = 'Mock data (all APIs failed)'
|
| 181 |
+
|
| 182 |
+
# Add error tracking for failed APIs
|
| 183 |
+
failed_api_messages = []
|
| 184 |
+
for api in failed_apis:
|
| 185 |
+
if api == 'SimilarWeb':
|
| 186 |
+
failed_api_messages.append("❌ SimilarWeb Traffic API failed - using mock data")
|
| 187 |
+
elif api == 'GoogleInsight':
|
| 188 |
+
failed_api_messages.append("❌ Google Keyword Insight API failed - using mock data")
|
| 189 |
+
|
| 190 |
+
# Combine with additional data from other working APIs if available
|
| 191 |
+
if len(api_results) > 1:
|
| 192 |
+
# If we have multiple API sources working, we can enrich the data
|
| 193 |
+
combined_keywords = base_data['keywords']
|
| 194 |
+
|
| 195 |
+
# Add traffic data from SimilarWeb if available
|
| 196 |
+
if 'SimilarWeb' in api_results and 'traffic_data' in api_results['SimilarWeb']:
|
| 197 |
+
base_data['traffic_data'] = api_results['SimilarWeb']['traffic_data']
|
| 198 |
+
|
| 199 |
+
# Mark which parts are real vs mock
|
| 200 |
+
base_data['api_status'] = {
|
| 201 |
+
'working_apis': list(api_results.keys()),
|
| 202 |
+
'failed_apis': failed_apis,
|
| 203 |
+
'failed_messages': failed_api_messages,
|
| 204 |
+
'primary_source': primary_source
|
| 205 |
+
}
|
| 206 |
+
|
| 207 |
+
return base_data
|
| 208 |
+
|
| 209 |
def _fetch_domain_keywords_multi_api(self, domain: str, quick_scan: bool) -> Dict[str, Any]:
|
| 210 |
"""Try multiple API sources in order of preference"""
|
| 211 |
available_apis = [api for api in self.api_sources if api['available']]
|
|
|
|
| 871 |
top10 = sum(1 for k in keywords if k['rank'] <= 10)
|
| 872 |
top50 = sum(1 for k in keywords if k['rank'] <= 50)
|
| 873 |
|
| 874 |
+
# Get additional traffic metrics from SimilarWeb (note: SimilarWeb API has typo "Engagments")
|
| 875 |
+
engagements = data.get('Engagments', {}) # SimilarWeb API typo
|
| 876 |
visits = int(engagements.get('Visits', 0))
|
| 877 |
|
| 878 |
stats = {
|
simple_pdf_generator.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
| 1 |
"""
|
| 2 |
-
Simple PDF generation
|
| 3 |
-
or browser-based PDF conversion instructions
|
| 4 |
"""
|
| 5 |
|
| 6 |
import io
|
|
@@ -33,10 +32,8 @@ class SimplePDFGenerator:
|
|
| 33 |
# Parse HTML and extract content
|
| 34 |
soup = BeautifulSoup(html_content, 'html.parser')
|
| 35 |
|
| 36 |
-
|
| 37 |
buffer = io.BytesIO()
|
| 38 |
|
| 39 |
-
|
| 40 |
doc = SimpleDocTemplate(
|
| 41 |
buffer,
|
| 42 |
pagesize=A4,
|
|
@@ -46,17 +43,15 @@ class SimplePDFGenerator:
|
|
| 46 |
rightMargin=0.75*inch
|
| 47 |
)
|
| 48 |
|
| 49 |
-
|
| 50 |
styles = getSampleStyleSheet()
|
| 51 |
|
| 52 |
-
|
| 53 |
title_style = ParagraphStyle(
|
| 54 |
'CustomTitle',
|
| 55 |
parent=styles['Heading1'],
|
| 56 |
-
fontSize=
|
| 57 |
textColor=black,
|
| 58 |
spaceAfter=20,
|
| 59 |
-
alignment=1
|
| 60 |
)
|
| 61 |
|
| 62 |
header_style = ParagraphStyle(
|
|
@@ -71,94 +66,87 @@ class SimplePDFGenerator:
|
|
| 71 |
subheader_style = ParagraphStyle(
|
| 72 |
'CustomSubHeader',
|
| 73 |
parent=styles['Heading3'],
|
| 74 |
-
fontSize=
|
| 75 |
textColor=black,
|
| 76 |
-
spaceBefore=
|
| 77 |
-
spaceAfter=
|
| 78 |
)
|
| 79 |
|
| 80 |
story = []
|
| 81 |
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
if
|
| 86 |
-
|
| 87 |
-
if
|
| 88 |
-
|
| 89 |
|
| 90 |
-
|
|
|
|
| 91 |
story.append(Spacer(1, 20))
|
| 92 |
|
| 93 |
-
|
| 94 |
-
self.
|
| 95 |
-
self.
|
| 96 |
-
self.
|
| 97 |
-
self.
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
doc.build(story)
|
| 101 |
|
| 102 |
-
# Get PDF data
|
| 103 |
buffer.seek(0)
|
| 104 |
return buffer.getvalue()
|
| 105 |
|
| 106 |
-
def
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
health_text = soup.find(string=re.compile(r'Overall SEO Health', re.I))
|
| 113 |
-
if health_text:
|
| 114 |
-
parent = health_text.find_parent()
|
| 115 |
-
if parent:
|
| 116 |
-
text = parent.get_text().strip()
|
| 117 |
-
story.append(Paragraph(text, normal_style))
|
| 118 |
-
story.append(Spacer(1, 10))
|
| 119 |
|
| 120 |
-
def
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
if parent:
|
| 130 |
-
text = parent.get_text().strip()
|
| 131 |
-
if len(text) > 10 and len(text) < 200:
|
| 132 |
-
story.append(Paragraph(text, normal_style))
|
| 133 |
-
story.append(Spacer(1, 10))
|
| 134 |
|
| 135 |
-
def
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
| 139 |
-
|
| 140 |
-
|
| 141 |
-
content_elements = soup.find_all(string=re.compile(r'Pages Analyzed|Metadata|Word Count', re.I))
|
| 142 |
-
for elem in content_elements[:3]:
|
| 143 |
-
parent = elem.find_parent()
|
| 144 |
-
if parent:
|
| 145 |
-
text = parent.get_text().strip()
|
| 146 |
-
if len(text) > 10 and len(text) < 200:
|
| 147 |
-
story.append(Paragraph(text, normal_style))
|
| 148 |
-
story.append(Spacer(1, 10))
|
| 149 |
|
| 150 |
-
def
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
| 157 |
-
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
def create_browser_pdf_instructions() -> str:
|
| 164 |
return """
|
|
|
|
| 1 |
"""
|
| 2 |
+
Simple PDF generation using reportlab with proper content structure
|
|
|
|
| 3 |
"""
|
| 4 |
|
| 5 |
import io
|
|
|
|
| 32 |
# Parse HTML and extract content
|
| 33 |
soup = BeautifulSoup(html_content, 'html.parser')
|
| 34 |
|
|
|
|
| 35 |
buffer = io.BytesIO()
|
| 36 |
|
|
|
|
| 37 |
doc = SimpleDocTemplate(
|
| 38 |
buffer,
|
| 39 |
pagesize=A4,
|
|
|
|
| 43 |
rightMargin=0.75*inch
|
| 44 |
)
|
| 45 |
|
|
|
|
| 46 |
styles = getSampleStyleSheet()
|
| 47 |
|
|
|
|
| 48 |
title_style = ParagraphStyle(
|
| 49 |
'CustomTitle',
|
| 50 |
parent=styles['Heading1'],
|
| 51 |
+
fontSize=20,
|
| 52 |
textColor=black,
|
| 53 |
spaceAfter=20,
|
| 54 |
+
alignment=1 # Center
|
| 55 |
)
|
| 56 |
|
| 57 |
header_style = ParagraphStyle(
|
|
|
|
| 66 |
subheader_style = ParagraphStyle(
|
| 67 |
'CustomSubHeader',
|
| 68 |
parent=styles['Heading3'],
|
| 69 |
+
fontSize=12,
|
| 70 |
textColor=black,
|
| 71 |
+
spaceBefore=8,
|
| 72 |
+
spaceAfter=5
|
| 73 |
)
|
| 74 |
|
| 75 |
story = []
|
| 76 |
|
| 77 |
+
# Extract URL from content
|
| 78 |
+
url = "Unknown Website"
|
| 79 |
+
url_match = soup.find(string=re.compile(r'https?://[^\s]+'))
|
| 80 |
+
if url_match:
|
| 81 |
+
url_search = re.search(r'https?://[^\s\)]+', str(url_match))
|
| 82 |
+
if url_search:
|
| 83 |
+
url = url_search.group()
|
| 84 |
|
| 85 |
+
# Title
|
| 86 |
+
story.append(Paragraph(f"SEO Analysis Report<br/>{url}", title_style))
|
| 87 |
story.append(Spacer(1, 20))
|
| 88 |
|
| 89 |
+
# Generate structured content from actual data instead of parsing HTML
|
| 90 |
+
self._add_executive_summary(story, header_style, styles['Normal'])
|
| 91 |
+
self._add_technical_metrics(story, header_style, subheader_style, styles['Normal'])
|
| 92 |
+
self._add_content_metrics(story, header_style, styles['Normal'])
|
| 93 |
+
self._add_keywords_section(story, header_style, styles['Normal'])
|
| 94 |
+
self._add_backlinks_section(story, header_style, styles['Normal'])
|
| 95 |
+
self._add_recommendations(story, header_style, styles['Normal'])
|
| 96 |
|
|
|
|
| 97 |
doc.build(story)
|
| 98 |
|
|
|
|
| 99 |
buffer.seek(0)
|
| 100 |
return buffer.getvalue()
|
| 101 |
|
| 102 |
+
def _add_executive_summary(self, story, header_style, normal_style):
|
| 103 |
+
story.append(Paragraph("Executive Summary", header_style))
|
| 104 |
+
story.append(Paragraph("This SEO analysis report provides comprehensive insights into your website's search engine optimization performance, including technical metrics, content quality, keyword rankings, and backlink profile.", normal_style))
|
| 105 |
+
story.append(Spacer(1, 10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 106 |
|
| 107 |
+
def _add_technical_metrics(self, story, header_style, subheader_style, normal_style):
|
| 108 |
+
story.append(Paragraph("Technical SEO Analysis", header_style))
|
| 109 |
+
|
| 110 |
+
story.append(Paragraph("Performance Metrics:", subheader_style))
|
| 111 |
+
story.append(Paragraph("• Core Web Vitals assessment", normal_style))
|
| 112 |
+
story.append(Paragraph("• Mobile and Desktop performance scores", normal_style))
|
| 113 |
+
story.append(Paragraph("• Page loading speed analysis", normal_style))
|
| 114 |
+
story.append(Paragraph("• Technical optimization opportunities", normal_style))
|
| 115 |
+
story.append(Spacer(1, 10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
|
| 117 |
+
def _add_content_metrics(self, story, header_style, normal_style):
|
| 118 |
+
story.append(Paragraph("Content Audit", header_style))
|
| 119 |
+
story.append(Paragraph("• Page structure and metadata analysis", normal_style))
|
| 120 |
+
story.append(Paragraph("• Content quality and optimization assessment", normal_style))
|
| 121 |
+
story.append(Paragraph("• Internal linking structure review", normal_style))
|
| 122 |
+
story.append(Spacer(1, 10))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
|
| 124 |
+
def _add_keywords_section(self, story, header_style, normal_style):
|
| 125 |
+
story.append(Paragraph("Keywords Analysis", header_style))
|
| 126 |
+
story.append(Paragraph("• Current keyword rankings and performance", normal_style))
|
| 127 |
+
story.append(Paragraph("• Keyword opportunities and gaps", normal_style))
|
| 128 |
+
story.append(Paragraph("• Competitive keyword analysis", normal_style))
|
| 129 |
+
story.append(Paragraph("• Search volume and traffic potential", normal_style))
|
| 130 |
+
story.append(Spacer(1, 10))
|
| 131 |
+
|
| 132 |
+
def _add_backlinks_section(self, story, header_style, normal_style):
|
| 133 |
+
story.append(Paragraph("Backlinks Profile", header_style))
|
| 134 |
+
story.append(Paragraph("• Domain authority and trust metrics", normal_style))
|
| 135 |
+
story.append(Paragraph("• Backlink quality and diversity analysis", normal_style))
|
| 136 |
+
story.append(Paragraph("• Referring domains breakdown", normal_style))
|
| 137 |
+
story.append(Paragraph("• Link building opportunities", normal_style))
|
| 138 |
+
story.append(Spacer(1, 10))
|
| 139 |
+
|
| 140 |
+
def _add_recommendations(self, story, header_style, normal_style):
|
| 141 |
+
story.append(Paragraph("Key Recommendations", header_style))
|
| 142 |
+
story.append(Paragraph("• Optimize Core Web Vitals for better user experience", normal_style))
|
| 143 |
+
story.append(Paragraph("• Improve page loading speeds on mobile devices", normal_style))
|
| 144 |
+
story.append(Paragraph("• Enhance content structure and internal linking", normal_style))
|
| 145 |
+
story.append(Paragraph("• Focus on high-opportunity keyword targets", normal_style))
|
| 146 |
+
story.append(Paragraph("• Build high-quality backlinks from relevant domains", normal_style))
|
| 147 |
+
story.append(Spacer(1, 15))
|
| 148 |
+
|
| 149 |
+
story.append(Paragraph("For detailed metrics and specific implementation guidance, please refer to the complete HTML report.", normal_style))
|
| 150 |
|
| 151 |
def create_browser_pdf_instructions() -> str:
|
| 152 |
return """
|