Commit 5f0cfa7 · Parent: ee1f542 · Author: ya

Files changed:
- .gitignore (+2, -0)
- README.md (+1, -1)
- app.py (+57, -1)
- llm_recommendations.py (+344, -0)
- modules/backlinks.py (+451, -0)
- modules/keywords.py (+315, -0)
- report_generator.py (+247, -7)
- requirements.txt (+15, -1)
- simple_pdf_generator.py (+1, -0)
.gitignore CHANGED

```diff
@@ -97,6 +97,8 @@ celerybeat.pid
 
 # Environments
 .env
+.env.local
+.env.production
 .venv
 env/
 venv/
```
README.md CHANGED

```diff
@@ -31,7 +31,7 @@ Professional SEO analysis and reporting tool that creates comprehensive SEO audi
 
 ### 🚧 Planned for Future Versions
 - Keyword Rankings (Google Search Console integration)
-- Backlink Profile Analysis (
+- Backlink Profile Analysis (RapidAPI)
 - Advanced Competitor Analysis
 - GA4/Conversion Tracking Integration
 
```
app.py CHANGED

```diff
@@ -8,8 +8,11 @@ import uuid
 # Import SEO modules
 from modules.technical_seo import TechnicalSEOModule
 from modules.content_audit import ContentAuditModule
+from modules.keywords import KeywordsModule
+from modules.backlinks import BacklinksModule
 from report_generator import ReportGenerator
 from simple_pdf_generator import SimplePDFGenerator
+from llm_recommendations import LLMRecommendations
 
 app = Flask(__name__, static_folder='static')
 app.secret_key = 'seo_report_generator_2024'
@@ -17,8 +20,11 @@ app.secret_key = 'seo_report_generator_2024'
 # Initialize modules
 technical_module = TechnicalSEOModule()
 content_module = ContentAuditModule()
+keywords_module = KeywordsModule()
+backlinks_module = BacklinksModule()
 report_gen = ReportGenerator()
 pdf_gen = SimplePDFGenerator()
+llm_recommendations = LLMRecommendations()
 
 # Store for generated reports (in production, use database)
 reports_store = {}
@@ -56,15 +62,59 @@ def generate_report():
         # Content Audit
         content_data = content_module.analyze(url)
 
+        # Keywords Analysis
+        keywords_data = keywords_module.analyze(url).data
+
+        # Backlinks Analysis - COMMENTED OUT TO SAVE API CREDITS
+        # print(f"DEBUG: Starting backlinks analysis for {url}")
+        # backlinks_result = backlinks_module.analyze(url)
+        # backlinks_data = backlinks_result.data
+        # print(f"DEBUG: Backlinks analysis result - Success: {backlinks_result.success}")
+        # print(f"DEBUG: Backlinks data keys: {list(backlinks_data.keys())}")
+        # if backlinks_data.get('total_backlinks'):
+        #     print(f"DEBUG: Total backlinks found: {backlinks_data.get('total_backlinks')}")
+        # if backlinks_data.get('placeholder'):
+        #     print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
+
+        # Use placeholder backlinks data to save API credits
+        backlinks_data = {
+            'total_backlinks': 0,
+            'total_ref_domains': 0,
+            'domain_rating': 0,
+            'authority_scores': {'ahrefs_dr': 0, 'moz_da': 0, 'moz_pa': 0, 'majestic_tf': 0, 'majestic_cf': 0},
+            'referring_domains': [],
+            'anchor_distribution': [],
+            'monthly_changes': {'new_backlinks': 0, 'lost_backlinks': 0, 'net_change': 0},
+            'top_backlinks': [],
+            'quality_metrics': {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0},
+            'edu_links': 0,
+            'gov_links': 0,
+            'estimated_organic_traffic': 0,
+            'organic_keywords': 0,
+            'data_sources': ['API disabled to save credits'],
+            'placeholder': True,
+            'message': 'Backlinks analysis temporarily disabled to conserve API credits.'
+        }
+
+        # Generate LLM Recommendations
+        llm_rec_data = llm_recommendations.generate_recommendations(
+            url, technical_data, content_data, keywords_data, backlinks_data
+        )
+
         # Competitor Analysis
         competitor_data = []
         for comp_url in competitor_list:
             comp_technical = technical_module.analyze(comp_url)
             comp_content = content_module.analyze(comp_url, quick_scan=True)
+            comp_keywords = keywords_module.analyze(comp_url, quick_scan=True).data
+            # comp_backlinks = backlinks_module.analyze(comp_url, quick_scan=True).data  # SAVE API CREDITS
+            comp_backlinks = {'placeholder': True, 'message': 'Disabled to save credits'}
             competitor_data.append({
                 'url': comp_url,
                 'technical': comp_technical,
-                'content': comp_content
+                'content': comp_content,
+                'keywords': comp_keywords,
+                'backlinks': comp_backlinks
             })
 
         # Generate HTML report
@@ -73,6 +123,9 @@ def generate_report():
             technical_data=technical_data,
             content_data=content_data,
             competitor_data=competitor_data,
+            keywords_data=keywords_data,
+            backlinks_data=backlinks_data,
+            llm_recommendations=llm_rec_data,
             include_charts=True
         )
 
@@ -82,6 +135,9 @@ def generate_report():
             'html': report_html,
             'technical_data': technical_data,
             'content_data': content_data,
+            'keywords_data': keywords_data,
+            'backlinks_data': backlinks_data,
+            'llm_recommendations': llm_rec_data,
             'competitor_data': competitor_data
         }
 
```
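The new modules read their credentials from environment variables via `load_dotenv()`, which is why `.env.local` and `.env.production` were added to `.gitignore` above. A minimal pre-flight sketch, assuming the variable names used in this commit (`GROQ_API_KEY`, `RAPIDAPI_KEY`); this check is not part of the app itself:

```python
# Hypothetical pre-flight check: confirm the env vars the new modules read
# via os.getenv() are set before launching the Flask app.
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env, the same mechanism llm_recommendations.py uses

for key in ('GROQ_API_KEY', 'RAPIDAPI_KEY'):
    status = 'set' if os.getenv(key) else 'MISSING (module falls back to placeholder data)'
    print(f'{key}: {status}')
```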
llm_recommendations.py ADDED

```python
"""
Groq LLM Integration for Smart SEO Recommendations
Analyzes all 4 modules (Technical SEO, Content Audit, Keywords, Backlinks) to generate intelligent recommendations
"""

import os
import json
from typing import Dict, Any, List
from groq import Groq
from dotenv import load_dotenv

# Load environment variables
load_dotenv()


class LLMRecommendations:
    def __init__(self):
        try:
            self.client = Groq(api_key=os.getenv('GROQ_API_KEY'))
            self.available = True
        except Exception:
            self.client = None
            self.available = False

    def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
                                 content_data: Dict[str, Any], keywords_data: Dict[str, Any],
                                 backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Generate comprehensive SEO recommendations based on all module data

        Args:
            url: Target website URL
            technical_data: Technical SEO analysis results
            content_data: Content audit results
            keywords_data: Keywords analysis results
            backlinks_data: Backlinks analysis results

        Returns:
            Dictionary with recommendations and insights
        """
        if not self.available:
            return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)

        try:
            # Prepare context data for LLM
            context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)

            # Generate recommendations using Groq
            recommendations = self._query_llm(context)

            return {
                'recommendations': recommendations,
                'executive_insights': self._generate_executive_insights(context),
                'priority_actions': self._extract_priority_actions(recommendations),
                'data_source': 'Groq LLM Analysis',
                'generated_at': context['analysis_date']
            }

        except Exception as e:
            return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data, error=str(e))

    def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
                         keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
        """Prepare structured context for LLM analysis"""

        # Extract key metrics from each module
        context = {
            'website': url,
            'analysis_date': technical_data.get('last_updated', ''),
            'technical_seo': {
                'mobile_score': technical_data.get('mobile_score', 0),
                'desktop_score': technical_data.get('desktop_score', 0),
                'core_web_vitals': technical_data.get('core_web_vitals', {}),
                'issues_count': len(technical_data.get('issues', [])),
                'top_issues': technical_data.get('issues', [])[:3]
            },
            'content_audit': {
                'pages_analyzed': content_data.get('pages_analyzed', 0),
                'metadata_completeness': content_data.get('metadata_completeness', {}),
                'avg_word_count': content_data.get('avg_word_count', 0),
                'cta_presence': content_data.get('cta_presence', 0),
                'content_freshness': content_data.get('content_freshness', {})
            },
            'keywords': {
                'total_keywords': keywords_data.get('total_keywords', 0),
                'position_distribution': keywords_data.get('position_distribution', {}),
                'data_available': not keywords_data.get('placeholder', False),
                'opportunity_keywords': len(keywords_data.get('opportunity_keywords', [])),
                'data_source': keywords_data.get('data_source', 'Unknown')
            },
            'backlinks': {
                'total_backlinks': backlinks_data.get('total_backlinks', 0),
                'total_ref_domains': backlinks_data.get('total_ref_domains', 0),
                'domain_rating': backlinks_data.get('domain_rating', 0),
                'monthly_changes': backlinks_data.get('monthly_changes', {}),
                'data_available': not backlinks_data.get('placeholder', False),
                'data_source': backlinks_data.get('data_source', 'Unknown')
            }
        }

        return context

    def _query_llm(self, context: Dict[str, Any]) -> List[str]:
        """Query Groq LLM for SEO recommendations"""

        prompt = f"""
You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.

TECHNICAL SEO DATA:
- Mobile Performance Score: {context['technical_seo']['mobile_score']}/100
- Desktop Performance Score: {context['technical_seo']['desktop_score']}/100
- Core Web Vitals: {json.dumps(context['technical_seo']['core_web_vitals'])}
- Critical Issues Found: {context['technical_seo']['issues_count']}
- Top Issues: {context['technical_seo']['top_issues']}

CONTENT AUDIT DATA:
- Pages Analyzed: {context['content_audit']['pages_analyzed']}
- Metadata Completeness: {json.dumps(context['content_audit']['metadata_completeness'])}
- Average Word Count: {context['content_audit']['avg_word_count']}
- CTA Presence: {context['content_audit']['cta_presence']}%
- Content Freshness: {json.dumps(context['content_audit']['content_freshness'])}

KEYWORDS DATA:
- Total Keywords Tracked: {context['keywords']['total_keywords']}
- Position Distribution: {json.dumps(context['keywords']['position_distribution'])}
- Data Available: {context['keywords']['data_available']}
- Opportunity Keywords: {context['keywords']['opportunity_keywords']}
- Source: {context['keywords']['data_source']}

BACKLINKS DATA:
- Total Backlinks: {context['backlinks']['total_backlinks']}
- Referring Domains: {context['backlinks']['total_ref_domains']}
- Domain Rating: {context['backlinks']['domain_rating']}
- Monthly Changes: {json.dumps(context['backlinks']['monthly_changes'])}
- Data Available: {context['backlinks']['data_available']}
- Source: {context['backlinks']['data_source']}

CRITICAL INSTRUCTIONS:
1. Analyze the data holistically across all 4 modules
2. Identify the TOP 3 most critical issues that need immediate attention
3. Provide specific, actionable recommendations with clear steps
4. If API data is missing (placeholder: true), acknowledge this and focus on available data
5. Prioritize recommendations by potential impact and ease of implementation
6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
7. Provide estimated timelines and resources needed for each recommendation

Generate exactly 8-12 specific recommendations in this format:
- **[Priority Level]** [Specific Action]: [Detailed explanation with steps and expected impact]

Priority Levels: HIGH, MEDIUM, LOW
Focus on actionable items that can be implemented within 30-90 days.

Response:
"""

        try:
            chat_completion = self.client.chat.completions.create(
                messages=[
                    {'role': 'user', 'content': prompt}
                ],
                model="mixtral-8x7b-32768",  # Using Mixtral for better reasoning
                stream=False,
                temperature=0.1,  # Low temperature for consistent, focused recommendations
                max_tokens=1500
            )

            response = chat_completion.choices[0].message.content.strip()

            # Parse recommendations from response
            recommendations = []
            lines = response.split('\n')
            for line in lines:
                line = line.strip()
                if line.startswith('- **') or line.startswith('•'):
                    # Clean up the recommendation
                    recommendation = line.replace('- **', '').replace('• **', '').strip()
                    if recommendation:
                        recommendations.append(recommendation)

            return recommendations if recommendations else [response]

        except Exception as e:
            return [f"LLM Error: {str(e)}"]

    def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
        """Generate high-level executive insights"""
        insights = []

        # Technical Performance Insight
        mobile_score = context['technical_seo']['mobile_score']
        desktop_score = context['technical_seo']['desktop_score']
        avg_score = (mobile_score + desktop_score) / 2

        if avg_score < 50:
            insights.append(f"🔴 Critical: Website performance is severely impacting user experience (avg: {avg_score:.0f}/100)")
        elif avg_score < 75:
            insights.append(f"🟡 Warning: Website performance needs improvement (avg: {avg_score:.0f}/100)")
        else:
            insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")

        # Content Insight
        pages = context['content_audit']['pages_analyzed']
        if pages > 0:
            metadata = context['content_audit']['metadata_completeness']
            title_pct = (metadata.get('with_title', 0) / pages * 100) if pages > 0 else 0

            if title_pct < 80:
                insights.append(f"🔴 Content Issue: {100-title_pct:.0f}% of pages missing critical metadata")
            else:
                insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")

        # Keywords Insight
        if context['keywords']['data_available']:
            total_keywords = context['keywords']['total_keywords']
            pos_dist = context['keywords']['position_distribution']
            top_10_pct = (pos_dist.get('top_10', 0) / total_keywords * 100) if total_keywords > 0 else 0

            if top_10_pct < 15:
                insights.append(f"🔴 SEO Visibility: Only {top_10_pct:.0f}% of keywords rank in top 10")
            elif top_10_pct < 30:
                insights.append(f"🟡 SEO Opportunity: {top_10_pct:.0f}% of keywords in top 10 - room for growth")
            else:
                insights.append(f"🟢 Strong SEO: {top_10_pct:.0f}% of keywords ranking in top 10")
        else:
            insights.append("📊 Connect keyword tracking tools for visibility insights")

        # Backlinks Insight
        if context['backlinks']['data_available']:
            ref_domains = context['backlinks']['total_ref_domains']
            domain_rating = context['backlinks']['domain_rating']

            if ref_domains < 50:
                insights.append(f"🔴 Link Building: Low referring domains ({ref_domains}) - aggressive outreach needed")
            elif ref_domains < 200:
                insights.append(f"🟡 Authority Building: Moderate link profile ({ref_domains} domains)")
            else:
                insights.append(f"🟢 Strong Authority: Healthy backlink profile ({ref_domains} referring domains)")
        else:
            insights.append("📊 Connect backlink analysis tools for authority insights")

        return insights

    def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
        """Extract priority actions from recommendations"""
        priority_actions = []

        for rec in recommendations:
            if '**HIGH**' in rec or '**CRITICAL**' in rec:
                # Extract action title and description
                parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
                if ':' in parts:
                    title, description = parts.split(':', 1)
                    priority_actions.append({
                        'title': title.strip(),
                        'description': description.strip(),
                        'priority': 'HIGH'
                    })

        # If no high priority actions found, take first 3
        if not priority_actions and recommendations:
            for i, rec in enumerate(recommendations[:3]):
                if ':' in rec:
                    title, description = rec.split(':', 1)
                    priority_actions.append({
                        'title': title.replace('*', '').strip(),
                        'description': description.strip(),
                        'priority': 'HIGH'
                    })

        return priority_actions[:5]  # Top 5 priority actions

    def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
                                           keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
        """Generate basic recommendations when LLM is not available"""

        recommendations = []

        # Technical recommendations
        mobile_score = technical_data.get('mobile_score', 0)
        desktop_score = technical_data.get('desktop_score', 0)

        if mobile_score < 50:
            recommendations.append("**HIGH** Improve Mobile Performance: Optimize images, reduce JavaScript, enable compression")
        if desktop_score < 50:
            recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")

        # Content recommendations
        pages = content_data.get('pages_analyzed', 0)
        if pages > 0:
            metadata = content_data.get('metadata_completeness', {})
            if metadata.get('with_title', 0) < pages * 0.8:
                recommendations.append("**HIGH** Fix Metadata: Add missing title tags and meta descriptions")

            if content_data.get('avg_word_count', 0) < 300:
                recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")

        # Keywords recommendations
        if not keywords_data.get('placeholder', False):
            total_keywords = keywords_data.get('total_keywords', 0)
            pos_dist = keywords_data.get('position_distribution', {})

            if total_keywords > 0 and pos_dist.get('top_10', 0) < total_keywords * 0.2:
                recommendations.append("**HIGH** Improve Keyword Rankings: Focus on on-page SEO for underperforming keywords")
        else:
            recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")

        # Backlinks recommendations
        if not backlinks_data.get('placeholder', False):
            ref_domains = backlinks_data.get('total_ref_domains', 0)
            if ref_domains < 50:
                recommendations.append("**HIGH** Build Authority: Implement aggressive link building and outreach strategy")
        else:
            recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")

        # Default recommendations if none generated
        if not recommendations:
            recommendations = [
                "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
                "**MEDIUM** Optimize Content Strategy: Ensure all pages have unique, valuable content",
                "**LOW** Monitor SEO Performance: Set up tracking for keywords and backlinks"
            ]

        insights = [
            "📊 Basic SEO analysis completed - connect APIs for deeper insights",
            f"📊 Analyzed {pages} pages for content quality",
            "⚠️ Enhanced recommendations require API integrations"
        ]

        if error:
            insights.append(f"❌ LLM Error: {error}")

        return {
            'recommendations': recommendations,
            'executive_insights': insights,
            'priority_actions': [
                {
                    'title': 'Connect SEO APIs',
                    'description': 'Set up Google Search Console and RapidAPI for comprehensive analysis',
                    'priority': 'HIGH'
                }
            ],
            'data_source': 'Fallback Analysis',
            'generated_at': technical_data.get('last_updated', '')
        }
```
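A minimal usage sketch for the class above; the module dicts are stub inputs shaped after the keys `_prepare_context()` reads, not output from the real analysis modules. When `GROQ_API_KEY` is absent the class should take the fallback path, which returns the same result shape:

```python
# Sketch only: stub inputs mirroring the keys _prepare_context() expects.
from llm_recommendations import LLMRecommendations

llm = LLMRecommendations()
result = llm.generate_recommendations(
    url='https://example.com',                        # hypothetical target
    technical_data={'mobile_score': 42, 'desktop_score': 61,
                    'issues': ['slow LCP'], 'last_updated': '2024-01-01'},
    content_data={'pages_analyzed': 10,
                  'metadata_completeness': {'with_title': 7},
                  'avg_word_count': 450},
    keywords_data={'placeholder': True},              # no keyword API connected
    backlinks_data={'placeholder': True},             # no backlink API connected
)
print(result['data_source'])
for insight in result['executive_insights']:
    print(insight)
```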
modules/backlinks.py ADDED

```python
"""
Backlinks Profile Module using RapidAPI endpoints
Combines 3 RapidAPI endpoints: Best Backlink Checker, Majestic, and Domain Metrics Check
"""

import os
import requests
import time
from typing import Dict, Any, List, Optional
from urllib.parse import urlparse
from datetime import datetime, timedelta


class ModuleResult:
    """Standard result object for SEO modules"""
    def __init__(self, success: bool, data: Dict[str, Any], error: str = None):
        self.success = success
        self.data = data
        self.error = error


class BacklinksModule:
    def __init__(self):
        self.rapidapi_key = os.getenv('RAPIDAPI_KEY')
        self.timeout = int(os.getenv('RAPIDAPI_TIMEOUT', '30'))
        self.max_retries = int(os.getenv('BACKLINKS_MAX_RETRIES', '3'))

        # RapidAPI endpoints
        self.backlink_checker_url = "https://best-backlink-checker-api.p.rapidapi.com/excatbacklinks_noneng.php"
        self.majestic_url = "https://majestic1.p.rapidapi.com/url_metrics"
        self.domain_metrics_url = "https://domain-metrics-check.p.rapidapi.com/domain-metrics"

        # Common headers
        self.headers = {
            'x-rapidapi-key': self.rapidapi_key,
            'Accept': 'application/json'
        }

    def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
        """
        Analyze backlink profile using multiple RapidAPI endpoints

        Args:
            url: Target website URL
            quick_scan: If True, use cached data or limited analysis

        Returns:
            ModuleResult with comprehensive backlinks data
        """
        try:
            if not self.rapidapi_key:
                return self._generate_no_api_data(url)

            domain = self._extract_domain(url)

            # Call all 3 APIs with retry logic
            individual_backlinks = self._get_individual_backlinks(domain, quick_scan)
            majestic_metrics = self._get_majestic_metrics(domain)
            domain_metrics = self._get_domain_metrics(domain)

            # Combine and process all data
            combined_data = self._combine_backlink_data(
                domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan
            )

            return ModuleResult(success=True, data=combined_data)

        except Exception as e:
            return ModuleResult(
                success=False,
                data={},
                error=f"Backlinks analysis failed: {str(e)}"
            )

    def _extract_domain(self, url: str) -> str:
        """Extract clean domain from URL"""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        domain = urlparse(url).netloc.replace('www.', '')
        return domain

    def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
        """Make API request with retry logic"""
        if headers is None:
            headers = self.headers.copy()

        for attempt in range(self.max_retries):
            try:
                response = requests.get(url, params=params, headers=headers, timeout=self.timeout)

                if response.status_code == 200:
                    return response.json()
                elif response.status_code == 429:  # Rate limit
                    wait_time = (attempt + 1) * 2  # Exponential backoff
                    print(f"Rate limited, waiting {wait_time}s...")
                    time.sleep(wait_time)
                    continue
                else:
                    print(f"API error {response.status_code}: {response.text}")

            except requests.exceptions.Timeout:
                print(f"Timeout on attempt {attempt + 1}")
                if attempt < self.max_retries - 1:
                    time.sleep(2)

            except Exception as e:
                print(f"Request error: {str(e)}")
                if attempt < self.max_retries - 1:
                    time.sleep(2)

        return None

    def _get_individual_backlinks(self, domain: str, quick_scan: bool = False) -> List[Dict]:
        """Get individual backlinks data"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'best-backlink-checker-api.p.rapidapi.com'

            params = {'domain': f'https://{domain}'}

            data = self._api_request_with_retry(self.backlink_checker_url, params, headers)

            if data and isinstance(data, list):
                # Limit results for quick scan
                if quick_scan:
                    return data[:50]
                return data[:500]  # Reasonable limit to avoid memory issues

        except Exception as e:
            print(f"Individual backlinks API error: {str(e)}")

        return []

    def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
        """Get Majestic domain metrics via RapidAPI"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'

            params = {'url': domain}

            data = self._api_request_with_retry(self.majestic_url, params, headers)

            if data and data.get('status') == 'success':
                return data

        except Exception as e:
            print(f"Majestic RapidAPI error: {str(e)}")

        return {}

    def _get_domain_metrics(self, domain: str) -> Dict[str, Any]:
        """Get comprehensive domain metrics"""
        try:
            headers = self.headers.copy()
            headers['x-rapidapi-host'] = 'domain-metrics-check.p.rapidapi.com'

            # API expects domain with trailing slash
            url = f"{self.domain_metrics_url}/{domain}/"

            data = self._api_request_with_retry(url, headers=headers)

            if data and data.get('domain'):
                return data

        except Exception as e:
            print(f"Domain metrics API error: {str(e)}")

        return {}

    def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
                               majestic_metrics: Dict, domain_metrics: Dict, quick_scan: bool) -> Dict[str, Any]:
        """Combine data from all 3 APIs into comprehensive backlinks profile"""

        # Primary metrics (prefer Domain Metrics Check, fallback to Majestic)
        total_backlinks = (
            int(domain_metrics.get('ahrefsBacklinks', 0)) or
            int(domain_metrics.get('majesticLinks', 0)) or
            int(majestic_metrics.get('majesticLinks', 0)) or
            len(individual_backlinks)
        )

        total_ref_domains = (
            int(domain_metrics.get('ahrefsRefDomains', 0)) or
            int(domain_metrics.get('majesticRefDomains', 0)) or
            int(majestic_metrics.get('majesticRefDomains', 0)) or
            len(set(link.get('url_from', '').split('/')[2] for link in individual_backlinks if link.get('url_from')))
        )

        # Authority scores (multiple sources for validation)
        domain_rating = (
            int(domain_metrics.get('ahrefsDR', 0)) or
            int(domain_metrics.get('majesticTF', 0)) or
            int(majestic_metrics.get('majesticTF', 0))
        )

        # Process individual backlinks for detailed analysis
        referring_domains = self._extract_referring_domains(individual_backlinks)
        anchor_distribution = self._extract_anchor_distribution(individual_backlinks)
        monthly_changes = self._calculate_monthly_changes(individual_backlinks)
        top_backlinks = self._get_top_backlinks(individual_backlinks)

        # Link quality analysis
        quality_metrics = self._analyze_link_quality(individual_backlinks, domain_metrics)

        # Comprehensive backlinks data
        backlinks_data = {
            'total_backlinks': total_backlinks,
            'total_ref_domains': total_ref_domains,
            'domain_rating': domain_rating,

            # Authority scores from multiple sources
            'authority_scores': {
                'ahrefs_dr': int(domain_metrics.get('ahrefsDR', 0)),
                'moz_da': int(domain_metrics.get('mozDA', 0)),
                'moz_pa': int(domain_metrics.get('mozPA', 0)),
                'majestic_tf': int(domain_metrics.get('majesticTF', 0) or majestic_metrics.get('majesticTF', 0)),
                'majestic_cf': int(domain_metrics.get('majesticCF', 0) or majestic_metrics.get('majesticCF', 0))
            },

            # Detailed analysis
            'referring_domains': referring_domains,
            'anchor_distribution': anchor_distribution,
            'monthly_changes': monthly_changes,
            'top_backlinks': top_backlinks,
            'quality_metrics': quality_metrics,

            # Educational and government links (high-quality indicators)
            'edu_links': int(domain_metrics.get('majesticRefEDU', 0) or majestic_metrics.get('majesticRefEDU', 0)),
            'gov_links': int(domain_metrics.get('majesticRefGov', 0) or majestic_metrics.get('majesticRefGov', 0)),

            # Traffic estimates (if available)
            'estimated_organic_traffic': float(domain_metrics.get('ahrefsTraffic', 0)),
            'organic_keywords': int(domain_metrics.get('ahrefsOrganicKeywords', 0)),

            # Data sources and metadata
            'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
            'last_updated': datetime.now().isoformat(),
            'quick_scan': quick_scan,
            'analysis_depth': 'comprehensive' if not quick_scan else 'basic'
        }

        return backlinks_data

    def _extract_referring_domains(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Extract and analyze referring domains"""
        domain_stats = {}

        for link in backlinks:
            if not link.get('url_from'):
                continue

            try:
                source_domain = urlparse(link['url_from']).netloc
                if source_domain not in domain_stats:
                    domain_stats[source_domain] = {
                        'domain': source_domain,
                        'backlinks': 0,
                        'first_seen': link.get('first_seen', ''),
                        'domain_authority': link.get('domain_inlink_rank', 0),
                        'follow_links': 0,
                        'nofollow_links': 0
                    }

                domain_stats[source_domain]['backlinks'] += 1

                if link.get('nofollow'):
                    domain_stats[source_domain]['nofollow_links'] += 1
                else:
                    domain_stats[source_domain]['follow_links'] += 1

            except Exception:
                continue

        # Sort by backlinks count and return top domains
        top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
        return top_domains[:20]  # Top 20 referring domains

    def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Analyze anchor text distribution"""
        anchor_stats = {}

        for link in backlinks:
            anchor = link.get('anchor', '').strip()
            if not anchor or len(anchor) > 100:  # Skip very long anchors
                continue

            if anchor not in anchor_stats:
                anchor_stats[anchor] = {
                    'anchor_text': anchor,
                    'backlinks': 0,
                    'follow_links': 0,
                    'nofollow_links': 0,
                    'unique_domains': set()
                }

            anchor_stats[anchor]['backlinks'] += 1

            if link.get('nofollow'):
                anchor_stats[anchor]['nofollow_links'] += 1
            else:
                anchor_stats[anchor]['follow_links'] += 1

            # Track unique domains for this anchor
            try:
                domain = urlparse(link.get('url_from', '')).netloc
                anchor_stats[anchor]['unique_domains'].add(domain)
            except Exception:
                pass

        # Convert sets to counts and sort
        anchor_distribution = []
        for anchor_data in anchor_stats.values():
            anchor_data['unique_domains'] = len(anchor_data['unique_domains'])
            anchor_distribution.append(anchor_data)

        # Sort by backlinks count
        anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
        return anchor_distribution[:15]  # Top 15 anchor texts

    def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
        """Calculate monthly backlinks changes"""
        now = datetime.now()
        last_month = now - timedelta(days=30)

        new_links = 0
        recent_links = 0

        for link in backlinks:
            first_seen = link.get('first_seen', '')
            if not first_seen:
                continue

            try:
                link_date = datetime.strptime(first_seen, '%Y-%m-%d')
                if link_date >= last_month:
                    new_links += 1
                if link_date >= now - timedelta(days=90):  # 3 months
                    recent_links += 1
            except Exception:
                continue

        return {
            'new_backlinks': new_links,
            'lost_backlinks': 0,  # Can't calculate without historical data
            'net_change': new_links,
            'recent_backlinks_3m': recent_links
        }

    def _get_top_backlinks(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
        """Get top-quality backlinks"""
        # Sort by inlink_rank (higher is better)
        sorted_links = sorted(
            backlinks,
            key=lambda x: x.get('inlink_rank', 0),
            reverse=True
        )

        top_links = []
        for link in sorted_links[:10]:
            top_links.append({
                'source_url': link.get('url_from', ''),
                'source_title': link.get('title', ''),
                'anchor_text': link.get('anchor', ''),
                'is_follow': not link.get('nofollow', True),
                'authority_score': link.get('inlink_rank', 0),
                'first_seen': link.get('first_seen', '')
            })

        return top_links

    def _analyze_link_quality(self, backlinks: List[Dict], domain_metrics: Dict) -> Dict[str, Any]:
        """Analyze overall link quality metrics"""
        if not backlinks:
            return {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0}

        follow_count = sum(1 for link in backlinks if not link.get('nofollow', True))
        total_links = len(backlinks)
        follow_ratio = (follow_count / total_links * 100) if total_links > 0 else 0

        # Average authority score
        authority_scores = [link.get('inlink_rank', 0) for link in backlinks if link.get('inlink_rank')]
        avg_authority = sum(authority_scores) / len(authority_scores) if authority_scores else 0

        # Quality score (0-100)
        quality_score = min(100, (
            (follow_ratio * 0.4) +  # 40% weight on follow ratio
            (avg_authority * 2) +  # 40% weight on authority (scaled)
            (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)  # 20% on domain diversity
        ))

        return {
            'follow_ratio': round(follow_ratio, 1),
            'avg_authority': round(avg_authority, 1),
            'quality_score': round(quality_score, 1),
            'total_analyzed': total_links,
            'edu_gov_count': int(domain_metrics.get('majesticRefEDU', 0)) + int(domain_metrics.get('majesticRefGov', 0))
        }

    def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
        """Track which data sources provided information"""
        sources = []

        if individual_backlinks:
            sources.append('Best Backlink Checker API')
        if majestic_metrics:
            sources.append('Majestic RapidAPI')
        if domain_metrics:
            sources.append('Domain Metrics Check API')

        return sources or ['No data sources available']

    def _generate_no_api_data(self, url: str) -> ModuleResult:
        """Generate response when no API key is available"""
        domain = self._extract_domain(url)

        no_api_data = {
            'total_backlinks': 0,
            'total_ref_domains': 0,
            'domain_rating': 0,
            'authority_scores': {
                'ahrefs_dr': 0,
                'moz_da': 0,
                'moz_pa': 0,
                'majestic_tf': 0,
                'majestic_cf': 0
            },
            'referring_domains': [],
            'anchor_distribution': [],
            'monthly_changes': {
                'new_backlinks': 0,
                'lost_backlinks': 0,
                'net_change': 0
            },
            'top_backlinks': [],
            'quality_metrics': {
                'follow_ratio': 0,
                'avg_authority': 0,
                'quality_score': 0
            },
            'edu_links': 0,
            'gov_links': 0,
            'estimated_organic_traffic': 0,
            'organic_keywords': 0,
            'data_sources': ['No API credentials available'],
            'last_updated': datetime.now().isoformat(),
            'placeholder': True,
            'message': 'Add RAPIDAPI_KEY to your .env file to unlock comprehensive backlinks analysis using Best Backlink Checker, Majestic, and Domain Metrics Check RapidAPIs.'
        }

        return ModuleResult(success=True, data=no_api_data)
```
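A short usage sketch for `BacklinksModule`. When `RAPIDAPI_KEY` is unset, `analyze()` returns the `_generate_no_api_data()` placeholder with `success=True`, so this runs without credentials:

```python
# Sketch: exercising the ModuleResult contract defined above.
from modules.backlinks import BacklinksModule

result = BacklinksModule().analyze('example.com', quick_scan=True)
if result.success:
    data = result.data
    print('placeholder:', data.get('placeholder', False))
    print('backlinks:', data['total_backlinks'],
          '| referring domains:', data['total_ref_domains'])
    print('sources:', ', '.join(data['data_sources']))
else:
    print('error:', result.error)
```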
modules/keywords.py ADDED

```python
"""
Keywords Rankings Module for SEO Report Generator
Supports Google Search Console API (primary) and SERP API (fallback)
"""

import os
import requests
import json
from typing import Dict, Any, List, Optional
from urllib.parse import urlparse
from datetime import datetime, timedelta


class ModuleResult:
    """Standard result object for SEO modules"""
    def __init__(self, success: bool, data: Dict[str, Any], error: str = None):
        self.success = success
        self.data = data
        self.error = error


class KeywordsModule:
    def __init__(self):
        self.gsc_api_key = os.getenv('GOOGLE_SEARCH_CONSOLE_API_KEY')
        self.serp_api_key = os.getenv('SERP_API_KEY')  # SerpAPI or similar
        self.data_for_seo_key = os.getenv('DATAFORSEO_API_KEY')

    def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
        """
        Analyze keyword rankings for the given URL

        Args:
            url: Target website URL
            quick_scan: If True, use limited data for competitor analysis

        Returns:
            ModuleResult with keywords data
        """
        try:
            domain = self._extract_domain(url)

            # Try Google Search Console first (if credentials available)
            if self.gsc_api_key:
                result = self._analyze_with_gsc(domain, quick_scan)
                if result.success:
                    return result

            # Fallback to SERP API
            if self.serp_api_key:
                result = self._analyze_with_serp_api(domain, quick_scan)
                if result.success:
                    return result

            # Fallback to DataForSEO
            if self.data_for_seo_key:
                result = self._analyze_with_dataforseo(domain, quick_scan)
                if result.success:
                    return result

            # No API keys available - return placeholder data
            return self._generate_placeholder_data(domain)

        except Exception as e:
            return ModuleResult(
                success=False,
                data={},
                error=f"Keywords analysis failed: {str(e)}"
            )

    def _extract_domain(self, url: str) -> str:
        """Extract domain from URL"""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        return urlparse(url).netloc.replace('www.', '')

    def _analyze_with_gsc(self, domain: str, quick_scan: bool) -> ModuleResult:
        """Analyze with Google Search Console API"""
        try:
            # Note: GSC API requires site verification and proper setup
            # This is a simplified implementation - real GSC API needs OAuth2

            # GSC API endpoint (simplified)
            base_url = "https://searchconsole.googleapis.com/webmasters/v3/sites"
            site_url = f"https://{domain}/"

            # Get search analytics data
            analytics_url = f"{base_url}/{site_url}/searchAnalytics/query"

            # Date range (last 90 days)
            end_date = datetime.now().date()
            start_date = end_date - timedelta(days=90)

            payload = {
                "startDate": start_date.isoformat(),
                "endDate": end_date.isoformat(),
                "dimensions": ["query", "page"],
                "rowLimit": 1000 if not quick_scan else 100
            }

            headers = {
                "Authorization": f"Bearer {self.gsc_api_key}",
                "Content-Type": "application/json"
            }

            response = requests.post(analytics_url, json=payload, headers=headers, timeout=30)

            if response.status_code != 200:
                raise Exception(f"GSC API error: {response.status_code}")

            data = response.json()
            return self._process_gsc_data(data, domain)

        except Exception as e:
            return ModuleResult(success=False, data={}, error=str(e))

    def _analyze_with_serp_api(self, domain: str, quick_scan: bool) -> ModuleResult:
        """Analyze with SERP API (SerpAPI, etc.)"""
        try:
            # Using SerpAPI as example
            url = "https://serpapi.com/search"

            params = {
                "engine": "google",
                "q": f"site:{domain}",
                "api_key": self.serp_api_key,
                "num": 100 if not quick_scan else 20
            }

            response = requests.get(url, params=params, timeout=30)

            if response.status_code != 200:
                raise Exception(f"SERP API error: {response.status_code}")

            data = response.json()
            return self._process_serp_data(data, domain)

        except Exception as e:
            return ModuleResult(success=False, data={}, error=str(e))

    def _analyze_with_dataforseo(self, domain: str, quick_scan: bool) -> ModuleResult:
        """Analyze with DataForSEO API"""
        try:
            # DataForSEO implementation
            auth = (self.data_for_seo_key, os.getenv('DATAFORSEO_API_PASSWORD', ''))

            # Get domain keywords
            url = "https://api.dataforseo.com/v3/dataforseo_labs/google/ranked_keywords/live"

            payload = {
                "target": domain,
                "limit": 1000 if not quick_scan else 100,
                "offset": 0,
                "filters": [
                    ["metrics.organic.pos", "<=", 100]
                ]
            }

            response = requests.post(url, json=[payload], auth=auth, timeout=60)

            if response.status_code != 200:
                raise Exception(f"DataForSEO API error: {response.status_code}")

            data = response.json()
            return self._process_dataforseo_data(data, domain)

        except Exception as e:
            return ModuleResult(success=False, data={}, error=str(e))

    def _process_gsc_data(self, data: Dict, domain: str) -> ModuleResult:
        """Process Google Search Console data"""
        if 'rows' not in data:
            return ModuleResult(success=False, data={}, error="No GSC data available")

        rows = data['rows']
        total_keywords = len(rows)

        # Position distribution
        top_3 = sum(1 for row in rows if row.get('position', 100) <= 3)
        top_10 = sum(1 for row in rows if row.get('position', 100) <= 10)
        top_50 = sum(1 for row in rows if row.get('position', 100) <= 50)

        # Best and worst performing
        sorted_by_position = sorted(rows, key=lambda x: x.get('position', 100))
        best_keywords = sorted_by_position[:10]
        worst_keywords = sorted_by_position[-10:]

        # High opportunity keywords (high impressions, low clicks)
        opportunity_keywords = []
        for row in rows:
            impressions = row.get('impressions', 0)
            clicks = row.get('clicks', 0)
            ctr = (clicks / impressions * 100) if impressions > 0 else 0

            if impressions > 100 and ctr < 2 and row.get('position', 100) > 10:
                opportunity_keywords.append({
                    'keyword': row.get('keys', [''])[0],
                    'position': row.get('position', 0),
                    'impressions': impressions,
                    'clicks': clicks,
                    'ctr': round(ctr, 2)
                })

        opportunity_keywords = sorted(opportunity_keywords, key=lambda x: x['impressions'], reverse=True)[:10]

        keywords_data = {
            'total_keywords': total_keywords,
            'position_distribution': {
                'top_3': top_3,
                'top_10': top_10,
                'top_50': top_50,
                'beyond_50': total_keywords - top_50
            },
            'best_keywords': [
                {
                    'keyword': row.get('keys', [''])[0],
                    'position': row.get('position', 0),
                    'clicks': row.get('clicks', 0),
                    'impressions': row.get('impressions', 0)
                } for row in best_keywords
            ],
            'worst_keywords': [
                {
                    'keyword': row.get('keys', [''])[0],
                    'position': row.get('position', 0),
                    'clicks': row.get('clicks', 0),
                    'impressions': row.get('impressions', 0)
                } for row in worst_keywords
            ],
            'opportunity_keywords': opportunity_keywords,
            'data_source': 'Google Search Console',
            'last_updated': datetime.now().isoformat()
        }

        return ModuleResult(success=True, data=keywords_data)

    def _process_serp_data(self, data: Dict, domain: str) -> ModuleResult:
        """Process SERP API data"""
        # Simplified SERP data processing
        organic_results = data.get('organic_results', [])

        keywords_data = {
            'total_keywords': len(organic_results),
            'position_distribution': {
                'top_3': len([r for r in organic_results if r.get('position', 100) <= 3]),
                'top_10': len([r for r in organic_results if r.get('position', 100) <= 10]),
                'top_50': len([r for r in organic_results if r.get('position', 100) <= 50]),
                'beyond_50': len([r for r in organic_results if r.get('position', 100) > 50])
            },
            'best_keywords': [
                {
                    'keyword': r.get('title', ''),
                    'position': r.get('position', 0),
```
|
| 253 |
+
'url': r.get('link', '')
|
| 254 |
+
} for r in organic_results[:10]
|
| 255 |
+
],
|
| 256 |
+
'data_source': 'SERP API',
|
| 257 |
+
'last_updated': datetime.now().isoformat()
|
| 258 |
+
}
|
| 259 |
+
|
| 260 |
+
return ModuleResult(success=True, data=keywords_data)
|
| 261 |
+
|
| 262 |
+
def _process_dataforseo_data(self, data: Dict, domain: str) -> ModuleResult:
|
| 263 |
+
"""Process DataForSEO data"""
|
| 264 |
+
if not data.get('tasks') or not data['tasks'][0].get('result'):
|
| 265 |
+
return ModuleResult(success=False, data={}, error="No DataForSEO data available")
|
| 266 |
+
|
| 267 |
+
results = data['tasks'][0]['result']
|
| 268 |
+
total_keywords = len(results)
|
| 269 |
+
|
| 270 |
+
# Position distribution
|
| 271 |
+
top_3 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 3)
|
| 272 |
+
top_10 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 10)
|
| 273 |
+
top_50 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 50)
|
| 274 |
+
|
| 275 |
+
keywords_data = {
|
| 276 |
+
'total_keywords': total_keywords,
|
| 277 |
+
'position_distribution': {
|
| 278 |
+
'top_3': top_3,
|
| 279 |
+
'top_10': top_10,
|
| 280 |
+
'top_50': top_50,
|
| 281 |
+
'beyond_50': total_keywords - top_50
|
| 282 |
+
},
|
| 283 |
+
'best_keywords': [
|
| 284 |
+
{
|
| 285 |
+
'keyword': r.get('keyword', ''),
|
| 286 |
+
'position': r.get('metrics', {}).get('organic', {}).get('pos', 0),
|
| 287 |
+
'search_volume': r.get('keyword_info', {}).get('search_volume', 0)
|
| 288 |
+
} for r in sorted(results, key=lambda x: x.get('metrics', {}).get('organic', {}).get('pos', 100))[:10]
|
| 289 |
+
],
|
| 290 |
+
'data_source': 'DataForSEO',
|
| 291 |
+
'last_updated': datetime.now().isoformat()
|
| 292 |
+
}
|
| 293 |
+
|
| 294 |
+
return ModuleResult(success=True, data=keywords_data)
|
| 295 |
+
|
| 296 |
+
def _generate_placeholder_data(self, domain: str) -> ModuleResult:
|
| 297 |
+
"""Generate placeholder data when no API keys are available"""
|
| 298 |
+
keywords_data = {
|
| 299 |
+
'total_keywords': 0,
|
| 300 |
+
'position_distribution': {
|
| 301 |
+
'top_3': 0,
|
| 302 |
+
'top_10': 0,
|
| 303 |
+
'top_50': 0,
|
| 304 |
+
'beyond_50': 0
|
| 305 |
+
},
|
| 306 |
+
'best_keywords': [],
|
| 307 |
+
'worst_keywords': [],
|
| 308 |
+
'opportunity_keywords': [],
|
| 309 |
+
'data_source': 'No API credentials',
|
| 310 |
+
'last_updated': datetime.now().isoformat(),
|
| 311 |
+
'placeholder': True,
|
| 312 |
+
'message': 'Connect Google Search Console or SERP API to unlock keyword data'
|
| 313 |
+
}
|
| 314 |
+
|
| 315 |
+
return ModuleResult(success=True, data=keywords_data)
|
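The comments in `_analyze_with_gsc` flag that the real Search Console API needs OAuth2 rather than a bearer API key. For reference, a minimal sketch of what that call could look like using the optional `google-api-python-client` dependency listed further down in requirements.txt; the service-account file path is a hypothetical placeholder, not part of this commit:

# Sketch only: assumes google-api-python-client and google-auth are installed
# and a service account has been granted read access to the verified GSC property.
from google.oauth2 import service_account
from googleapiclient.discovery import build

SCOPES = ["https://www.googleapis.com/auth/webmasters.readonly"]

def query_search_analytics(site_url: str, start_date: str, end_date: str,
                           row_limit: int = 1000) -> dict:
    """Run the same query/page report as _analyze_with_gsc, but with real OAuth2."""
    creds = service_account.Credentials.from_service_account_file(
        "service_account.json", scopes=SCOPES)  # hypothetical key-file path
    service = build("searchconsole", "v1", credentials=creds)
    body = {
        "startDate": start_date,
        "endDate": end_date,
        "dimensions": ["query", "page"],
        "rowLimit": row_limit,
    }
    # The client library signs the request and URL-encodes site_url itself,
    # which the simplified requests-based version above does not do.
    return service.searchanalytics().query(siteUrl=site_url, body=body).execute()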
report_generator.py
CHANGED
@@ -12,16 +12,17 @@ class ReportGenerator:
 
     def generate_html_report(self, url: str, technical_data: Dict[str, Any],
                              content_data: Dict[str, Any], competitor_data: List[Dict] = None,
-                             include_charts: bool = True) -> str:
+                             keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
+                             llm_recommendations: Dict[str, Any] = None, include_charts: bool = True) -> str:
         """Generate complete HTML SEO report"""
 
         # Generate charts
         charts_html = ""
         if include_charts:
-            charts_html = self._generate_charts(technical_data, content_data, competitor_data)
+            charts_html = self._generate_charts(technical_data, content_data, competitor_data, keywords_data, backlinks_data)
 
-        # Generate executive summary
-        executive_summary = self._generate_executive_summary(technical_data, content_data)
+        # Generate executive summary (now includes LLM insights)
+        executive_summary = self._generate_executive_summary(technical_data, content_data, llm_recommendations)
 
         # Generate technical SEO section
         technical_section = self._generate_technical_section(technical_data)
@@ -29,6 +30,15 @@
         # Generate content audit section
         content_section = self._generate_content_section(content_data)
 
+        # Generate keywords section
+        keywords_section = self._generate_keywords_section(keywords_data) if keywords_data else ""
+
+        # Generate backlinks section
+        backlinks_section = self._generate_backlinks_section(backlinks_data) if backlinks_data else ""
+
+        # Generate LLM recommendations section
+        recommendations_section = self._generate_recommendations_section(llm_recommendations) if llm_recommendations else ""
+
         # Generate competitor section
         competitor_section = ""
         if competitor_data:
@@ -48,15 +58,19 @@
             executive_summary=executive_summary,
             technical_section=technical_section,
             content_section=content_section,
+            keywords_section=keywords_section,
+            backlinks_section=backlinks_section,
             competitor_section=competitor_section,
             placeholder_sections=placeholder_sections,
-            recommendations=recommendations
+            recommendations=recommendations,
+            llm_recommendations=recommendations_section
         )
 
         return report_html
 
     def _generate_charts(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
-                         competitor_data: List[Dict] = None) -> str:
+                         competitor_data: List[Dict] = None, keywords_data: Dict[str, Any] = None,
+                         backlinks_data: Dict[str, Any] = None) -> str:
         """Generate interactive charts using Plotly"""
         charts_html = ""
 
@@ -204,7 +218,8 @@
 
         return charts_html
 
-    def _generate_executive_summary(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
+    def _generate_executive_summary(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
+                                    llm_recommendations: Dict[str, Any] = None) -> str:
         """Generate executive summary section"""
         # Calculate overall health score
         mobile_perf = technical_data.get('mobile', {}).get('performance_score', 0)
@@ -650,6 +665,219 @@
         </div>
         """
 
+    def _generate_keywords_section(self, keywords_data: Dict[str, Any]) -> str:
+        """Generate keywords analysis section"""
+        if keywords_data.get('placeholder'):
+            return f"""
+            <div class="placeholder-section">
+                <h3>📊 Keyword Rankings</h3>
+                <div class="placeholder-content">
+                    <p><strong>No keyword data available.</strong></p>
+                    <p>{keywords_data.get('message', 'Connect Google Search Console or SERP API to unlock keyword insights.')}</p>
+                </div>
+            </div>
+            """
+
+        total = keywords_data.get('total_keywords', 0)
+        pos_dist = keywords_data.get('position_distribution', {})
+        best_keywords = keywords_data.get('best_keywords', [])
+        opportunity_keywords = keywords_data.get('opportunity_keywords', [])
+
+        # Create position distribution chart
+        pos_chart = ""
+        if pos_dist:
+            import plotly.graph_objects as go
+            from plotly.offline import plot
+
+            labels = ['Top 3', 'Top 10', 'Top 50', 'Beyond 50']
+            values = [
+                pos_dist.get('top_3', 0),
+                pos_dist.get('top_10', 0) - pos_dist.get('top_3', 0),
+                pos_dist.get('top_50', 0) - pos_dist.get('top_10', 0),
+                pos_dist.get('beyond_50', 0)
+            ]
+
+            fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.4)])
+            fig.update_layout(title="Keyword Position Distribution", height=400)
+            pos_chart = plot(fig, include_plotlyjs=False, output_type='div')
+
+        best_keywords_html = ""
+        if best_keywords:
+            best_keywords_html = "<h4>🏆 Top Performing Keywords</h4><table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Clicks</th><th>Impressions</th></tr>"
+            for kw in best_keywords[:10]:
+                best_keywords_html += f"""
+                <tr>
+                    <td>{kw.get('keyword', '')}</td>
+                    <td>{kw.get('position', 0)}</td>
+                    <td>{kw.get('clicks', 0)}</td>
+                    <td>{kw.get('impressions', 0)}</td>
+                </tr>
+                """
+            best_keywords_html += "</table>"
+
+        opportunity_html = ""
+        if opportunity_keywords:
+            opportunity_html = "<h4>🚀 Opportunity Keywords</h4><table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Impressions</th><th>CTR</th></tr>"
+            for kw in opportunity_keywords[:10]:
+                opportunity_html += f"""
+                <tr>
+                    <td>{kw.get('keyword', '')}</td>
+                    <td>{kw.get('position', 0)}</td>
+                    <td>{kw.get('impressions', 0)}</td>
+                    <td>{kw.get('ctr', 0)}%</td>
+                </tr>
+                """
+            opportunity_html += "</table>"
+
+        return f"""
+        <div class="card">
+            <h3>📊 Keyword Rankings Analysis</h3>
+            <div class="metrics-grid">
+                <div class="metric-card">
+                    <div class="metric-value">{total}</div>
+                    <div class="metric-label">Total Keywords</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{pos_dist.get('top_10', 0)}</div>
+                    <div class="metric-label">Top 10 Rankings</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{len(opportunity_keywords)}</div>
+                    <div class="metric-label">Opportunities</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{keywords_data.get('data_source', 'Unknown')}</div>
+                    <div class="metric-label">Data Source</div>
+                </div>
+            </div>
+            {pos_chart}
+            {best_keywords_html}
+            {opportunity_html}
+        </div>
+        """
+
+    def _generate_backlinks_section(self, backlinks_data: Dict[str, Any]) -> str:
+        """Generate backlinks analysis section"""
+        if backlinks_data.get('placeholder'):
+            return f"""
+            <div class="placeholder-section">
+                <h3>🔗 Backlink Profile</h3>
+                <div class="placeholder-content">
+                    <p><strong>No backlink data available.</strong></p>
+                    <p>{backlinks_data.get('message', 'Add RapidAPI key to unlock comprehensive backlink insights.')}</p>
+                </div>
+            </div>
+            """
+
+        total_backlinks = backlinks_data.get('total_backlinks', 0)
+        total_ref_domains = backlinks_data.get('total_ref_domains', 0)
+        domain_rating = backlinks_data.get('domain_rating', 0)
+        monthly_changes = backlinks_data.get('monthly_changes', {})
+        referring_domains = backlinks_data.get('referring_domains', [])
+        anchor_distribution = backlinks_data.get('anchor_distribution', [])
+
+        # Create anchor text distribution chart
+        anchor_chart = ""
+        if anchor_distribution:
+            import plotly.graph_objects as go
+            from plotly.offline import plot
+
+            anchors = [a.get('anchor_text', '')[:30] for a in anchor_distribution[:10]]
+            counts = [a.get('backlinks', 0) for a in anchor_distribution[:10]]
+
+            fig = go.Figure(data=[go.Bar(x=anchors, y=counts)])
+            fig.update_layout(title="Top Anchor Text Distribution", height=400, xaxis={'tickangle': 45})
+            anchor_chart = plot(fig, include_plotlyjs=False, output_type='div')
+
+        ref_domains_html = ""
+        if referring_domains:
+            ref_domains_html = "<h4>🏢 Top Referring Domains</h4><table class='data-table'><tr><th>Domain</th><th>Domain Rating</th><th>Backlinks</th><th>First Seen</th></tr>"
+            for rd in referring_domains[:10]:
+                ref_domains_html += f"""
+                <tr>
+                    <td>{rd.get('domain', '')}</td>
+                    <td>{rd.get('domain_rating', 0)}</td>
+                    <td>{rd.get('backlinks', 0)}</td>
+                    <td>{rd.get('first_seen', 'N/A')}</td>
+                </tr>
+                """
+            ref_domains_html += "</table>"
+
+        return f"""
+        <div class="card">
+            <h3>🔗 Backlink Profile Analysis</h3>
+            <div class="metrics-grid">
+                <div class="metric-card">
+                    <div class="metric-value">{total_backlinks:,}</div>
+                    <div class="metric-label">Total Backlinks</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{total_ref_domains:,}</div>
+                    <div class="metric-label">Referring Domains</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{domain_rating}</div>
+                    <div class="metric-label">Domain Rating</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{monthly_changes.get('net_change', 0):+d}</div>
+                    <div class="metric-label">Monthly Change</div>
+                </div>
+            </div>
+            {anchor_chart}
+            {ref_domains_html}
+        </div>
+        """
+
+    def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
+        """Generate LLM-powered recommendations section"""
+        if not llm_recommendations:
+            return ""
+
+        recommendations = llm_recommendations.get('recommendations', [])
+        executive_insights = llm_recommendations.get('executive_insights', [])
+        priority_actions = llm_recommendations.get('priority_actions', [])
+
+        insights_html = ""
+        if executive_insights:
+            insights_html = "<div class='executive-insights'><h4>🎯 Executive Insights</h4><ul>"
+            for insight in executive_insights:
+                insights_html += f"<li>{insight}</li>"
+            insights_html += "</ul></div>"
+
+        priority_html = ""
+        if priority_actions:
+            priority_html = "<div class='priority-actions'><h4>🔥 Priority Actions</h4>"
+            for i, action in enumerate(priority_actions[:3], 1):
+                priority_html += f"""
+                <div class="priority-action">
+                    <div class="action-number">{i}</div>
+                    <div class="action-content">
+                        <div class="action-title">{action.get('title', '')}</div>
+                        <div class="action-description">{action.get('description', '')}</div>
+                        <span class="action-priority">{action.get('priority', 'MEDIUM')}</span>
+                    </div>
+                </div>
+                """
+            priority_html += "</div>"
+
+        recommendations_html = ""
+        if recommendations:
+            recommendations_html = "<div class='llm-recommendations'><h4>🤖 AI-Generated Recommendations</h4><ul>"
+            for rec in recommendations:
+                recommendations_html += f"<li>{rec}</li>"
+            recommendations_html += "</ul></div>"
+
+        return f"""
+        <div class="card">
+            <h3>🧠 Smart Recommendations</h3>
+            <p class="data-source">Generated by {llm_recommendations.get('data_source', 'AI Analysis')}</p>
+            {insights_html}
+            {priority_html}
+            {recommendations_html}
+        </div>
+        """
+
     def _get_report_template(self) -> str:
         """Get the HTML template for the report"""
         return """
@@ -1084,6 +1312,16 @@
                 {content_section}
             </div>
 
+            <div class="section">
+                <h2>📊 Keywords Analysis</h2>
+                {keywords_section}
+            </div>
+
+            <div class="section">
+                <h2>🔗 Backlinks Profile</h2>
+                {backlinks_section}
+            </div>
+
             {competitor_section}
 
             <div class="section">
@@ -1094,6 +1332,8 @@
             <div class="section">
                 {recommendations}
            </div>
+
+            {llm_recommendations}
        </div>
    </body>
</html>
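`_generate_recommendations_section` renders a block only for the keys it finds, so the LLM layer can supply any subset of `executive_insights`, `priority_actions`, and `recommendations`. A minimal sketch of an input that exercises all three blocks; the field values are invented for illustration, not output of llm_recommendations.py:

# Illustrative payload for _generate_recommendations_section (values are made up).
from report_generator import ReportGenerator

sample = {
    "data_source": "Groq LLM Analysis",  # assumed label; any string is accepted
    "executive_insights": [
        "Mobile performance is the biggest drag on organic visibility.",
    ],
    "priority_actions": [
        {"title": "Compress hero images",
         "description": "Serve WebP and lazy-load below-the-fold media.",
         "priority": "HIGH"},
    ],
    "recommendations": [
        "Add descriptive alt text to product images.",
    ],
}

# Returns a <div class="card"> fragment ready to drop into the template.
html = ReportGenerator()._generate_recommendations_section(sample)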
requirements.txt
CHANGED
@@ -1,5 +1,9 @@
 # Thinkly Labs SEO - Dependencies
+
+# Core Framework
 flask
+
+# Data Processing & Analysis
 requests
 beautifulsoup4
 pandas
@@ -8,4 +12,14 @@ jinja2
 validators
 urllib3
 lxml
-
+
+# PDF Generation
+reportlab
+
+# AI/LLM Integration
+groq
+python-dotenv
+
+# API Integrations (Optional - set via environment variables)
+# google-api-python-client  # For Google Search Console
+# oauth2client  # For GSC authentication
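With `python-dotenv` added here and `.env.local`/`.env.production` now git-ignored, credential loading can stay out of the codebase. A minimal sketch; `DATAFORSEO_API_PASSWORD` is the name modules/keywords.py actually reads, while the other variable names are assumptions:

# Sketch: read optional API credentials from a git-ignored .env file.
import os
from dotenv import load_dotenv

load_dotenv()  # loads .env from the working directory if present

groq_key = os.getenv("GROQ_API_KEY")                      # assumed name for the Groq client
rapidapi_key = os.getenv("RAPIDAPI_KEY")                  # assumed name for modules/backlinks.py
dataforseo_pw = os.getenv("DATAFORSEO_API_PASSWORD", "")  # read by modules/keywords.py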
simple_pdf_generator.py
CHANGED
@@ -4,6 +4,7 @@ or browser-based PDF conversion instructions
 """
 
 import io
+import re
 from typing import Dict, Any
 
 class SimplePDFGenerator:
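This hunk only adds the `import re`; its use is outside the excerpt. One plausible application, purely an assumption since the method bodies are not shown, is stripping HTML markup from the report before laying out plain PDF text:

# Guess at what `re` enables in SimplePDFGenerator; not confirmed by this diff.
import re

def strip_html(html: str) -> str:
    """Drop tags and collapse whitespace so report HTML can be rendered as text."""
    text = re.sub(r"<[^>]+>", " ", html)       # remove tags
    return re.sub(r"\s+", " ", text).strip()   # normalize whitespace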