yashgori20 committed on
Commit 5f0cfa7 · Parent(s): ee1f542
.gitignore CHANGED
@@ -97,6 +97,8 @@ celerybeat.pid
 
 # Environments
 .env
+.env.local
+.env.production
 .venv
 env/
 venv/
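The new entries extend the Environments block so machine- and deployment-specific secrets never reach the repo. The codebase reads these files with python-dotenv (see `load_dotenv()` in `llm_recommendations.py`); a minimal sketch of a layered load, assuming you want local values to win over shared defaults (the layering itself is an assumption, not something this commit configures):

```python
# Hypothetical loading order matching the ignored files above:
# .env holds shared defaults, .env.local overrides per machine.
from dotenv import load_dotenv

load_dotenv('.env')                       # base configuration
load_dotenv('.env.local', override=True)  # local overrides take precedence
```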
README.md CHANGED
@@ -31,7 +31,7 @@ Professional SEO analysis and reporting tool that creates comprehensive SEO audi
 
 ### 🚧 Planned for Future Versions
 - Keyword Rankings (Google Search Console integration)
-- Backlink Profile Analysis (Ahrefs/SEMrush APIs)
+- Backlink Profile Analysis (RapidAPI)
 - Advanced Competitor Analysis
 - GA4/Conversion Tracking Integration
 
app.py CHANGED
@@ -8,8 +8,11 @@ import uuid
 # Import SEO modules
 from modules.technical_seo import TechnicalSEOModule
 from modules.content_audit import ContentAuditModule
+from modules.keywords import KeywordsModule
+from modules.backlinks import BacklinksModule
 from report_generator import ReportGenerator
 from simple_pdf_generator import SimplePDFGenerator
+from llm_recommendations import LLMRecommendations
 
 app = Flask(__name__, static_folder='static')
 app.secret_key = 'seo_report_generator_2024'
@@ -17,8 +20,11 @@ app.secret_key = 'seo_report_generator_2024'
 # Initialize modules
 technical_module = TechnicalSEOModule()
 content_module = ContentAuditModule()
+keywords_module = KeywordsModule()
+backlinks_module = BacklinksModule()
 report_gen = ReportGenerator()
 pdf_gen = SimplePDFGenerator()
+llm_recommendations = LLMRecommendations()
 
 # Store for generated reports (in production, use database)
 reports_store = {}
@@ -56,15 +62,59 @@ def generate_report():
     # Content Audit
     content_data = content_module.analyze(url)
 
+    # Keywords Analysis
+    keywords_data = keywords_module.analyze(url).data
+
+    # Backlinks Analysis - COMMENTED OUT TO SAVE API CREDITS
+    # print(f"DEBUG: Starting backlinks analysis for {url}")
+    # backlinks_result = backlinks_module.analyze(url)
+    # backlinks_data = backlinks_result.data
+    # print(f"DEBUG: Backlinks analysis result - Success: {backlinks_result.success}")
+    # print(f"DEBUG: Backlinks data keys: {list(backlinks_data.keys())}")
+    # if backlinks_data.get('total_backlinks'):
+    #     print(f"DEBUG: Total backlinks found: {backlinks_data.get('total_backlinks')}")
+    # if backlinks_data.get('placeholder'):
+    #     print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")
+
+    # Use placeholder backlinks data to save API credits
+    backlinks_data = {
+        'total_backlinks': 0,
+        'total_ref_domains': 0,
+        'domain_rating': 0,
+        'authority_scores': {'ahrefs_dr': 0, 'moz_da': 0, 'moz_pa': 0, 'majestic_tf': 0, 'majestic_cf': 0},
+        'referring_domains': [],
+        'anchor_distribution': [],
+        'monthly_changes': {'new_backlinks': 0, 'lost_backlinks': 0, 'net_change': 0},
+        'top_backlinks': [],
+        'quality_metrics': {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0},
+        'edu_links': 0,
+        'gov_links': 0,
+        'estimated_organic_traffic': 0,
+        'organic_keywords': 0,
+        'data_sources': ['API disabled to save credits'],
+        'placeholder': True,
+        'message': 'Backlinks analysis temporarily disabled to conserve API credits.'
+    }
+
+    # Generate LLM Recommendations
+    llm_rec_data = llm_recommendations.generate_recommendations(
+        url, technical_data, content_data, keywords_data, backlinks_data
+    )
+
     # Competitor Analysis
     competitor_data = []
     for comp_url in competitor_list:
         comp_technical = technical_module.analyze(comp_url)
         comp_content = content_module.analyze(comp_url, quick_scan=True)
+        comp_keywords = keywords_module.analyze(comp_url, quick_scan=True).data
+        # comp_backlinks = backlinks_module.analyze(comp_url, quick_scan=True).data  # SAVE API CREDITS
+        comp_backlinks = {'placeholder': True, 'message': 'Disabled to save credits'}
         competitor_data.append({
             'url': comp_url,
             'technical': comp_technical,
-            'content': comp_content
+            'content': comp_content,
+            'keywords': comp_keywords,
+            'backlinks': comp_backlinks
         })
 
     # Generate HTML report
@@ -73,6 +123,9 @@ def generate_report():
         technical_data=technical_data,
         content_data=content_data,
         competitor_data=competitor_data,
+        keywords_data=keywords_data,
+        backlinks_data=backlinks_data,
+        llm_recommendations=llm_rec_data,
         include_charts=True
     )
 
@@ -82,6 +135,9 @@ def generate_report():
         'html': report_html,
         'technical_data': technical_data,
         'content_data': content_data,
+        'keywords_data': keywords_data,
+        'backlinks_data': backlinks_data,
+        'llm_recommendations': llm_rec_data,
         'competitor_data': competitor_data
     }
 
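Note the asymmetry in the route: the new `KeywordsModule` and `BacklinksModule` return a `ModuleResult` wrapper, so the handler unwraps `.data` explicitly, while the older technical and content modules hand back their data directly. A minimal smoke test of that call pattern, assuming the module layout from this commit (it runs without API keys because `KeywordsModule` falls back to placeholder data):

```python
# Sketch: exercises the same ModuleResult unwrapping the route uses.
from modules.keywords import KeywordsModule

result = KeywordsModule().analyze('example.com')  # returns a ModuleResult
keywords_data = result.data                       # the route reads .data
print(result.success, keywords_data.get('placeholder'), keywords_data.get('data_source'))
```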
llm_recommendations.py ADDED
@@ -0,0 +1,344 @@
+"""
+Groq LLM Integration for Smart SEO Recommendations
+Analyzes all 4 modules (Technical SEO, Content Audit, Keywords, Backlinks) to generate intelligent recommendations
+"""
+
+import os
+import json
+from typing import Dict, Any, List
+from groq import Groq
+from dotenv import load_dotenv
+
+# Load environment variables
+load_dotenv()
+
+
+class LLMRecommendations:
+    def __init__(self):
+        try:
+            self.client = Groq(api_key=os.getenv('GROQ_API_KEY'))
+            self.available = True
+        except Exception:
+            self.client = None
+            self.available = False
+
+    def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
+                                 content_data: Dict[str, Any], keywords_data: Dict[str, Any],
+                                 backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Generate comprehensive SEO recommendations based on all module data
+
+        Args:
+            url: Target website URL
+            technical_data: Technical SEO analysis results
+            content_data: Content audit results
+            keywords_data: Keywords analysis results
+            backlinks_data: Backlinks analysis results
+
+        Returns:
+            Dictionary with recommendations and insights
+        """
+        if not self.available:
+            return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data)
+
+        try:
+            # Prepare context data for LLM
+            context = self._prepare_context(url, technical_data, content_data, keywords_data, backlinks_data)
+
+            # Generate recommendations using Groq
+            recommendations = self._query_llm(context)
+
+            return {
+                'recommendations': recommendations,
+                'executive_insights': self._generate_executive_insights(context),
+                'priority_actions': self._extract_priority_actions(recommendations),
+                'data_source': 'Groq LLM Analysis',
+                'generated_at': context['analysis_date']
+            }
+
+        except Exception as e:
+            return self._generate_fallback_recommendations(technical_data, content_data, keywords_data, backlinks_data, error=str(e))
+
+    def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
+                         keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
+        """Prepare structured context for LLM analysis"""
+
+        # Extract key metrics from each module
+        context = {
+            'website': url,
+            'analysis_date': technical_data.get('last_updated', ''),
+            'technical_seo': {
+                'mobile_score': technical_data.get('mobile_score', 0),
+                'desktop_score': technical_data.get('desktop_score', 0),
+                'core_web_vitals': technical_data.get('core_web_vitals', {}),
+                'issues_count': len(technical_data.get('issues', [])),
+                'top_issues': technical_data.get('issues', [])[:3]
+            },
+            'content_audit': {
+                'pages_analyzed': content_data.get('pages_analyzed', 0),
+                'metadata_completeness': content_data.get('metadata_completeness', {}),
+                'avg_word_count': content_data.get('avg_word_count', 0),
+                'cta_presence': content_data.get('cta_presence', 0),
+                'content_freshness': content_data.get('content_freshness', {})
+            },
+            'keywords': {
+                'total_keywords': keywords_data.get('total_keywords', 0),
+                'position_distribution': keywords_data.get('position_distribution', {}),
+                'data_available': not keywords_data.get('placeholder', False),
+                'opportunity_keywords': len(keywords_data.get('opportunity_keywords', [])),
+                'data_source': keywords_data.get('data_source', 'Unknown')
+            },
+            'backlinks': {
+                'total_backlinks': backlinks_data.get('total_backlinks', 0),
+                'total_ref_domains': backlinks_data.get('total_ref_domains', 0),
+                'domain_rating': backlinks_data.get('domain_rating', 0),
+                'monthly_changes': backlinks_data.get('monthly_changes', {}),
+                'data_available': not backlinks_data.get('placeholder', False),
+                'data_source': backlinks_data.get('data_source', 'Unknown')
+            }
+        }
+
+        return context
+
+    def _query_llm(self, context: Dict[str, Any]) -> List[str]:
+        """Query Groq LLM for SEO recommendations"""
+
+        prompt = f"""
+        You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.
+
+        TECHNICAL SEO DATA:
+        - Mobile Performance Score: {context['technical_seo']['mobile_score']}/100
+        - Desktop Performance Score: {context['technical_seo']['desktop_score']}/100
+        - Core Web Vitals: {json.dumps(context['technical_seo']['core_web_vitals'])}
+        - Critical Issues Found: {context['technical_seo']['issues_count']}
+        - Top Issues: {context['technical_seo']['top_issues']}
+
+        CONTENT AUDIT DATA:
+        - Pages Analyzed: {context['content_audit']['pages_analyzed']}
+        - Metadata Completeness: {json.dumps(context['content_audit']['metadata_completeness'])}
+        - Average Word Count: {context['content_audit']['avg_word_count']}
+        - CTA Presence: {context['content_audit']['cta_presence']}%
+        - Content Freshness: {json.dumps(context['content_audit']['content_freshness'])}
+
+        KEYWORDS DATA:
+        - Total Keywords Tracked: {context['keywords']['total_keywords']}
+        - Position Distribution: {json.dumps(context['keywords']['position_distribution'])}
+        - Data Available: {context['keywords']['data_available']}
+        - Opportunity Keywords: {context['keywords']['opportunity_keywords']}
+        - Source: {context['keywords']['data_source']}
+
+        BACKLINKS DATA:
+        - Total Backlinks: {context['backlinks']['total_backlinks']}
+        - Referring Domains: {context['backlinks']['total_ref_domains']}
+        - Domain Rating: {context['backlinks']['domain_rating']}
+        - Monthly Changes: {json.dumps(context['backlinks']['monthly_changes'])}
+        - Data Available: {context['backlinks']['data_available']}
+        - Source: {context['backlinks']['data_source']}
+
+        CRITICAL INSTRUCTIONS:
+        1. Analyze the data holistically across all 4 modules
+        2. Identify the TOP 3 most critical issues that need immediate attention
+        3. Provide specific, actionable recommendations with clear steps
+        4. If API data is missing (placeholder: true), acknowledge this and focus on available data
+        5. Prioritize recommendations by potential impact and ease of implementation
+        6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies
+        7. Provide estimated timelines and resources needed for each recommendation
+
+        Generate exactly 8-12 specific recommendations in this format:
+        - **[Priority Level]** [Specific Action]: [Detailed explanation with steps and expected impact]
+
+        Priority Levels: HIGH, MEDIUM, LOW
+        Focus on actionable items that can be implemented within 30-90 days.
+
+        Response:
+        """
+
+        try:
+            chat_completion = self.client.chat.completions.create(
+                messages=[
+                    {'role': 'user', 'content': prompt}
+                ],
+                model="mixtral-8x7b-32768",  # Using Mixtral for better reasoning
+                stream=False,
+                temperature=0.1,  # Low temperature for consistent, focused recommendations
+                max_tokens=1500
+            )
+
+            response = chat_completion.choices[0].message.content.strip()
+
+            # Parse recommendations from response
+            recommendations = []
+            lines = response.split('\n')
+            for line in lines:
+                line = line.strip()
+                if line.startswith('- **') or line.startswith('•'):
+                    # Clean up the recommendation
+                    recommendation = line.replace('- **', '').replace('• **', '').strip()
+                    if recommendation:
+                        recommendations.append(recommendation)
+
+            return recommendations if recommendations else [response]
+
+        except Exception as e:
+            return [f"LLM Error: {str(e)}"]
+
+    def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
+        """Generate high-level executive insights"""
+        insights = []
+
+        # Technical Performance Insight
+        mobile_score = context['technical_seo']['mobile_score']
+        desktop_score = context['technical_seo']['desktop_score']
+        avg_score = (mobile_score + desktop_score) / 2
+
+        if avg_score < 50:
+            insights.append(f"🔴 Critical: Website performance is severely impacting user experience (avg: {avg_score:.0f}/100)")
+        elif avg_score < 75:
+            insights.append(f"🟡 Warning: Website performance needs improvement (avg: {avg_score:.0f}/100)")
+        else:
+            insights.append(f"🟢 Good: Website performance is solid (avg: {avg_score:.0f}/100)")
+
+        # Content Insight
+        pages = context['content_audit']['pages_analyzed']
+        if pages > 0:
+            metadata = context['content_audit']['metadata_completeness']
+            title_pct = (metadata.get('with_title', 0) / pages * 100) if pages > 0 else 0
+
+            if title_pct < 80:
+                insights.append(f"🔴 Content Issue: {100-title_pct:.0f}% of pages missing critical metadata")
+            else:
+                insights.append(f"🟢 Content Quality: Metadata completeness is good ({title_pct:.0f}%)")
+
+        # Keywords Insight
+        if context['keywords']['data_available']:
+            total_keywords = context['keywords']['total_keywords']
+            pos_dist = context['keywords']['position_distribution']
+            top_10_pct = (pos_dist.get('top_10', 0) / total_keywords * 100) if total_keywords > 0 else 0
+
+            if top_10_pct < 15:
+                insights.append(f"🔴 SEO Visibility: Only {top_10_pct:.0f}% of keywords rank in top 10")
+            elif top_10_pct < 30:
+                insights.append(f"🟡 SEO Opportunity: {top_10_pct:.0f}% of keywords in top 10 - room for growth")
+            else:
+                insights.append(f"🟢 Strong SEO: {top_10_pct:.0f}% of keywords ranking in top 10")
+        else:
+            insights.append("📊 Connect keyword tracking tools for visibility insights")
+
+        # Backlinks Insight
+        if context['backlinks']['data_available']:
+            ref_domains = context['backlinks']['total_ref_domains']
+            domain_rating = context['backlinks']['domain_rating']
+
+            if ref_domains < 50:
+                insights.append(f"🔴 Link Building: Low referring domains ({ref_domains}) - aggressive outreach needed")
+            elif ref_domains < 200:
+                insights.append(f"🟡 Authority Building: Moderate link profile ({ref_domains} domains)")
+            else:
+                insights.append(f"🟢 Strong Authority: Healthy backlink profile ({ref_domains} referring domains)")
+        else:
+            insights.append("🔗 Connect backlink analysis tools for authority insights")
+
+        return insights
+
+    def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
+        """Extract priority actions from recommendations"""
+        priority_actions = []
+
+        for rec in recommendations:
+            if '**HIGH**' in rec or '**CRITICAL**' in rec:
+                # Extract action title and description
+                parts = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
+                if ':' in parts:
+                    title, description = parts.split(':', 1)
+                    priority_actions.append({
+                        'title': title.strip(),
+                        'description': description.strip(),
+                        'priority': 'HIGH'
+                    })
+
+        # If no high priority actions found, take first 3
+        if not priority_actions and recommendations:
+            for i, rec in enumerate(recommendations[:3]):
+                if ':' in rec:
+                    title, description = rec.split(':', 1)
+                    priority_actions.append({
+                        'title': title.replace('*', '').strip(),
+                        'description': description.strip(),
+                        'priority': 'HIGH'
+                    })
+
+        return priority_actions[:5]  # Top 5 priority actions
+
+    def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
+                                           keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
+        """Generate basic recommendations when LLM is not available"""
+
+        recommendations = []
+
+        # Technical recommendations
+        mobile_score = technical_data.get('mobile_score', 0)
+        desktop_score = technical_data.get('desktop_score', 0)
+
+        if mobile_score < 50:
+            recommendations.append("**HIGH** Improve Mobile Performance: Optimize images, reduce JavaScript, enable compression")
+        if desktop_score < 50:
+            recommendations.append("**HIGH** Improve Desktop Performance: Optimize server response time, minimize CSS and JavaScript")
+
+        # Content recommendations
+        pages = content_data.get('pages_analyzed', 0)
+        if pages > 0:
+            metadata = content_data.get('metadata_completeness', {})
+            if metadata.get('with_title', 0) < pages * 0.8:
+                recommendations.append("**HIGH** Fix Metadata: Add missing title tags and meta descriptions")
+
+        if content_data.get('avg_word_count', 0) < 300:
+            recommendations.append("**MEDIUM** Enhance Content: Increase average page content length")
+
+        # Keywords recommendations
+        if not keywords_data.get('placeholder', False):
+            total_keywords = keywords_data.get('total_keywords', 0)
+            pos_dist = keywords_data.get('position_distribution', {})
+
+            if total_keywords > 0 and pos_dist.get('top_10', 0) < total_keywords * 0.2:
+                recommendations.append("**HIGH** Improve Keyword Rankings: Focus on on-page SEO for underperforming keywords")
+        else:
+            recommendations.append("**MEDIUM** Set Up Keyword Tracking: Connect Google Search Console for keyword insights")
+
+        # Backlinks recommendations
+        if not backlinks_data.get('placeholder', False):
+            ref_domains = backlinks_data.get('total_ref_domains', 0)
+            if ref_domains < 50:
+                recommendations.append("**HIGH** Build Authority: Implement aggressive link building and outreach strategy")
+        else:
+            recommendations.append("**MEDIUM** Set Up Backlink Monitoring: Add RapidAPI key for comprehensive link analysis")
+
+        # Default recommendations if none generated
+        if not recommendations:
+            recommendations = [
+                "**HIGH** Audit Technical Issues: Review site speed and mobile performance",
+                "**MEDIUM** Optimize Content Strategy: Ensure all pages have unique, valuable content",
+                "**LOW** Monitor SEO Performance: Set up tracking for keywords and backlinks"
+            ]
+
+        insights = [
+            "🔄 Basic SEO analysis completed - connect APIs for deeper insights",
+            f"📊 Analyzed {pages} pages for content quality",
+            "⚠️ Enhanced recommendations require API integrations"
+        ]
+
+        if error:
+            insights.append(f"❌ LLM Error: {error}")
+
+        return {
+            'recommendations': recommendations,
+            'executive_insights': insights,
+            'priority_actions': [
+                {
+                    'title': 'Connect SEO APIs',
+                    'description': 'Set up Google Search Console and RapidAPI for comprehensive analysis',
+                    'priority': 'HIGH'
+                }
+            ],
+            'data_source': 'Fallback Analysis',
+            'generated_at': technical_data.get('last_updated', '')
+        }
modules/backlinks.py ADDED
@@ -0,0 +1,451 @@
+"""
+Backlinks Profile Module using RapidAPI endpoints
+Combines 3 RapidAPI endpoints: Best Backlink Checker, Majestic, and Domain Metrics Check
+"""
+
+import os
+import requests
+import time
+from typing import Dict, Any, List, Optional
+from urllib.parse import urlparse
+from datetime import datetime, timedelta
+
+
+class ModuleResult:
+    """Standard result object for SEO modules"""
+    def __init__(self, success: bool, data: Dict[str, Any], error: str = None):
+        self.success = success
+        self.data = data
+        self.error = error
+
+
+class BacklinksModule:
+    def __init__(self):
+        self.rapidapi_key = os.getenv('RAPIDAPI_KEY')
+        self.timeout = int(os.getenv('RAPIDAPI_TIMEOUT', '30'))
+        self.max_retries = int(os.getenv('BACKLINKS_MAX_RETRIES', '3'))
+
+        # RapidAPI endpoints
+        self.backlink_checker_url = "https://best-backlink-checker-api.p.rapidapi.com/excatbacklinks_noneng.php"
+        self.majestic_url = "https://majestic1.p.rapidapi.com/url_metrics"
+        self.domain_metrics_url = "https://domain-metrics-check.p.rapidapi.com/domain-metrics"
+
+        # Common headers
+        self.headers = {
+            'x-rapidapi-key': self.rapidapi_key,
+            'Accept': 'application/json'
+        }
+
+    def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
+        """
+        Analyze backlink profile using multiple RapidAPI endpoints
+
+        Args:
+            url: Target website URL
+            quick_scan: If True, use cached data or limited analysis
+
+        Returns:
+            ModuleResult with comprehensive backlinks data
+        """
+        try:
+            if not self.rapidapi_key:
+                return self._generate_no_api_data(url)
+
+            domain = self._extract_domain(url)
+
+            # Call all 3 APIs with retry logic
+            individual_backlinks = self._get_individual_backlinks(domain, quick_scan)
+            majestic_metrics = self._get_majestic_metrics(domain)
+            domain_metrics = self._get_domain_metrics(domain)
+
+            # Combine and process all data
+            combined_data = self._combine_backlink_data(
+                domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan
+            )
+
+            return ModuleResult(success=True, data=combined_data)
+
+        except Exception as e:
+            return ModuleResult(
+                success=False,
+                data={},
+                error=f"Backlinks analysis failed: {str(e)}"
+            )
+
+    def _extract_domain(self, url: str) -> str:
+        """Extract clean domain from URL"""
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+        domain = urlparse(url).netloc.replace('www.', '')
+        return domain
+
+    def _api_request_with_retry(self, url: str, params: Dict = None, headers: Dict = None) -> Optional[Dict]:
+        """Make API request with retry logic"""
+        if headers is None:
+            headers = self.headers.copy()
+
+        for attempt in range(self.max_retries):
+            try:
+                response = requests.get(url, params=params, headers=headers, timeout=self.timeout)
+
+                if response.status_code == 200:
+                    return response.json()
+                elif response.status_code == 429:  # Rate limit
+                    wait_time = (attempt + 1) * 2  # Linear backoff: 2s, 4s, 6s
+                    print(f"Rate limited, waiting {wait_time}s...")
+                    time.sleep(wait_time)
+                    continue
+                else:
+                    print(f"API error {response.status_code}: {response.text}")
+
+            except requests.exceptions.Timeout:
+                print(f"Timeout on attempt {attempt + 1}")
+                if attempt < self.max_retries - 1:
+                    time.sleep(2)
+
+            except Exception as e:
+                print(f"Request error: {str(e)}")
+                if attempt < self.max_retries - 1:
+                    time.sleep(2)
+
+        return None
+
+    def _get_individual_backlinks(self, domain: str, quick_scan: bool = False) -> List[Dict]:
+        """Get individual backlinks data"""
+        try:
+            headers = self.headers.copy()
+            headers['x-rapidapi-host'] = 'best-backlink-checker-api.p.rapidapi.com'
+
+            params = {'domain': f'https://{domain}'}
+
+            data = self._api_request_with_retry(self.backlink_checker_url, params, headers)
+
+            if data and isinstance(data, list):
+                # Limit results for quick scan
+                if quick_scan:
+                    return data[:50]
+                return data[:500]  # Reasonable limit to avoid memory issues
+
+        except Exception as e:
+            print(f"Individual backlinks API error: {str(e)}")
+
+        return []
+
+    def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
+        """Get Majestic domain metrics via RapidAPI"""
+        try:
+            headers = self.headers.copy()
+            headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
+
+            params = {'url': domain}
+
+            data = self._api_request_with_retry(self.majestic_url, params, headers)
+
+            if data and data.get('status') == 'success':
+                return data
+
+        except Exception as e:
+            print(f"Majestic RapidAPI error: {str(e)}")
+
+        return {}
+
+    def _get_domain_metrics(self, domain: str) -> Dict[str, Any]:
+        """Get comprehensive domain metrics"""
+        try:
+            headers = self.headers.copy()
+            headers['x-rapidapi-host'] = 'domain-metrics-check.p.rapidapi.com'
+
+            # API expects domain with trailing slash
+            url = f"{self.domain_metrics_url}/{domain}/"
+
+            data = self._api_request_with_retry(url, headers=headers)
+
+            if data and data.get('domain'):
+                return data
+
+        except Exception as e:
+            print(f"Domain metrics API error: {str(e)}")
+
+        return {}
+
+    def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
+                               majestic_metrics: Dict, domain_metrics: Dict, quick_scan: bool) -> Dict[str, Any]:
+        """Combine data from all 3 APIs into comprehensive backlinks profile"""
+
+        # Primary metrics (prefer Domain Metrics Check, fallback to Majestic)
+        total_backlinks = (
+            int(domain_metrics.get('ahrefsBacklinks', 0)) or
+            int(domain_metrics.get('majesticLinks', 0)) or
+            int(majestic_metrics.get('majesticLinks', 0)) or
+            len(individual_backlinks)
+        )
+
+        total_ref_domains = (
+            int(domain_metrics.get('ahrefsRefDomains', 0)) or
+            int(domain_metrics.get('majesticRefDomains', 0)) or
+            int(majestic_metrics.get('majesticRefDomains', 0)) or
+            len(set(link.get('url_from', '').split('/')[2] for link in individual_backlinks if link.get('url_from')))
+        )
+
+        # Authority scores (multiple sources for validation)
+        domain_rating = (
+            int(domain_metrics.get('ahrefsDR', 0)) or
+            int(domain_metrics.get('majesticTF', 0)) or
+            int(majestic_metrics.get('majesticTF', 0))
+        )
+
+        # Process individual backlinks for detailed analysis
+        referring_domains = self._extract_referring_domains(individual_backlinks)
+        anchor_distribution = self._extract_anchor_distribution(individual_backlinks)
+        monthly_changes = self._calculate_monthly_changes(individual_backlinks)
+        top_backlinks = self._get_top_backlinks(individual_backlinks)
+
+        # Link quality analysis
+        quality_metrics = self._analyze_link_quality(individual_backlinks, domain_metrics)
+
+        # Comprehensive backlinks data
+        backlinks_data = {
+            'total_backlinks': total_backlinks,
+            'total_ref_domains': total_ref_domains,
+            'domain_rating': domain_rating,
+
+            # Authority scores from multiple sources
+            'authority_scores': {
+                'ahrefs_dr': int(domain_metrics.get('ahrefsDR', 0)),
+                'moz_da': int(domain_metrics.get('mozDA', 0)),
+                'moz_pa': int(domain_metrics.get('mozPA', 0)),
+                'majestic_tf': int(domain_metrics.get('majesticTF', 0) or majestic_metrics.get('majesticTF', 0)),
+                'majestic_cf': int(domain_metrics.get('majesticCF', 0) or majestic_metrics.get('majesticCF', 0))
+            },
+
+            # Detailed analysis
+            'referring_domains': referring_domains,
+            'anchor_distribution': anchor_distribution,
+            'monthly_changes': monthly_changes,
+            'top_backlinks': top_backlinks,
+            'quality_metrics': quality_metrics,
+
+            # Educational and government links (high-quality indicators)
+            'edu_links': int(domain_metrics.get('majesticRefEDU', 0) or majestic_metrics.get('majesticRefEDU', 0)),
+            'gov_links': int(domain_metrics.get('majesticRefGov', 0) or majestic_metrics.get('majesticRefGov', 0)),
+
+            # Traffic estimates (if available)
+            'estimated_organic_traffic': float(domain_metrics.get('ahrefsTraffic', 0)),
+            'organic_keywords': int(domain_metrics.get('ahrefsOrganicKeywords', 0)),
+
+            # Data sources and metadata
+            'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
+            'last_updated': datetime.now().isoformat(),
+            'quick_scan': quick_scan,
+            'analysis_depth': 'comprehensive' if not quick_scan else 'basic'
+        }
+
+        return backlinks_data
+
+    def _extract_referring_domains(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
+        """Extract and analyze referring domains"""
+        domain_stats = {}
+
+        for link in backlinks:
+            if not link.get('url_from'):
+                continue
+
+            try:
+                source_domain = urlparse(link['url_from']).netloc
+                if source_domain not in domain_stats:
+                    domain_stats[source_domain] = {
+                        'domain': source_domain,
+                        'backlinks': 0,
+                        'first_seen': link.get('first_seen', ''),
+                        'domain_authority': link.get('domain_inlink_rank', 0),
+                        'follow_links': 0,
+                        'nofollow_links': 0
+                    }
+
+                domain_stats[source_domain]['backlinks'] += 1
+
+                if link.get('nofollow'):
+                    domain_stats[source_domain]['nofollow_links'] += 1
+                else:
+                    domain_stats[source_domain]['follow_links'] += 1
+
+            except Exception:
+                continue
+
+        # Sort by backlinks count and return top domains
+        top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
+        return top_domains[:20]  # Top 20 referring domains
+
+    def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
+        """Analyze anchor text distribution"""
+        anchor_stats = {}
+
+        for link in backlinks:
+            anchor = link.get('anchor', '').strip()
+            if not anchor or len(anchor) > 100:  # Skip very long anchors
+                continue
+
+            if anchor not in anchor_stats:
+                anchor_stats[anchor] = {
+                    'anchor_text': anchor,
+                    'backlinks': 0,
+                    'follow_links': 0,
+                    'nofollow_links': 0,
+                    'unique_domains': set()
+                }
+
+            anchor_stats[anchor]['backlinks'] += 1
+
+            if link.get('nofollow'):
+                anchor_stats[anchor]['nofollow_links'] += 1
+            else:
+                anchor_stats[anchor]['follow_links'] += 1
+
+            # Track unique domains for this anchor
+            try:
+                domain = urlparse(link.get('url_from', '')).netloc
+                anchor_stats[anchor]['unique_domains'].add(domain)
+            except Exception:
+                pass
+
+        # Convert sets to counts and sort
+        anchor_distribution = []
+        for anchor_data in anchor_stats.values():
+            anchor_data['unique_domains'] = len(anchor_data['unique_domains'])
+            anchor_distribution.append(anchor_data)
+
+        # Sort by backlinks count
+        anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
+        return anchor_distribution[:15]  # Top 15 anchor texts
+
+    def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
+        """Calculate monthly backlinks changes"""
+        now = datetime.now()
+        last_month = now - timedelta(days=30)
+
+        new_links = 0
+        recent_links = 0
+
+        for link in backlinks:
+            first_seen = link.get('first_seen', '')
+            if not first_seen:
+                continue
+
+            try:
+                link_date = datetime.strptime(first_seen, '%Y-%m-%d')
+                if link_date >= last_month:
+                    new_links += 1
+                if link_date >= now - timedelta(days=90):  # 3 months
+                    recent_links += 1
+            except Exception:
+                continue
+
+        return {
+            'new_backlinks': new_links,
+            'lost_backlinks': 0,  # Can't calculate without historical data
+            'net_change': new_links,
+            'recent_backlinks_3m': recent_links
+        }
+
+    def _get_top_backlinks(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
+        """Get top-quality backlinks"""
+        # Sort by inlink_rank (higher is better)
+        sorted_links = sorted(
+            backlinks,
+            key=lambda x: x.get('inlink_rank', 0),
+            reverse=True
+        )
+
+        top_links = []
+        for link in sorted_links[:10]:
+            top_links.append({
+                'source_url': link.get('url_from', ''),
+                'source_title': link.get('title', ''),
+                'anchor_text': link.get('anchor', ''),
+                'is_follow': not link.get('nofollow', True),
+                'authority_score': link.get('inlink_rank', 0),
+                'first_seen': link.get('first_seen', '')
+            })
+
+        return top_links
+
+    def _analyze_link_quality(self, backlinks: List[Dict], domain_metrics: Dict) -> Dict[str, Any]:
+        """Analyze overall link quality metrics"""
+        if not backlinks:
+            return {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0}
+
+        follow_count = sum(1 for link in backlinks if not link.get('nofollow', True))
+        total_links = len(backlinks)
+        follow_ratio = (follow_count / total_links * 100) if total_links > 0 else 0
+
+        # Average authority score
+        authority_scores = [link.get('inlink_rank', 0) for link in backlinks if link.get('inlink_rank')]
+        avg_authority = sum(authority_scores) / len(authority_scores) if authority_scores else 0
+
+        # Quality score (0-100)
+        quality_score = min(100, (
+            (follow_ratio * 0.4) +  # 40% weight on follow ratio
+            (avg_authority * 2) +  # 40% weight on authority (scaled)
+            (min(20, len(set(link.get('url_from', '').split('/')[2] for link in backlinks))) * 1)  # 20% on domain diversity
+        ))
+
+        return {
+            'follow_ratio': round(follow_ratio, 1),
+            'avg_authority': round(avg_authority, 1),
+            'quality_score': round(quality_score, 1),
+            'total_analyzed': total_links,
+            'edu_gov_count': int(domain_metrics.get('majesticRefEDU', 0)) + int(domain_metrics.get('majesticRefGov', 0))
+        }
+
+    def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
+        """Track which data sources provided information"""
+        sources = []
+
+        if individual_backlinks:
+            sources.append('Best Backlink Checker API')
+        if majestic_metrics:
+            sources.append('Majestic RapidAPI')
+        if domain_metrics:
+            sources.append('Domain Metrics Check API')
+
+        return sources or ['No data sources available']
+
+    def _generate_no_api_data(self, url: str) -> ModuleResult:
+        """Generate response when no API key is available"""
+        domain = self._extract_domain(url)
+
+        no_api_data = {
+            'total_backlinks': 0,
+            'total_ref_domains': 0,
+            'domain_rating': 0,
+            'authority_scores': {
+                'ahrefs_dr': 0,
+                'moz_da': 0,
+                'moz_pa': 0,
+                'majestic_tf': 0,
+                'majestic_cf': 0
+            },
+            'referring_domains': [],
+            'anchor_distribution': [],
+            'monthly_changes': {
+                'new_backlinks': 0,
+                'lost_backlinks': 0,
+                'net_change': 0
+            },
+            'top_backlinks': [],
+            'quality_metrics': {
+                'follow_ratio': 0,
+                'avg_authority': 0,
+                'quality_score': 0
+            },
+            'edu_links': 0,
+            'gov_links': 0,
+            'estimated_organic_traffic': 0,
+            'organic_keywords': 0,
+            'data_sources': ['No API credentials available'],
+            'last_updated': datetime.now().isoformat(),
+            'placeholder': True,
+            'message': 'Add RAPIDAPI_KEY to your .env file to unlock comprehensive backlinks analysis using Best Backlink Checker, Majestic, and Domain Metrics Check RapidAPIs.'
+        }
+
+        return ModuleResult(success=True, data=no_api_data)
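The `_api_request_with_retry` helper is the backbone of this module; note the backoff on HTTP 429 is linear (2 s, then 4 s, then 6 s). A self-contained sketch of the same control flow, with a hypothetical `fake_get` responder standing in for `requests.get`:

```python
import time

def fetch_with_retry(fake_get, max_retries: int = 3):
    """Mirror of the retry loop above, isolated from the RapidAPI specifics."""
    for attempt in range(max_retries):
        status, body = fake_get()
        if status == 200:
            return body
        if status == 429:                  # rate limited: linear backoff
            wait_time = (attempt + 1) * 2
            print(f"Rate limited, waiting {wait_time}s...")
            time.sleep(wait_time)
            continue
        print(f"API error {status}")       # other errors: retry immediately

    return None

# Succeeds on the third attempt after two 429s (sleeps 2s, then 4s).
responses = iter([(429, None), (429, None), (200, {'ok': True})])
print(fetch_with_retry(lambda: next(responses)))
```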
modules/keywords.py ADDED
@@ -0,0 +1,315 @@
+"""
+Keywords Rankings Module for SEO Report Generator
+Supports Google Search Console API (primary) and SERP API (fallback)
+"""
+
+import os
+import requests
+import json
+from typing import Dict, Any, List, Optional
+from urllib.parse import urlparse
+from datetime import datetime, timedelta
+
+
+class ModuleResult:
+    """Standard result object for SEO modules"""
+    def __init__(self, success: bool, data: Dict[str, Any], error: str = None):
+        self.success = success
+        self.data = data
+        self.error = error
+
+
+class KeywordsModule:
+    def __init__(self):
+        self.gsc_api_key = os.getenv('GOOGLE_SEARCH_CONSOLE_API_KEY')
+        self.serp_api_key = os.getenv('SERP_API_KEY')  # SerpAPI or similar
+        self.data_for_seo_key = os.getenv('DATAFORSEO_API_KEY')
+
+    def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
+        """
+        Analyze keyword rankings for the given URL
+
+        Args:
+            url: Target website URL
+            quick_scan: If True, use limited data for competitor analysis
+
+        Returns:
+            ModuleResult with keywords data
+        """
+        try:
+            domain = self._extract_domain(url)
+
+            # Try Google Search Console first (if credentials available)
+            if self.gsc_api_key:
+                result = self._analyze_with_gsc(domain, quick_scan)
+                if result.success:
+                    return result
+
+            # Fallback to SERP API
+            if self.serp_api_key:
+                result = self._analyze_with_serp_api(domain, quick_scan)
+                if result.success:
+                    return result
+
+            # Fallback to DataForSEO
+            if self.data_for_seo_key:
+                result = self._analyze_with_dataforseo(domain, quick_scan)
+                if result.success:
+                    return result
+
+            # No API keys available - return placeholder data
+            return self._generate_placeholder_data(domain)
+
+        except Exception as e:
+            return ModuleResult(
+                success=False,
+                data={},
+                error=f"Keywords analysis failed: {str(e)}"
+            )
+
+    def _extract_domain(self, url: str) -> str:
+        """Extract domain from URL"""
+        if not url.startswith(('http://', 'https://')):
+            url = 'https://' + url
+        return urlparse(url).netloc.replace('www.', '')
+
+    def _analyze_with_gsc(self, domain: str, quick_scan: bool) -> ModuleResult:
+        """Analyze with Google Search Console API"""
+        try:
+            # Note: GSC API requires site verification and proper setup
+            # This is a simplified implementation - real GSC API needs OAuth2
+
+            # GSC API endpoint (simplified)
+            base_url = "https://searchconsole.googleapis.com/webmasters/v3/sites"
+            site_url = f"https://{domain}/"
+
+            # Get search analytics data
+            analytics_url = f"{base_url}/{site_url}/searchAnalytics/query"
+
+            # Date range (last 90 days)
+            end_date = datetime.now().date()
+            start_date = end_date - timedelta(days=90)
+
+            payload = {
+                "startDate": start_date.isoformat(),
+                "endDate": end_date.isoformat(),
+                "dimensions": ["query", "page"],
+                "rowLimit": 1000 if not quick_scan else 100
+            }
+
+            headers = {
+                "Authorization": f"Bearer {self.gsc_api_key}",
+                "Content-Type": "application/json"
+            }
+
+            response = requests.post(analytics_url, json=payload, headers=headers, timeout=30)
+
+            if response.status_code != 200:
+                raise Exception(f"GSC API error: {response.status_code}")
+
+            data = response.json()
+            return self._process_gsc_data(data, domain)
+
+        except Exception as e:
+            return ModuleResult(success=False, data={}, error=str(e))
+
+    def _analyze_with_serp_api(self, domain: str, quick_scan: bool) -> ModuleResult:
+        """Analyze with SERP API (SerpAPI, etc.)"""
+        try:
+            # Using SerpAPI as example
+            url = "https://serpapi.com/search"
+
+            params = {
+                "engine": "google",
+                "q": f"site:{domain}",
+                "api_key": self.serp_api_key,
+                "num": 100 if not quick_scan else 20
+            }
+
+            response = requests.get(url, params=params, timeout=30)
+
+            if response.status_code != 200:
+                raise Exception(f"SERP API error: {response.status_code}")
+
+            data = response.json()
+            return self._process_serp_data(data, domain)
+
+        except Exception as e:
+            return ModuleResult(success=False, data={}, error=str(e))
+
+    def _analyze_with_dataforseo(self, domain: str, quick_scan: bool) -> ModuleResult:
+        """Analyze with DataForSEO API"""
+        try:
+            # DataForSEO implementation
+            auth = (self.data_for_seo_key, os.getenv('DATAFORSEO_API_PASSWORD', ''))
+
+            # Get domain keywords
+            url = "https://api.dataforseo.com/v3/dataforseo_labs/google/ranked_keywords/live"
+
+            payload = {
+                "target": domain,
+                "limit": 1000 if not quick_scan else 100,
+                "offset": 0,
+                "filters": [
+                    ["metrics.organic.pos", "<=", 100]
+                ]
+            }
+
+            response = requests.post(url, json=[payload], auth=auth, timeout=60)
+
+            if response.status_code != 200:
+                raise Exception(f"DataForSEO API error: {response.status_code}")
+
+            data = response.json()
+            return self._process_dataforseo_data(data, domain)
+
+        except Exception as e:
+            return ModuleResult(success=False, data={}, error=str(e))
+
+    def _process_gsc_data(self, data: Dict, domain: str) -> ModuleResult:
+        """Process Google Search Console data"""
+        if 'rows' not in data:
+            return ModuleResult(success=False, data={}, error="No GSC data available")
+
+        rows = data['rows']
+        total_keywords = len(rows)
+
+        # Position distribution
+        top_3 = sum(1 for row in rows if row.get('position', 100) <= 3)
+        top_10 = sum(1 for row in rows if row.get('position', 100) <= 10)
+        top_50 = sum(1 for row in rows if row.get('position', 100) <= 50)
+
+        # Best and worst performing
+        sorted_by_position = sorted(rows, key=lambda x: x.get('position', 100))
+        best_keywords = sorted_by_position[:10]
+        worst_keywords = sorted_by_position[-10:]
+
+        # High opportunity keywords (high impressions, low clicks)
+        opportunity_keywords = []
+        for row in rows:
+            impressions = row.get('impressions', 0)
+            clicks = row.get('clicks', 0)
+            ctr = (clicks / impressions * 100) if impressions > 0 else 0
+
+            if impressions > 100 and ctr < 2 and row.get('position', 100) > 10:
+                opportunity_keywords.append({
+                    'keyword': row.get('keys', [''])[0],
+                    'position': row.get('position', 0),
+                    'impressions': impressions,
+                    'clicks': clicks,
+                    'ctr': round(ctr, 2)
+                })
+
+        opportunity_keywords = sorted(opportunity_keywords, key=lambda x: x['impressions'], reverse=True)[:10]
+
+        keywords_data = {
+            'total_keywords': total_keywords,
+            'position_distribution': {
+                'top_3': top_3,
+                'top_10': top_10,
+                'top_50': top_50,
+                'beyond_50': total_keywords - top_50
+            },
+            'best_keywords': [
+                {
+                    'keyword': row.get('keys', [''])[0],
+                    'position': row.get('position', 0),
+                    'clicks': row.get('clicks', 0),
+                    'impressions': row.get('impressions', 0)
+                } for row in best_keywords
+            ],
+            'worst_keywords': [
+                {
+                    'keyword': row.get('keys', [''])[0],
+                    'position': row.get('position', 0),
+                    'clicks': row.get('clicks', 0),
+                    'impressions': row.get('impressions', 0)
+                } for row in worst_keywords
+            ],
+            'opportunity_keywords': opportunity_keywords,
+            'data_source': 'Google Search Console',
+            'last_updated': datetime.now().isoformat()
+        }
+
+        return ModuleResult(success=True, data=keywords_data)
+
+    def _process_serp_data(self, data: Dict, domain: str) -> ModuleResult:
+        """Process SERP API data"""
+        # Simplified SERP data processing
+        organic_results = data.get('organic_results', [])
+
+        keywords_data = {
+            'total_keywords': len(organic_results),
+            'position_distribution': {
+                'top_3': len([r for r in organic_results if r.get('position', 100) <= 3]),
+                'top_10': len([r for r in organic_results if r.get('position', 100) <= 10]),
+                'top_50': len([r for r in organic_results if r.get('position', 100) <= 50]),
+                'beyond_50': len([r for r in organic_results if r.get('position', 100) > 50])
+            },
+            'best_keywords': [
+                {
+                    'keyword': r.get('title', ''),
+                    'position': r.get('position', 0),
+                    'url': r.get('link', '')
+                } for r in organic_results[:10]
+            ],
+            'data_source': 'SERP API',
+            'last_updated': datetime.now().isoformat()
+        }
+
+        return ModuleResult(success=True, data=keywords_data)
+
+    def _process_dataforseo_data(self, data: Dict, domain: str) -> ModuleResult:
+        """Process DataForSEO data"""
+        if not data.get('tasks') or not data['tasks'][0].get('result'):
+            return ModuleResult(success=False, data={}, error="No DataForSEO data available")
+
+        results = data['tasks'][0]['result']
+        total_keywords = len(results)
+
+        # Position distribution
+        top_3 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 3)
+        top_10 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 10)
+        top_50 = sum(1 for r in results if r.get('metrics', {}).get('organic', {}).get('pos', 100) <= 50)
+
+        keywords_data = {
+            'total_keywords': total_keywords,
+            'position_distribution': {
+                'top_3': top_3,
+                'top_10': top_10,
+                'top_50': top_50,
+                'beyond_50': total_keywords - top_50
+            },
+            'best_keywords': [
+                {
+                    'keyword': r.get('keyword', ''),
+                    'position': r.get('metrics', {}).get('organic', {}).get('pos', 0),
+                    'search_volume': r.get('keyword_info', {}).get('search_volume', 0)
+                } for r in sorted(results, key=lambda x: x.get('metrics', {}).get('organic', {}).get('pos', 100))[:10]
+            ],
+            'data_source': 'DataForSEO',
+            'last_updated': datetime.now().isoformat()
+        }
+
+        return ModuleResult(success=True, data=keywords_data)
+
+    def _generate_placeholder_data(self, domain: str) -> ModuleResult:
+        """Generate placeholder data when no API keys are available"""
+        keywords_data = {
+            'total_keywords': 0,
+            'position_distribution': {
+                'top_3': 0,
+                'top_10': 0,
+                'top_50': 0,
+                'beyond_50': 0
+            },
+            'best_keywords': [],
+            'worst_keywords': [],
+            'opportunity_keywords': [],
+            'data_source': 'No API credentials',
+            'last_updated': datetime.now().isoformat(),
+            'placeholder': True,
+            'message': 'Connect Google Search Console or SERP API to unlock keyword data'
+        }
+
+        return ModuleResult(success=True, data=keywords_data)
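The GSC path's "opportunity keywords" heuristic is the most opinionated piece here: a query qualifies when it has over 100 impressions, a CTR under 2%, and an average position beyond 10. A quick worked example of that filter on made-up GSC-style rows:

```python
# Hypothetical rows in the shape _process_gsc_data expects.
rows = [
    {'keys': ['seo audit tool'], 'position': 14.2, 'impressions': 850, 'clicks': 9},
    {'keys': ['seo report'], 'position': 3.1, 'impressions': 1200, 'clicks': 240},
    {'keys': ['backlink checker'], 'position': 22.0, 'impressions': 60, 'clicks': 1},
]

for row in rows:
    ctr = row['clicks'] / row['impressions'] * 100
    is_opportunity = row['impressions'] > 100 and ctr < 2 and row['position'] > 10
    print(row['keys'][0], f"{ctr:.2f}%", is_opportunity)

# Only 'seo audit tool' qualifies: ~1.06% CTR at position 14.2 with 850
# impressions - real demand on page 2, but almost no clicks.
```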
report_generator.py CHANGED
@@ -12,16 +12,17 @@ class ReportGenerator:
12
 
13
  def generate_html_report(self, url: str, technical_data: Dict[str, Any],
14
  content_data: Dict[str, Any], competitor_data: List[Dict] = None,
15
- include_charts: bool = True) -> str:
 
16
  """Generate complete HTML SEO report"""
17
 
18
  # Generate charts
19
  charts_html = ""
20
  if include_charts:
21
- charts_html = self._generate_charts(technical_data, content_data, competitor_data)
22
 
23
- # Generate executive summary
24
- executive_summary = self._generate_executive_summary(technical_data, content_data)
25
 
26
  # Generate technical SEO section
27
  technical_section = self._generate_technical_section(technical_data)
@@ -29,6 +30,15 @@ class ReportGenerator:
29
  # Generate content audit section
30
  content_section = self._generate_content_section(content_data)
31
 
 
 
 
 
 
 
 
 
 
32
  # Generate competitor section
33
  competitor_section = ""
34
  if competitor_data:
@@ -48,15 +58,19 @@ class ReportGenerator:
48
  executive_summary=executive_summary,
49
  technical_section=technical_section,
50
  content_section=content_section,
 
 
51
  competitor_section=competitor_section,
52
  placeholder_sections=placeholder_sections,
53
- recommendations=recommendations
 
54
  )
55
 
56
  return report_html
57
 
58
  def _generate_charts(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
59
- competitor_data: List[Dict] = None) -> str:
 
60
  """Generate interactive charts using Plotly"""
61
  charts_html = ""
62
 
@@ -204,7 +218,8 @@ class ReportGenerator:
204
 
205
  return charts_html
206
 
207
- def _generate_executive_summary(self, technical_data: Dict[str, Any], content_data: Dict[str, Any]) -> str:
 
208
  """Generate executive summary section"""
209
  # Calculate overall health score
210
  mobile_perf = technical_data.get('mobile', {}).get('performance_score', 0)
@@ -650,6 +665,219 @@ class ReportGenerator:
650
  </div>
651
  """
652
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
653
  def _get_report_template(self) -> str:
654
  """Get the HTML template for the report"""
655
  return """
@@ -1084,6 +1312,16 @@ class ReportGenerator:
1084
  {content_section}
1085
  </div>
1086
 
 
 
 
 
 
 
 
 
 
 
1087
  {competitor_section}
1088
 
1089
  <div class="section">
@@ -1094,6 +1332,8 @@ class ReportGenerator:
1094
  <div class="section">
1095
  {recommendations}
1096
  </div>
 
 
1097
  </div>
1098
  </body>
1099
  </html>
 
12
 
13
  def generate_html_report(self, url: str, technical_data: Dict[str, Any],
14
  content_data: Dict[str, Any], competitor_data: List[Dict] = None,
15
+ keywords_data: Dict[str, Any] = None, backlinks_data: Dict[str, Any] = None,
16
+ llm_recommendations: Dict[str, Any] = None, include_charts: bool = True) -> str:
17
  """Generate complete HTML SEO report"""
18
 
19
  # Generate charts
20
  charts_html = ""
21
  if include_charts:
22
+ charts_html = self._generate_charts(technical_data, content_data, competitor_data, keywords_data, backlinks_data)
23
 
24
+ # Generate executive summary (now includes LLM insights)
25
+ executive_summary = self._generate_executive_summary(technical_data, content_data, llm_recommendations)
26
 
27
  # Generate technical SEO section
28
  technical_section = self._generate_technical_section(technical_data)
 
30
  # Generate content audit section
31
  content_section = self._generate_content_section(content_data)
32
 
33
+ # Generate keywords section
34
+ keywords_section = self._generate_keywords_section(keywords_data) if keywords_data else ""
35
+
36
+ # Generate backlinks section
37
+ backlinks_section = self._generate_backlinks_section(backlinks_data) if backlinks_data else ""
38
+
39
+ # Generate LLM recommendations section
40
+ recommendations_section = self._generate_recommendations_section(llm_recommendations) if llm_recommendations else ""
41
+
42
  # Generate competitor section
43
  competitor_section = ""
44
  if competitor_data:
 
58
  executive_summary=executive_summary,
59
  technical_section=technical_section,
60
  content_section=content_section,
61
+ keywords_section=keywords_section,
62
+ backlinks_section=backlinks_section,
63
  competitor_section=competitor_section,
64
  placeholder_sections=placeholder_sections,
65
+ recommendations=recommendations,
66
+ llm_recommendations=recommendations_section
67
  )
68
 
69
  return report_html
70
 
71
  def _generate_charts(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
72
+ competitor_data: List[Dict] = None, keywords_data: Dict[str, Any] = None,
73
+ backlinks_data: Dict[str, Any] = None) -> str:
74
  """Generate interactive charts using Plotly"""
75
  charts_html = ""
76
 
 
218
 
219
  return charts_html
220
 
221
+ def _generate_executive_summary(self, technical_data: Dict[str, Any], content_data: Dict[str, Any],
222
+ llm_recommendations: Dict[str, Any] = None) -> str:
223
  """Generate executive summary section"""
224
  # Calculate overall health score
225
  mobile_perf = technical_data.get('mobile', {}).get('performance_score', 0)
 
665
  </div>
666
  """
667
 
+    def _generate_keywords_section(self, keywords_data: Dict[str, Any]) -> str:
+        """Generate keywords analysis section"""
+        if keywords_data.get('placeholder'):
+            return f"""
+            <div class="placeholder-section">
+                <h3>πŸ” Keyword Rankings</h3>
+                <div class="placeholder-content">
+                    <p><strong>No keyword data available.</strong></p>
+                    <p>{keywords_data.get('message', 'Connect Google Search Console or a SERP API to unlock keyword insights.')}</p>
+                </div>
+            </div>
+            """
+
+        total = keywords_data.get('total_keywords', 0)
+        pos_dist = keywords_data.get('position_distribution', {})
+        best_keywords = keywords_data.get('best_keywords', [])
+        opportunity_keywords = keywords_data.get('opportunity_keywords', [])
+
+        # Create position distribution chart
+        pos_chart = ""
+        if pos_dist:
+            import plotly.graph_objects as go
+            from plotly.offline import plot
+
+            labels = ['Top 3', 'Top 10', 'Top 50', 'Beyond 50']
+            values = [
+                pos_dist.get('top_3', 0),
+                pos_dist.get('top_10', 0) - pos_dist.get('top_3', 0),
+                pos_dist.get('top_50', 0) - pos_dist.get('top_10', 0),
+                pos_dist.get('beyond_50', 0)
+            ]
+
+            fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=0.4)])
+            fig.update_layout(title="Keyword Position Distribution", height=400)
+            pos_chart = plot(fig, include_plotlyjs=False, output_type='div')
+
+        best_keywords_html = ""
+        if best_keywords:
+            best_keywords_html = "<h4>πŸ† Top Performing Keywords</h4><table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Clicks</th><th>Impressions</th></tr>"
+            for kw in best_keywords[:10]:
+                best_keywords_html += f"""
+                <tr>
+                    <td>{kw.get('keyword', '')}</td>
+                    <td>{kw.get('position', 0)}</td>
+                    <td>{kw.get('clicks', 0)}</td>
+                    <td>{kw.get('impressions', 0)}</td>
+                </tr>
+                """
+            best_keywords_html += "</table>"
+
+        opportunity_html = ""
+        if opportunity_keywords:
+            opportunity_html = "<h4>πŸš€ Opportunity Keywords</h4><table class='data-table'><tr><th>Keyword</th><th>Position</th><th>Impressions</th><th>CTR</th></tr>"
+            for kw in opportunity_keywords[:10]:
+                opportunity_html += f"""
+                <tr>
+                    <td>{kw.get('keyword', '')}</td>
+                    <td>{kw.get('position', 0)}</td>
+                    <td>{kw.get('impressions', 0)}</td>
+                    <td>{kw.get('ctr', 0)}%</td>
+                </tr>
+                """
+            opportunity_html += "</table>"
+
+        return f"""
+        <div class="card">
+            <h3>πŸ” Keyword Rankings Analysis</h3>
+            <div class="metrics-grid">
+                <div class="metric-card">
+                    <div class="metric-value">{total}</div>
+                    <div class="metric-label">Total Keywords</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{pos_dist.get('top_10', 0)}</div>
+                    <div class="metric-label">Top 10 Rankings</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{len(opportunity_keywords)}</div>
+                    <div class="metric-label">Opportunities</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{keywords_data.get('data_source', 'Unknown')}</div>
+                    <div class="metric-label">Data Source</div>
+                </div>
+            </div>
+            {pos_chart}
+            {best_keywords_html}
+            {opportunity_html}
+        </div>
+        """
+
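# Editor's aside (illustrative sketch, not part of the commit): a minimal
# example of the keywords_data dict this section consumes, inferred from the
# .get() calls above; the exact fields KeywordsModule emits are an assumption.
# Note position_distribution is cumulative (top_10 includes top_3), which is
# why the pie-chart values above subtract the inner buckets.
sample_keywords_data = {
    'total_keywords': 120,
    'position_distribution': {'top_3': 8, 'top_10': 25, 'top_50': 90, 'beyond_50': 30},
    'best_keywords': [
        {'keyword': 'seo audit tool', 'position': 3, 'clicks': 140, 'impressions': 2100},
    ],
    'opportunity_keywords': [
        {'keyword': 'seo report template', 'position': 12, 'impressions': 900, 'ctr': 1.2},
    ],
    'data_source': 'SERP API',
}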
+    def _generate_backlinks_section(self, backlinks_data: Dict[str, Any]) -> str:
+        """Generate backlinks analysis section"""
+        if backlinks_data.get('placeholder'):
+            return f"""
+            <div class="placeholder-section">
+                <h3>πŸ”— Backlink Profile</h3>
+                <div class="placeholder-content">
+                    <p><strong>No backlink data available.</strong></p>
+                    <p>{backlinks_data.get('message', 'Add a RapidAPI key to unlock comprehensive backlink insights.')}</p>
+                </div>
+            </div>
+            """
+
+        total_backlinks = backlinks_data.get('total_backlinks', 0)
+        total_ref_domains = backlinks_data.get('total_ref_domains', 0)
+        domain_rating = backlinks_data.get('domain_rating', 0)
+        monthly_changes = backlinks_data.get('monthly_changes', {})
+        referring_domains = backlinks_data.get('referring_domains', [])
+        anchor_distribution = backlinks_data.get('anchor_distribution', [])
+
+        # Create anchor text distribution chart
+        anchor_chart = ""
+        if anchor_distribution:
+            import plotly.graph_objects as go
+            from plotly.offline import plot
+
+            anchors = [a.get('anchor_text', '')[:30] for a in anchor_distribution[:10]]
+            counts = [a.get('backlinks', 0) for a in anchor_distribution[:10]]
+
+            fig = go.Figure(data=[go.Bar(x=anchors, y=counts)])
+            fig.update_layout(title="Top Anchor Text Distribution", height=400, xaxis={'tickangle': 45})
+            anchor_chart = plot(fig, include_plotlyjs=False, output_type='div')
+
+        ref_domains_html = ""
+        if referring_domains:
+            ref_domains_html = "<h4>🏒 Top Referring Domains</h4><table class='data-table'><tr><th>Domain</th><th>Domain Rating</th><th>Backlinks</th><th>First Seen</th></tr>"
+            for rd in referring_domains[:10]:
+                ref_domains_html += f"""
+                <tr>
+                    <td>{rd.get('domain', '')}</td>
+                    <td>{rd.get('domain_rating', 0)}</td>
+                    <td>{rd.get('backlinks', 0)}</td>
+                    <td>{rd.get('first_seen', 'N/A')}</td>
+                </tr>
+                """
+            ref_domains_html += "</table>"
+
+        return f"""
+        <div class="card">
+            <h3>πŸ”— Backlink Profile Analysis</h3>
+            <div class="metrics-grid">
+                <div class="metric-card">
+                    <div class="metric-value">{total_backlinks:,}</div>
+                    <div class="metric-label">Total Backlinks</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{total_ref_domains:,}</div>
+                    <div class="metric-label">Referring Domains</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{domain_rating}</div>
+                    <div class="metric-label">Domain Rating</div>
+                </div>
+                <div class="metric-card">
+                    <div class="metric-value">{monthly_changes.get('net_change', 0):+d}</div>
+                    <div class="metric-label">Monthly Change</div>
+                </div>
+            </div>
+            {anchor_chart}
+            {ref_domains_html}
+        </div>
+        """
+
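# Editor's aside (illustrative sketch, not part of the commit): the
# backlinks_data shape assumed by this section, inferred from the .get()
# calls above; field names coming out of BacklinksModule/RapidAPI are
# assumptions, and the values are invented placeholders.
sample_backlinks_data = {
    'total_backlinks': 15230,
    'total_ref_domains': 412,
    'domain_rating': 54,
    'monthly_changes': {'net_change': 37},  # rendered with the :+d sign format
    'referring_domains': [
        {'domain': 'example-blog.com', 'domain_rating': 61, 'backlinks': 12, 'first_seen': '2024-03-01'},
    ],
    'anchor_distribution': [
        {'anchor_text': 'thinkly labs seo', 'backlinks': 240},
    ],
}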
+    def _generate_recommendations_section(self, llm_recommendations: Dict[str, Any]) -> str:
+        """Generate LLM-powered recommendations section"""
+        if not llm_recommendations:
+            return ""
+
+        recommendations = llm_recommendations.get('recommendations', [])
+        executive_insights = llm_recommendations.get('executive_insights', [])
+        priority_actions = llm_recommendations.get('priority_actions', [])
+
+        insights_html = ""
+        if executive_insights:
+            insights_html = "<div class='executive-insights'><h4>🎯 Executive Insights</h4><ul>"
+            for insight in executive_insights:
+                insights_html += f"<li>{insight}</li>"
+            insights_html += "</ul></div>"
+
+        priority_html = ""
+        if priority_actions:
+            priority_html = "<div class='priority-actions'><h4>πŸ”₯ Priority Actions</h4>"
+            for i, action in enumerate(priority_actions[:3], 1):
+                priority_html += f"""
+                <div class="priority-action">
+                    <div class="action-number">{i}</div>
+                    <div class="action-content">
+                        <div class="action-title">{action.get('title', '')}</div>
+                        <div class="action-description">{action.get('description', '')}</div>
+                        <span class="action-priority">{action.get('priority', 'MEDIUM')}</span>
+                    </div>
+                </div>
+                """
+            priority_html += "</div>"
+
+        recommendations_html = ""
+        if recommendations:
+            recommendations_html = "<div class='llm-recommendations'><h4>πŸ€– AI-Generated Recommendations</h4><ul>"
+            for rec in recommendations:
+                recommendations_html += f"<li>{rec}</li>"
+            recommendations_html += "</ul></div>"
+
+        return f"""
+        <div class="card">
+            <h3>🧠 Smart Recommendations</h3>
+            <p class="data-source">Generated by {llm_recommendations.get('data_source', 'AI Analysis')}</p>
+            {insights_html}
+            {priority_html}
+            {recommendations_html}
+        </div>
+        """
+
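# Editor's aside (illustrative sketch, not part of the commit): the
# llm_recommendations payload this section expects; the keys are inferred
# from the .get() calls above, and the concrete values are invented.
sample_llm_recommendations = {
    'data_source': 'Groq LLM',  # assumed label; requirements.txt adds groq below
    'executive_insights': ['Mobile performance is the main drag on organic visibility.'],
    'priority_actions': [
        {'title': 'Compress hero images', 'description': 'Serve WebP variants under 200 KB.', 'priority': 'HIGH'},
    ],
    'recommendations': ['Add meta descriptions to the pages currently missing them.'],
}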
     def _get_report_template(self) -> str:
         """Get the HTML template for the report"""
         return """
 
         {content_section}
     </div>
 
+    <div class="section">
+        <h2>πŸ” Keywords Analysis</h2>
+        {keywords_section}
+    </div>
+
+    <div class="section">
+        <h2>πŸ”— Backlinks Profile</h2>
+        {backlinks_section}
+    </div>
+
     {competitor_section}
 
     <div class="section">
 
     <div class="section">
         {recommendations}
     </div>
+
+    {llm_recommendations}
     </div>
 </body>
 </html>
requirements.txt CHANGED
@@ -1,5 +1,9 @@
 # Thinkly Labs SEO - Dependencies
+
+# Core Framework
 flask
+
+# Data Processing & Analysis
 requests
 beautifulsoup4
 pandas
@@ -8,4 +12,14 @@ jinja2
 validators
 urllib3
 lxml
-reportlab
+
+# PDF Generation
+reportlab
+
+# AI/LLM Integration
+groq
+python-dotenv
+
+# API Integrations (Optional - set via environment variables)
+# google-api-python-client  # For Google Search Console
+# oauth2client  # For GSC authentication
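Editor's note: groq and python-dotenv arrive together, which suggests API keys are loaded from a local .env file rather than hard-coded. A minimal sketch of that pattern -- the variable names GROQ_API_KEY and RAPIDAPI_KEY are assumptions, not confirmed by this diff:

import os
from dotenv import load_dotenv

load_dotenv()  # reads key=value pairs from .env into the environment, if present
GROQ_API_KEY = os.getenv("GROQ_API_KEY")    # assumed name, for the groq client
RAPIDAPI_KEY = os.getenv("RAPIDAPI_KEY")    # assumed name, for backlink lookups
if not GROQ_API_KEY:
    print("GROQ_API_KEY not set; LLM recommendations will be skipped.")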
simple_pdf_generator.py CHANGED
@@ -4,6 +4,7 @@ or browser-based PDF conversion instructions
 """
 
 import io
+import re
 from typing import Dict, Any
 
 class SimplePDFGenerator:
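Editor's note: this hunk only adds the import; the call sites for re fall outside it. A plausible (hypothetical) use in a simple PDF generator is stripping HTML tags before laying out plain text with reportlab -- _strip_tags below is an invented helper name, not taken from the source:

import re

def _strip_tags(html: str) -> str:
    """Drop HTML tags and collapse whitespace for plain-text PDF output."""
    text = re.sub(r'<[^>]+>', ' ', html)       # remove <...> tags
    return re.sub(r'\s+', ' ', text).strip()   # collapse whitespace runs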