Spaces:
Running
Running
| import os | |
| import json | |
| from typing import Dict, Any, List | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
| load_dotenv() | |
class LLMRecommendations:
    """Turn SEO audit data into prioritized recommendations.

    When a Groq client can be created, the audit data is summarized into a
    prompt and sent to the LLM. When no client is available, or anything in
    the LLM path raises, a rule-based fallback derives recommendations from
    the same data. Both paths return a dict with the keys
    ``recommendations_markdown``, ``executive_insights``,
    ``priority_actions``, ``data_source`` and ``generated_at``.
    """

    # Status icons are written as escapes so the literals survive tools that
    # mis-decode UTF-8 source text.
    _ICON_CRITICAL = "\U0001F534"  # red circle
    _ICON_WARNING = "\U0001F7E1"  # yellow circle
    _ICON_GOOD = "\U0001F7E2"  # green circle
    # NOTE(review): the intended informational icon could not be determined
    # from the mis-encoded source; a bar chart is a best guess - confirm.
    _ICON_INFO = "\U0001F4CA"
    _ICON_CAUTION = "\u26A0\uFE0F"  # warning sign
    _ICON_ERROR = "\u274C"  # cross mark

    _MODEL = "openai/gpt-oss-120b"
    _PRIORITY_HEADER = '## Priority:'
    _LEGACY_TAGS = ('**CRITICAL**', '**HIGH**', '**MEDIUM**', '**LOW**')
    _MAX_PRIORITY_ACTIONS = 5

    def __init__(self):
        """Create the Groq client from ``GROQ_API_KEY`` if possible.

        Never raises: on a missing key or any client construction error the
        instance is marked unavailable and only the fallback path is used.
        """
        self.client = None
        self.available = False
        api_key = os.getenv('GROQ_API_KEY')
        if not api_key:
            return
        try:
            self.client = Groq(api_key=api_key)
            self.available = True
        except Exception:
            # Best effort by design: a broken client must not prevent the
            # rule-based fallback from working.
            self.client = None
            self.available = False

    def generate_recommendations(self, url: str, technical_data: Dict[str, Any],
                                 content_data: Dict[str, Any], keywords_data: Dict[str, Any],
                                 backlinks_data: Dict[str, Any]) -> Dict[str, Any]:
        """Return recommendations for ``url`` built from the four audit modules.

        Args:
            url: The audited website.
            technical_data: Technical/performance audit results.
            content_data: Content audit results.
            keywords_data: Keyword ranking data (may be a placeholder).
            backlinks_data: Backlink profile data (may be a placeholder).

        Returns:
            The result dict described on the class. ``data_source`` is
            ``'Groq LLM Analysis'`` on the LLM path and
            ``'Fallback Analysis'`` otherwise.
        """
        if not self.available:
            return self._generate_fallback_recommendations(
                technical_data, content_data, keywords_data, backlinks_data)
        try:
            context = self._prepare_context(
                url, technical_data, content_data, keywords_data, backlinks_data)
            recommendations = self._query_llm(context)
            return {
                'recommendations_markdown': recommendations,
                'executive_insights': self._generate_executive_insights(context),
                'priority_actions': self._extract_priority_actions(recommendations),
                'data_source': 'Groq LLM Analysis',
                'generated_at': context['analysis_date'],
            }
        except Exception as e:
            # Any failure (network, malformed data, empty LLM answer) degrades
            # to the rule-based result and reports the real cause.
            return self._generate_fallback_recommendations(
                technical_data, content_data, keywords_data, backlinks_data, error=str(e))

    def _prepare_context(self, url: str, technical_data: Dict, content_data: Dict,
                         keywords_data: Dict, backlinks_data: Dict) -> Dict[str, Any]:
        """Condense the raw audit dicts into the fields the prompt and insights use."""
        issues = technical_data.get('issues') or []
        return {
            'website': url,
            'analysis_date': technical_data.get('last_updated', ''),
            'technical_seo': {
                'mobile_score': technical_data.get('mobile_score', 0),
                'desktop_score': technical_data.get('desktop_score', 0),
                'core_web_vitals': technical_data.get('core_web_vitals', {}),
                'issues_count': len(issues),
                'top_issues': issues[:3],
            },
            'content_audit': {
                'pages_analyzed': content_data.get('pages_analyzed', 0),
                'metadata_completeness': content_data.get('metadata_completeness', {}),
                'avg_word_count': content_data.get('avg_word_count', 0),
                'cta_presence': content_data.get('cta_presence', 0),
                'content_freshness': content_data.get('content_freshness', {}),
            },
            'keywords': {
                'total_keywords': keywords_data.get('total_keywords', 0),
                'position_distribution': keywords_data.get('position_distribution', {}),
                'data_available': not keywords_data.get('placeholder', False),
                'opportunity_keywords': len(keywords_data.get('opportunity_keywords') or []),
                'data_source': keywords_data.get('data_source', 'Unknown'),
            },
            'backlinks': {
                'total_backlinks': backlinks_data.get('total_backlinks', 0),
                'total_ref_domains': backlinks_data.get('total_ref_domains', 0),
                'domain_rating': backlinks_data.get('domain_rating', 0),
                'monthly_changes': backlinks_data.get('monthly_changes', {}),
                'data_available': not backlinks_data.get('placeholder', False),
                'data_source': backlinks_data.get('data_source', 'Unknown'),
            },
        }

    def _build_prompt(self, context: Dict[str, Any]) -> str:
        """Render the audit context into the instruction prompt sent to the LLM."""
        technical = context['technical_seo']
        content = context['content_audit']
        keywords = context['keywords']
        backlinks = context['backlinks']
        lines = [
            f"You are an expert SEO consultant analyzing a comprehensive SEO audit for {context['website']}. Based on the data below, provide specific, actionable SEO recommendations.",
            "TECHNICAL SEO DATA:",
            f"- Mobile Performance Score: {technical['mobile_score']}/100",
            f"- Desktop Performance Score: {technical['desktop_score']}/100",
            f"- Core Web Vitals: {json.dumps(technical['core_web_vitals'])}",
            f"- Critical Issues Found: {technical['issues_count']}",
            f"- Top Issues: {technical['top_issues']}",
            "CONTENT AUDIT DATA:",
            f"- Pages Analyzed: {content['pages_analyzed']}",
            f"- Metadata Completeness: {json.dumps(content['metadata_completeness'])}",
            f"- Average Word Count: {content['avg_word_count']}",
            f"- CTA Presence: {content['cta_presence']}%",
            f"- Content Freshness: {json.dumps(content['content_freshness'])}",
            "KEYWORDS DATA:",
            f"- Total Keywords Tracked: {keywords['total_keywords']}",
            f"- Position Distribution: {json.dumps(keywords['position_distribution'])}",
            f"- Data Available: {keywords['data_available']}",
            f"- Opportunity Keywords: {keywords['opportunity_keywords']}",
            f"- Source: {keywords['data_source']}",
            "BACKLINKS DATA:",
            f"- Total Backlinks: {backlinks['total_backlinks']}",
            f"- Referring Domains: {backlinks['total_ref_domains']}",
            f"- Domain Rating: {backlinks['domain_rating']}",
            f"- Monthly Changes: {json.dumps(backlinks['monthly_changes'])}",
            f"- Data Available: {backlinks['data_available']}",
            f"- Source: {backlinks['data_source']}",
            "CRITICAL INSTRUCTIONS:",
            "1. Analyze the data holistically across all 4 modules",
            "2. Identify the TOP 3 most critical issues that need immediate attention",
            "3. Provide specific, actionable recommendations with clear steps",
            # Refers to the "Data Available" field rendered above; the prompt
            # never shows a "placeholder" flag to the model.
            "4. If API data is missing (Data Available: False), acknowledge this and focus on available data",
            "5. Prioritize recommendations by potential impact and ease of implementation",
            "6. Include technical optimizations, content improvements, keyword opportunities, and link building strategies",
            "7. Provide estimated timelines and resources needed for each recommendation",
            "8. IMPORTANT: Use ONLY plain text format with markdown syntax - NO tables, NO complex formatting, NO HTML",
            "9. Format your response as clean markdown that can be rendered properly",
            "Generate exactly 8-12 specific recommendations using simple markdown format:",
            "## Priority: HIGH/MEDIUM/LOW",
            "**Action Title**",
            "Description with clear steps and expected impact.",
            "Timeline: X weeks",
            "Priority Levels: HIGH, MEDIUM, LOW",
            "Focus on actionable items that can be implemented within 30-90 days.",
            "Use simple markdown formatting only - headers, bold text, and bullet points.",
            "Response:",
        ]
        return "\n" + "\n".join(lines) + "\n"

    def _query_llm(self, context: Dict[str, Any]) -> str:
        """Send the prompt to the LLM and return its markdown answer.

        Raises:
            ValueError: If the model returns no text.
            Exception: Whatever the client raises; the caller converts any
                failure into the fallback result.
        """
        chat_completion = self.client.chat.completions.create(
            messages=[{'role': 'user', 'content': self._build_prompt(context)}],
            model=self._MODEL,
            stream=False,
            temperature=0.1,
            max_tokens=3000,
        )
        # ``content`` can be None, so guard before stripping.
        response = (chat_completion.choices[0].message.content or "").strip()
        if not response:
            raise ValueError("LLM returned an empty response")
        return response

    def _generate_executive_insights(self, context: Dict[str, Any]) -> List[str]:
        """Derive short traffic-light style headline insights from the context."""
        insights = []

        technical = context['technical_seo']
        avg_score = (technical['mobile_score'] + technical['desktop_score']) / 2
        if avg_score < 50:
            insights.append(f"{self._ICON_CRITICAL} Critical: Website performance is severely impacting user experience (avg: {avg_score:.0f}/100)")
        elif avg_score < 75:
            insights.append(f"{self._ICON_WARNING} Warning: Website performance needs improvement (avg: {avg_score:.0f}/100)")
        else:
            insights.append(f"{self._ICON_GOOD} Good: Website performance is solid (avg: {avg_score:.0f}/100)")

        pages = context['content_audit']['pages_analyzed']
        if pages > 0:
            metadata = context['content_audit']['metadata_completeness']
            title_pct = metadata.get('with_title', 0) / pages * 100
            if title_pct < 80:
                insights.append(f"{self._ICON_CRITICAL} Content Issue: {100 - title_pct:.0f}% of pages missing critical metadata")
            else:
                insights.append(f"{self._ICON_GOOD} Content Quality: Metadata completeness is good ({title_pct:.0f}%)")

        keywords = context['keywords']
        if keywords['data_available']:
            total_keywords = keywords['total_keywords']
            top_10 = keywords['position_distribution'].get('top_10', 0)
            top_10_pct = (top_10 / total_keywords * 100) if total_keywords > 0 else 0
            if top_10_pct < 15:
                insights.append(f"{self._ICON_CRITICAL} SEO Visibility: Only {top_10_pct:.0f}% of keywords rank in top 10")
            elif top_10_pct < 30:
                insights.append(f"{self._ICON_WARNING} SEO Opportunity: {top_10_pct:.0f}% of keywords in top 10 - room for growth")
            else:
                insights.append(f"{self._ICON_GOOD} Strong SEO: {top_10_pct:.0f}% of keywords ranking in top 10")
        else:
            insights.append(f"{self._ICON_INFO} Connect keyword tracking tools for visibility insights")

        backlinks = context['backlinks']
        if backlinks['data_available']:
            ref_domains = backlinks['total_ref_domains']
            if ref_domains < 50:
                insights.append(f"{self._ICON_CRITICAL} Link Building: Low referring domains ({ref_domains}) - aggressive outreach needed")
            elif ref_domains < 200:
                insights.append(f"{self._ICON_WARNING} Authority Building: Moderate link profile ({ref_domains} domains)")
            else:
                insights.append(f"{self._ICON_GOOD} Strong Authority: Healthy backlink profile ({ref_domains} referring domains)")
        else:
            insights.append(f"{self._ICON_INFO} Connect backlink analysis tools for authority insights")

        return insights

    def _extract_priority_actions(self, recommendations: List[str]) -> List[Dict[str, str]]:
        """Pull the HIGH priority actions out of the recommendations.

        Args:
            recommendations: Either the markdown answer (a ``str`` or a list
                of markdown strings) using ``## Priority: X`` headers, or a
                list of legacy ``"**HIGH** Title: description"`` strings.

        Returns:
            At most five dicts with ``title``, ``description`` and
            ``priority`` (always ``'HIGH'``). Markdown that contains priority
            headers but no HIGH section yields an empty list.
        """
        if isinstance(recommendations, str):
            items = [recommendations]
        elif isinstance(recommendations, list):
            items = [rec for rec in recommendations if isinstance(rec, str)]
        else:
            items = []

        saw_header, actions = self._parse_markdown_actions("\n".join(items))
        if not saw_header:
            # Only text that is not in the header-based format is treated as
            # legacy; otherwise a document with no HIGH section would be
            # misread as one giant "Title: description" item.
            actions = self._parse_legacy_actions(items)
        return actions[:self._MAX_PRIORITY_ACTIONS]

    @classmethod
    def _parse_markdown_actions(cls, markdown_text: str):
        """Return ``(saw_header, high_actions)`` parsed from header-based markdown."""
        actions = []
        saw_header = False
        priority = None
        title = None
        description = []

        def flush():
            if title and priority == 'HIGH':
                actions.append({
                    'title': title,
                    'description': ' '.join(description).strip(),
                    'priority': 'HIGH',
                })

        for raw_line in markdown_text.splitlines():
            line = raw_line.strip()
            if line.startswith(cls._PRIORITY_HEADER):
                saw_header = True
                flush()
                # Tolerate "**HIGH**" / "High" variants of the level.
                level = line[len(cls._PRIORITY_HEADER):].strip()
                priority = level.strip('*').strip().upper()
                title = None
                description = []
            elif (title is None and line.startswith('**') and line.endswith('**')
                    and line.strip('*').strip()):
                # The first fully bold line after a header is the title;
                # later bold lines belong to the description.
                title = line.replace('**', '').strip()
            elif line and not line.startswith('#'):
                description.append(line)
        flush()
        return saw_header, actions

    @classmethod
    def _parse_legacy_actions(cls, items: List[str]) -> List[Dict[str, str]]:
        """Parse legacy ``"**HIGH** Title: description"`` recommendation strings."""
        actions = []
        for rec in items:
            if '**HIGH**' not in rec and '**CRITICAL**' not in rec:
                continue
            body = rec.replace('**HIGH**', '').replace('**CRITICAL**', '').strip()
            title, sep, description = body.partition(':')
            if sep:
                actions.append({
                    'title': title.strip(),
                    'description': description.strip(),
                    'priority': 'HIGH',
                })
        if actions:
            return actions

        # Nothing was tagged HIGH/CRITICAL: surface the first three items.
        for rec in items[:3]:
            for tag in cls._LEGACY_TAGS:
                rec = rec.replace(tag, '')
            title, sep, description = rec.partition(':')
            title = title.replace('*', '').strip()
            if sep and title:
                actions.append({
                    'title': title,
                    'description': description.strip(),
                    'priority': 'HIGH',
                })
        return actions

    def _generate_fallback_recommendations(self, technical_data: Dict, content_data: Dict,
                                           keywords_data: Dict, backlinks_data: Dict, error: str = None) -> Dict[str, Any]:
        """Build rule-based recommendations without calling the LLM.

        This is the last line of defence (it runs from an ``except`` block),
        so missing or ``None`` values are coerced instead of raising.

        Args:
            technical_data: Technical/performance audit results.
            content_data: Content audit results.
            keywords_data: Keyword data; ``placeholder`` marks it as absent.
            backlinks_data: Backlink data; ``placeholder`` marks it as absent.
            error: Optional LLM failure message to surface in the insights.

        Returns:
            The result dict described on the class, with ``data_source`` set
            to ``'Fallback Analysis'``.
        """
        recommendations = []  # (priority, title, description)

        if (technical_data.get('mobile_score') or 0) < 50:
            recommendations.append(('HIGH', 'Improve Mobile Performance',
                                    'Optimize images, reduce JavaScript, enable compression'))
        if (technical_data.get('desktop_score') or 0) < 50:
            recommendations.append(('HIGH', 'Improve Desktop Performance',
                                    'Optimize server response time, minimize CSS and JavaScript'))

        pages = content_data.get('pages_analyzed') or 0
        if pages > 0:
            metadata = content_data.get('metadata_completeness') or {}
            if (metadata.get('with_title') or 0) < pages * 0.8:
                recommendations.append(('HIGH', 'Fix Metadata',
                                        'Add missing title tags and meta descriptions'))
            if (content_data.get('avg_word_count') or 0) < 300:
                recommendations.append(('MEDIUM', 'Enhance Content',
                                        'Increase average page content length'))

        if not keywords_data.get('placeholder', False):
            total_keywords = keywords_data.get('total_keywords') or 0
            pos_dist = keywords_data.get('position_distribution') or {}
            if total_keywords > 0 and (pos_dist.get('top_10') or 0) < total_keywords * 0.2:
                recommendations.append(('HIGH', 'Improve Keyword Rankings',
                                        'Focus on on-page SEO for underperforming keywords'))
        else:
            recommendations.append(('MEDIUM', 'Set Up Keyword Tracking',
                                    'Connect Google Search Console for keyword insights'))

        if not backlinks_data.get('placeholder', False):
            if (backlinks_data.get('total_ref_domains') or 0) < 50:
                recommendations.append(('HIGH', 'Build Authority',
                                        'Implement aggressive link building and outreach strategy'))
        else:
            recommendations.append(('MEDIUM', 'Set Up Backlink Monitoring',
                                    'Add RapidAPI key for comprehensive link analysis'))

        if not recommendations:
            recommendations = [
                ('HIGH', 'Audit Technical Issues', 'Review site speed and mobile performance'),
                ('MEDIUM', 'Optimize Content Strategy', 'Ensure all pages have unique, valuable content'),
                ('LOW', 'Monitor SEO Performance', 'Set up tracking for keywords and backlinks'),
            ]

        insights = [
            f"{self._ICON_INFO} Basic SEO analysis completed - connect APIs for deeper insights",
            f"{self._ICON_INFO} Analyzed {pages} pages for content quality",
            f"{self._ICON_CAUTION} Enhanced recommendations require API integrations",
        ]
        if error:
            insights.append(f"{self._ICON_ERROR} LLM Error: {error}")

        # Same layout the LLM is asked to produce, with each item's real
        # priority, so both paths render and parse alike.
        markdown_recommendations = "\n".join(
            f"{self._PRIORITY_HEADER} {priority}\n**{title}**\n{description}\n"
            for priority, title, description in recommendations
        )

        return {
            'recommendations_markdown': markdown_recommendations,
            'executive_insights': insights,
            'priority_actions': [
                {
                    'title': 'Connect SEO APIs',
                    'description': 'Set up Google Search Console and RapidAPI for comprehensive analysis',
                    'priority': 'HIGH',
                }
            ],
            'data_source': 'Fallback Analysis',
            'generated_at': technical_data.get('last_updated', ''),
        }