from flask import Flask, render_template, request, jsonify, send_file, redirect, url_for, session
import validators
import os
import tempfile
import uuid
from urllib.parse import urlparse
from typing import Dict, Any, List

# Load environment variables from .env file
try:
    from dotenv import load_dotenv
    load_dotenv()
except ImportError:
    print("python-dotenv not installed. Using system environment variables only.")
from modules.technical_seo import TechnicalSEOModule
from modules.content_audit import ContentAuditModule
from modules.keywords import KeywordsModule
from modules.backlinks import BacklinksModule
from report_generator import ReportGenerator
from simple_pdf_generator import SimplePDFGenerator
from llm_recommendations import LLMRecommendations
from gsc_client import GSCClient
from utils import safe_pct
from benchmarks import BENCHMARKS, badge

app = Flask(__name__, static_folder='static')
# The hardcoded fallback key is only suitable for local development;
# set FLASK_SECRET_KEY in production.
app.secret_key = os.getenv('FLASK_SECRET_KEY', 'seo_report_generator_2024')

# Analysis modules and report generators, instantiated once at import time
technical_module = TechnicalSEOModule(api_key=os.getenv('GOOGLE_API_KEY'))
content_module = ContentAuditModule()
keywords_module = KeywordsModule()
backlinks_module = BacklinksModule()
report_gen = ReportGenerator()
pdf_gen = SimplePDFGenerator()
llm_recommendations = LLMRecommendations()

# GSC integration is optional; GSCClient raises ImportError when the Google
# API client libraries are not installed.
try:
    gsc_client = GSCClient()
except ImportError as e:
    print(f"GSC client not available: {e}")
    gsc_client = None

# In-memory report store: reports live only as long as this process and are
# not shared across workers.
reports_store = {}
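# The keywords module returns a ModuleResult whose .data roughly looks like:
#   {
#     'totals': {'keywords': int, 'estimated_traffic': int},
#     'distribution': {'top3': int, 'top10': int, 'top50': int},
#     'movement': {...},
#     'best_keywords': [{'keyword', 'rank', 'volume', 'url', 'estimated_traffic', 'trend'}, ...],
#     'declining_keywords': [...],
#     'opportunities': [{'keyword', 'volume', 'competitor_rank', 'priority_score', 'competitor_domain'}, ...],
#     'data_sources': {'positions': str, 'volume': str, 'enrichment_rate': float},
#     'competitor_summary': [...], 'meta': {...},
#   }
# _transform_keywords_data() flattens this into the shape the report templates expect.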
def _transform_keywords_data(new_data: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize a keywords-module result into the flat structure used by the report."""
    if not new_data or new_data.get('placeholder'):
        return {
            'placeholder': True,
            'message': 'No keyword data available',
            'total_keywords': 0,
            'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
            'best_keywords': [],
            'opportunity_keywords': [],
            'data_source': 'Analysis failed'
        }

    totals = new_data.get('totals', {})
    distribution = new_data.get('distribution', {})
    movement = new_data.get('movement', {})
    best_keywords = new_data.get('best_keywords', [])
    declining_keywords = new_data.get('declining_keywords', [])
    opportunities = new_data.get('opportunities', [])
    data_sources = new_data.get('data_sources', {})

    pos_dist = {
        'top_3': distribution.get('top3', 0),
        'top_10': distribution.get('top10', 0),
        'top_50': distribution.get('top50', 0),
        'beyond_50': totals.get('keywords', 0) - distribution.get('top50', 0)
    }

    transformed_best_keywords = []
    for kw in best_keywords:
        transformed_best_keywords.append({
            'keyword': kw.get('keyword', ''),
            'position': kw.get('rank', 0),
            'clicks': 0,  # not provided by this data source
            'impressions': kw.get('volume', 0),
            'url': kw.get('url', ''),
            'estimated_traffic': kw.get('estimated_traffic', 0),
            'trend': kw.get('trend', 'stable')
        })

    transformed_opportunities = []
    for opp in opportunities:
        transformed_opportunities.append({
            'keyword': opp.get('keyword', ''),
            'position': 0,
            'impressions': opp.get('volume', 0),
            'ctr': 0,
            'competitor_rank': opp.get('competitor_rank', 0),
            'priority_score': opp.get('priority_score', 0),
            'competitor_domain': opp.get('competitor_domain', '')
        })

    return {
        'total_keywords': totals.get('keywords', 0),
        'estimated_traffic': totals.get('estimated_traffic', 0),
        'position_distribution': pos_dist,
        'movement': movement,
        'best_keywords': transformed_best_keywords,
        'declining_keywords': declining_keywords,
        'opportunity_keywords': transformed_opportunities,
        'competitor_summary': new_data.get('competitor_summary', []),
        'data_source': f"{data_sources.get('positions', 'Unknown')} + {data_sources.get('volume', 'Unknown')}",
        'enrichment_rate': data_sources.get('enrichment_rate', 0),
        'meta': new_data.get('meta', {}),
        'placeholder': False
    }
@app.route('/')
def index():
    return render_template('index.html')
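# Expects a JSON body like {"url": "https://example.com", "competitors": ["https://rival.com", ...]}
# and responds with {"success": true, "report_id": "<uuid>", "redirect_url": "/report/<uuid>"}.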
@app.route('/generate', methods=['POST'])  # route path assumed; handler reads a JSON POST body
def generate_report():
    try:
        data = request.json
        url = data.get('url', '').strip()
        competitors = data.get('competitors', [])

        if not url:
            return jsonify({'error': 'Website URL is required'}), 400
        if not validators.url(url):
            return jsonify({'error': 'Please enter a valid URL'}), 400

        report_id = str(uuid.uuid4())

        # Validate competitor URLs and extract bare domains for keyword comparison
        competitor_domains = []
        competitor_list = []
        for comp in competitors:
            comp = comp.strip()
            if comp and validators.url(comp):
                competitor_list.append(comp)
                domain = urlparse(comp).netloc.replace('www.', '')
                competitor_domains.append(domain)

        technical_data = technical_module.analyze(url)
        content_data = content_module.analyze(url)

        # Use GSC only when the user is authenticated and the analyzed domain
        # matches the configured GSC property
        use_gsc = False
        if gsc_client and 'gsc_tokens' in session and gsc_client.property_url:
            domain = urlparse(url).netloc.replace('www.', '')
            property_domain = urlparse(gsc_client.property_url).netloc.replace('www.', '')
            if domain == property_domain:
                use_gsc = True

        # Analyze keywords
        if use_gsc:
            keywords_result = app._analyze_with_gsc(url, competitor_domains)
        else:
            keywords_result = keywords_module.analyze(url, competitor_domains=competitor_domains)

        if not keywords_result.success:
            keywords_data = {
                'placeholder': True,
                'message': f'Keywords analysis failed: {keywords_result.error}',
                'total_keywords': 0,
                'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
                'best_keywords': [],
                'opportunity_keywords': [],
                'data_source': 'Analysis failed'
            }
        else:
            keywords_data = _transform_keywords_data(keywords_result.data)

        print(f"DEBUG: Starting backlinks analysis for {url}")
        backlinks_result = backlinks_module.analyze(url)
        backlinks_data = backlinks_result.data
        print(f"DEBUG: Backlinks analysis result - Success: {backlinks_result.success}")
        print(f"DEBUG: Backlinks data keys: {list(backlinks_data.keys())}")
        if backlinks_data.get('total_backlinks'):
            print(f"DEBUG: Total backlinks found: {backlinks_data.get('total_backlinks')}")
        if backlinks_data.get('placeholder'):
            print(f"DEBUG: Using placeholder data: {backlinks_data.get('message')}")

        llm_rec_data = llm_recommendations.generate_recommendations(
            url, technical_data, content_data, keywords_data, backlinks_data
        )

        # Run a lighter quick_scan analysis for each competitor
        competitor_data = []
        for comp_url in competitor_list:
            comp_technical = technical_module.analyze(comp_url)
            comp_content = content_module.analyze(comp_url, quick_scan=True)
            comp_keywords_result = keywords_module.analyze(comp_url, competitor_domains=[], quick_scan=True)
            if comp_keywords_result.success:
                comp_keywords = _transform_keywords_data(comp_keywords_result.data)
            else:
                comp_keywords = {
                    'placeholder': True,
                    'message': f'Keywords analysis failed: {comp_keywords_result.error}',
                    'total_keywords': 0,
                    'position_distribution': {'top_3': 0, 'top_10': 0, 'top_50': 0, 'beyond_50': 0},
                    'best_keywords': [],
                    'opportunity_keywords': [],
                    'data_source': 'Analysis failed'
                }
            comp_backlinks_result = backlinks_module.analyze(comp_url, quick_scan=True)
            comp_backlinks = comp_backlinks_result.data
            competitor_data.append({
                'url': comp_url,
                'technical': comp_technical,
                'content': comp_content,
                'keywords': comp_keywords,
                'backlinks': comp_backlinks
            })

        report_html = report_gen.generate_html_report(
            url=url,
            technical_data=technical_data,
            content_data=content_data,
            competitor_data=competitor_data,
            keywords_data=keywords_data,
            backlinks_data=backlinks_data,
            llm_recommendations=llm_rec_data,
            include_charts=True
        )

        reports_store[report_id] = {
            'url': url,
            'html': report_html,
            'technical_data': technical_data,
            'content_data': content_data,
            'keywords_data': keywords_data,
            'backlinks_data': backlinks_data,
            'llm_recommendations': llm_rec_data,
            'competitor_data': competitor_data
        }

        return jsonify({
            'success': True,
            'report_id': report_id,
            'redirect_url': f'/report/{report_id}'
        })
    except Exception as e:
        return jsonify({'error': f'Error generating report: {str(e)}'}), 500
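# The client is expected to follow redirect_url to view_report() below; since
# reports_store is in-memory, report links break whenever the process restarts.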
@app.route('/report/<report_id>')  # matches the redirect_url returned by generate_report()
def view_report(report_id):
    if report_id not in reports_store:
        return redirect(url_for('index'))
    report_data = reports_store[report_id]
    return render_template('report.html',
                           report_html=report_data['html'],
                           report_id=report_id,
                           url=report_data['url'])
@app.route('/download/<report_id>')  # route path assumed
def download_html(report_id):
    if report_id not in reports_store:
        return jsonify({'error': 'Report not found'}), 404
    report_data = reports_store[report_id]
    # delete=False is required so send_file can read the file after the context
    # manager closes it; the temp file is not cleaned up afterwards.
    with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False, encoding='utf-8') as f:
        f.write(report_data['html'])
        temp_path = f.name
    filename = f"seo_report_{report_data['url'].replace('https://', '').replace('http://', '').replace('/', '_')}.html"
    return send_file(temp_path, as_attachment=True, download_name=filename, mimetype='text/html')
# PDF download removed - now using browser print functionality
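# Note: SimplePDFGenerator is still imported and instantiated above even though
# the PDF route is gone; printing the report view is the supported export path.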
def _analyze_with_gsc(url: str, competitor_domains: List[str]):
    """Analyze keywords using GSC as the primary source, falling back to the keywords module."""
    try:
        gsc_tokens = session.get('gsc_tokens', {})
        if not gsc_tokens.get('access_token'):
            return keywords_module.analyze(url, competitor_domains=competitor_domains)
        # Fetch GSC data; get_search_analytics may refresh the tokens in place
        gsc_data = gsc_client.get_search_analytics(gsc_tokens)
        transformed_data = gsc_client.transform_gsc_data(gsc_data, urlparse(url).netloc)
        # Write the (potentially refreshed) tokens back to the session
        session['gsc_tokens'] = gsc_tokens
        from modules.keywords import ModuleResult
        return ModuleResult(success=True, data=transformed_data)
    except Exception as e:
        print(f"GSC analysis failed: {e}")
        return keywords_module.analyze(url, competitor_domains=competitor_domains)

# Attach the helper to the app object so generate_report() can call it as
# app._analyze_with_gsc()
app._analyze_with_gsc = _analyze_with_gsc
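# --- Google Search Console OAuth flow ---
# gsc_auth_start() sends the user to Google's consent screen; Google redirects
# back to gsc_auth_callback() with a one-time code, which is exchanged for
# tokens stored in the Flask session. gsc_auth_status() reports the current
# connection state to the frontend.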
@app.route('/gsc/auth')  # route path assumed
def gsc_auth_start():
    """Start the GSC OAuth flow."""
    if not gsc_client:
        return jsonify({'error': 'Google Search Console integration not available. Install: pip install google-api-python-client google-auth-oauthlib google-auth'}), 500
    try:
        auth_url = gsc_client.get_auth_url()
        return redirect(auth_url)
    except Exception as e:
        return jsonify({'error': f'OAuth setup failed: {str(e)}'}), 500
@app.route('/gsc/callback')  # route path assumed; must match the registered OAuth redirect URI
def gsc_auth_callback():
    """Handle the GSC OAuth callback."""
    auth_code = request.args.get('code')
    error = request.args.get('error')
    if error:
        return redirect(url_for('index', error=f'OAuth error: {error}'))
    if not auth_code:
        return redirect(url_for('index', error='No authorization code received'))
    try:
        tokens = gsc_client.exchange_code(auth_code)
        session['gsc_tokens'] = tokens
        return redirect(url_for('index', success='Google Search Console connected successfully'))
    except Exception as e:
        return redirect(url_for('index', error=f'Token exchange failed: {str(e)}'))
@app.route('/gsc/status')  # route path assumed
def gsc_auth_status():
    """Check GSC authentication status."""
    # Guard against gsc_client being None when the Google libraries are missing
    if not gsc_client:
        return jsonify({'authenticated': False, 'property_url': None, 'client_configured': False})
    has_tokens = 'gsc_tokens' in session
    return jsonify({
        'authenticated': has_tokens,
        'property_url': gsc_client.property_url,
        'client_configured': bool(gsc_client.client_id and gsc_client.client_secret)
    })
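# Example status payload consumed by the frontend (values illustrative):
#   {"authenticated": true, "property_url": "https://example.com/", "client_configured": true}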
if __name__ == '__main__':
    # Port 7860 is the default expected by Hugging Face Spaces
    app.run(debug=False, host='0.0.0.0', port=7860)