"""
Backlinks Profile Module using RapidAPI endpoints
Combines 3 RapidAPI endpoints: Best Backlink Checker, Majestic, and Domain Metrics Check
"""
import os
import requests
import time
from typing import Dict, Any, List, Optional
from urllib.parse import urlparse
from datetime import datetime, timedelta
class ModuleResult:
"""Standard result object for SEO modules"""
    def __init__(self, success: bool, data: Dict[str, Any], error: Optional[str] = None):
self.success = success
self.data = data
self.error = error
class BacklinksModule:
def __init__(self):
self.rapidapi_key = os.getenv('RAPIDAPI_KEY')
self.timeout = int(os.getenv('RAPIDAPI_TIMEOUT', '30'))
self.max_retries = int(os.getenv('BACKLINKS_MAX_RETRIES', '3'))
# RapidAPI endpoints
self.backlink_checker_url = "https://best-backlink-checker-api.p.rapidapi.com/excatbacklinks_noneng.php"
self.majestic_url = "https://majestic1.p.rapidapi.com/url_metrics"
self.domain_metrics_url = "https://domain-metrics-check.p.rapidapi.com/domain-metrics"
# Common headers
self.headers = {
'x-rapidapi-key': self.rapidapi_key,
'Accept': 'application/json'
}
def analyze(self, url: str, quick_scan: bool = False) -> ModuleResult:
"""
Analyze backlink profile using multiple RapidAPI endpoints
Args:
url: Target website URL
        quick_scan: If True, analyze a limited sample of backlinks (faster, less depth)
Returns:
ModuleResult with comprehensive backlinks data
"""
try:
if not self.rapidapi_key:
return self._generate_no_api_data(url)
domain = self._extract_domain(url)
# Call all 3 APIs with retry logic and track status
api_status = {
'working_apis': [],
'failed_apis': [],
'failed_messages': []
}
print("πŸ”„ Trying Best Backlink Checker API...")
individual_backlinks = self._get_individual_backlinks(domain, quick_scan)
if individual_backlinks:
api_status['working_apis'].append('Best Backlink Checker')
print("βœ… Best Backlink Checker API - SUCCESS")
else:
api_status['failed_apis'].append('Best Backlink Checker')
api_status['failed_messages'].append("❌ Best Backlink Checker API failed - using mock data")
print("❌ Best Backlink Checker API - FAILED")
print("πŸ”„ Trying Majestic API...")
majestic_metrics = self._get_majestic_metrics(domain)
if majestic_metrics:
api_status['working_apis'].append('Majestic')
print("βœ… Majestic API - SUCCESS")
else:
api_status['failed_apis'].append('Majestic')
api_status['failed_messages'].append("❌ Majestic API failed - using mock data")
print("❌ Majestic API - FAILED")
print("πŸ”„ Trying Domain Metrics Check API...")
domain_metrics = self._get_domain_metrics(domain)
if domain_metrics:
api_status['working_apis'].append('Domain Metrics Check')
print("βœ… Domain Metrics Check API - SUCCESS")
else:
api_status['failed_apis'].append('Domain Metrics Check')
api_status['failed_messages'].append("❌ Domain Metrics Check API failed - using mock data")
print("❌ Domain Metrics Check API - FAILED")
# Combine and process all data
combined_data = self._combine_backlink_data(
domain, individual_backlinks, majestic_metrics, domain_metrics, quick_scan, api_status
)
return ModuleResult(success=True, data=combined_data)
except Exception as e:
return ModuleResult(
success=False,
data={},
error=f"Backlinks analysis failed: {str(e)}"
)
    def _extract_domain(self, url: str) -> str:
        """Extract the bare domain from a URL, stripping any leading 'www.'."""
        if not url.startswith(('http://', 'https://')):
            url = 'https://' + url
        netloc = urlparse(url).netloc
        # Strip only a leading 'www.' so domains containing 'www.' elsewhere are untouched
        return netloc[4:] if netloc.startswith('www.') else netloc
    def _api_request_with_retry(self, url: str, params: Optional[Dict] = None,
                                headers: Optional[Dict] = None) -> Optional[Dict]:
        """GET a RapidAPI endpoint, retrying on rate limits (429), timeouts, and transient errors."""
if headers is None:
headers = self.headers.copy()
for attempt in range(self.max_retries):
try:
response = requests.get(url, params=params, headers=headers, timeout=self.timeout)
if response.status_code == 200:
return response.json()
elif response.status_code == 429:
wait_time = (attempt + 1) * 2
print(f"Rate limited, waiting {wait_time}s...")
time.sleep(wait_time)
continue
else:
print(f"API error {response.status_code}: {response.text}")
except requests.exceptions.Timeout:
print(f"Timeout on attempt {attempt + 1}")
if attempt < self.max_retries - 1:
time.sleep(2)
except Exception as e:
print(f"Request error: {str(e)}")
if attempt < self.max_retries - 1:
time.sleep(2)
return None
def _get_individual_backlinks(self, domain: str, quick_scan: bool = False) -> List[Dict]:
"""Get individual backlinks data"""
try:
headers = self.headers.copy()
headers['x-rapidapi-host'] = 'best-backlink-checker-api.p.rapidapi.com'
params = {'domain': f'https://{domain}'}
data = self._api_request_with_retry(self.backlink_checker_url, params, headers)
if data and isinstance(data, list):
# Limit results for quick scan
if quick_scan:
return data[:50]
return data[:500]
except Exception as e:
print(f"Individual backlinks API error: {str(e)}")
return []
    def _get_majestic_metrics(self, domain: str) -> Dict[str, Any]:
        """Get Majestic link metrics (Trust Flow, Citation Flow, etc.) via RapidAPI"""
try:
headers = self.headers.copy()
headers['x-rapidapi-host'] = 'majestic1.p.rapidapi.com'
params = {'url': domain}
data = self._api_request_with_retry(self.majestic_url, params, headers)
if data and data.get('status') == 'success':
return data
except Exception as e:
print(f"Majestic RapidAPI error: {str(e)}")
return {}
def _get_domain_metrics(self, domain: str) -> Dict[str, Any]:
"""Get comprehensive domain metrics"""
try:
headers = self.headers.copy()
headers['x-rapidapi-host'] = 'domain-metrics-check.p.rapidapi.com'
# API expects domain with trailing slash
url = f"{self.domain_metrics_url}/{domain}/"
data = self._api_request_with_retry(url, headers=headers)
if data and data.get('domain'):
return data
except Exception as e:
print(f"Domain metrics API error: {str(e)}")
return {}
    def _combine_backlink_data(self, domain: str, individual_backlinks: List[Dict],
                               majestic_metrics: Dict, domain_metrics: Dict,
                               quick_scan: bool, api_status: Dict) -> Dict[str, Any]:
        """Combine data from all 3 APIs into a comprehensive backlinks profile"""
        # Primary metrics: prefer Domain Metrics Check, fall back to Majestic,
        # then to the raw backlink list. `or 0` guards against null API values.
        total_backlinks = (
            int(domain_metrics.get('ahrefsBacklinks', 0) or 0) or
            int(domain_metrics.get('majesticLinks', 0) or 0) or
            int(majestic_metrics.get('majesticLinks', 0) or 0) or
            len(individual_backlinks)
        )
        total_ref_domains = (
            int(domain_metrics.get('ahrefsRefDomains', 0) or 0) or
            int(domain_metrics.get('majesticRefDomains', 0) or 0) or
            int(majestic_metrics.get('majesticRefDomains', 0) or 0) or
            len({urlparse(link['url_from']).netloc
                 for link in individual_backlinks if link.get('url_from')})
        )
        # Authority scores (multiple sources for validation)
        domain_rating = (
            int(domain_metrics.get('ahrefsDR', 0) or 0) or
            int(domain_metrics.get('majesticTF', 0) or 0) or
            int(majestic_metrics.get('majesticTF', 0) or 0)
        )
# Process individual backlinks for detailed analysis
referring_domains = self._extract_referring_domains(individual_backlinks)
anchor_distribution = self._extract_anchor_distribution(individual_backlinks)
monthly_changes = self._calculate_monthly_changes(individual_backlinks)
top_backlinks = self._get_top_backlinks(individual_backlinks)
# Link quality analysis
quality_metrics = self._analyze_link_quality(individual_backlinks, domain_metrics)
# Comprehensive backlinks data
backlinks_data = {
'ref_domains': total_ref_domains, # Match expected key name
'new_backlinks_30d': monthly_changes.get('new_backlinks', 0),
'lost_backlinks_30d': None, # Explicit N/A placeholder
'total_backlinks': total_backlinks,
'total_ref_domains': total_ref_domains,
'domain_rating': domain_rating,
            # Authority scores from multiple sources (`or 0` guards against null values)
            'authority_scores': {
                'ahrefs_dr': int(domain_metrics.get('ahrefsDR', 0) or 0),
                'moz_da': int(domain_metrics.get('mozDA', 0) or 0),
                'moz_pa': int(domain_metrics.get('mozPA', 0) or 0),
                'majestic_tf': int(domain_metrics.get('majesticTF', 0) or majestic_metrics.get('majesticTF', 0) or 0),
                'majestic_cf': int(domain_metrics.get('majesticCF', 0) or majestic_metrics.get('majesticCF', 0) or 0)
            },
# Detailed analysis
'referring_domains': referring_domains,
'anchor_distribution': anchor_distribution,
'monthly_changes': monthly_changes,
'top_backlinks': top_backlinks,
'quality_metrics': quality_metrics,
            # Educational and government links (high-quality indicators)
            'edu_links': int(domain_metrics.get('majesticRefEDU', 0) or majestic_metrics.get('majesticRefEDU', 0) or 0),
            'gov_links': int(domain_metrics.get('majesticRefGov', 0) or majestic_metrics.get('majesticRefGov', 0) or 0),
            # Traffic estimates (if available)
            'estimated_organic_traffic': float(domain_metrics.get('ahrefsTraffic', 0) or 0),
            'organic_keywords': int(domain_metrics.get('ahrefsOrganicKeywords', 0) or 0),
# Data sources and metadata
'data_sources': self._get_data_sources(individual_backlinks, majestic_metrics, domain_metrics),
'data_source': self._get_primary_data_source(individual_backlinks, majestic_metrics, domain_metrics),
'api_status': api_status,
'last_updated': datetime.now().isoformat(),
'quick_scan': quick_scan,
'analysis_depth': 'comprehensive' if not quick_scan else 'basic'
}
return backlinks_data
def _extract_referring_domains(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
"""Extract and analyze referring domains"""
domain_stats = {}
for link in backlinks:
if not link.get('url_from'):
continue
try:
source_domain = urlparse(link['url_from']).netloc
if source_domain not in domain_stats:
domain_stats[source_domain] = {
'domain': source_domain,
'backlinks': 0,
'first_seen': link.get('first_seen', ''),
'domain_authority': link.get('domain_inlink_rank', 0),
'follow_links': 0,
'nofollow_links': 0
}
domain_stats[source_domain]['backlinks'] += 1
if link.get('nofollow'):
domain_stats[source_domain]['nofollow_links'] += 1
else:
domain_stats[source_domain]['follow_links'] += 1
except Exception:
continue
# Sort by backlinks count and return top domains
top_domains = sorted(domain_stats.values(), key=lambda x: x['backlinks'], reverse=True)
return top_domains[:20]
def _extract_anchor_distribution(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
"""Analyze anchor text distribution"""
anchor_stats = {}
for link in backlinks:
anchor = link.get('anchor', '').strip()
if not anchor or len(anchor) > 100:
continue
if anchor not in anchor_stats:
anchor_stats[anchor] = {
'anchor_text': anchor,
'backlinks': 0,
'follow_links': 0,
'nofollow_links': 0,
'unique_domains': set()
}
anchor_stats[anchor]['backlinks'] += 1
if link.get('nofollow'):
anchor_stats[anchor]['nofollow_links'] += 1
else:
anchor_stats[anchor]['follow_links'] += 1
# Track unique domains for this anchor
try:
domain = urlparse(link.get('url_from', '')).netloc
anchor_stats[anchor]['unique_domains'].add(domain)
except Exception:
pass
# Convert sets to counts and sort
anchor_distribution = []
for anchor_data in anchor_stats.values():
anchor_data['unique_domains'] = len(anchor_data['unique_domains'])
anchor_distribution.append(anchor_data)
# Sort by backlinks count
anchor_distribution.sort(key=lambda x: x['backlinks'], reverse=True)
return anchor_distribution[:15]
def _calculate_monthly_changes(self, backlinks: List[Dict]) -> Dict[str, int]:
"""Calculate monthly backlinks changes"""
now = datetime.now()
last_month = now - timedelta(days=30)
new_links = 0
recent_links = 0
for link in backlinks:
first_seen = link.get('first_seen', '')
if not first_seen:
continue
            try:
                # 'first_seen' is expected as 'YYYY-MM-DD'; slice off any time suffix
                link_date = datetime.strptime(first_seen[:10], '%Y-%m-%d')
if link_date >= last_month:
new_links += 1
if link_date >= now - timedelta(days=90):
recent_links += 1
except Exception:
continue
        return {
            'new_backlinks': new_links,
            'lost_backlinks_30d': None,  # Explicit N/A: the APIs do not report lost links
            'net_change': new_links,     # With lost links unknown, net change equals new links
            'recent_backlinks_3m': recent_links
        }
def _get_top_backlinks(self, backlinks: List[Dict]) -> List[Dict[str, Any]]:
"""Get top-quality backlinks"""
        # Sort by inlink_rank (higher is better); `or 0` guards against null ranks
        sorted_links = sorted(
            backlinks,
            key=lambda x: x.get('inlink_rank') or 0,
            reverse=True
        )
top_links = []
for link in sorted_links[:10]:
top_links.append({
'source_url': link.get('url_from', ''),
'source_title': link.get('title', ''),
'anchor_text': link.get('anchor', ''),
                'is_follow': not link.get('nofollow', True),  # unknown status treated as nofollow
'authority_score': link.get('inlink_rank', 0),
'first_seen': link.get('first_seen', '')
})
return top_links
def _analyze_link_quality(self, backlinks: List[Dict], domain_metrics: Dict) -> Dict[str, Any]:
"""Analyze overall link quality metrics"""
if not backlinks:
return {'follow_ratio': 0, 'avg_authority': 0, 'quality_score': 0}
follow_count = sum(1 for link in backlinks if not link.get('nofollow', True))
total_links = len(backlinks)
follow_ratio = (follow_count / total_links * 100) if total_links > 0 else 0
# Average authority score
authority_scores = [link.get('inlink_rank', 0) for link in backlinks if link.get('inlink_rank')]
avg_authority = sum(authority_scores) / len(authority_scores) if authority_scores else 0
        # Quality score (0-100): weighted blend of follow ratio, average authority,
        # and referring-domain diversity (capped at 20 unique domains)
        unique_domains = {urlparse(link['url_from']).netloc
                          for link in backlinks if link.get('url_from')}
        quality_score = min(100, (
            (follow_ratio * 0.4) +
            (avg_authority * 2) +
            min(20, len(unique_domains))
        ))
        return {
            'follow_ratio': round(follow_ratio, 1),
            'avg_authority': round(avg_authority, 1),
            'quality_score': round(quality_score, 1),
            'total_analyzed': total_links,
            'edu_gov_count': int(domain_metrics.get('majesticRefEDU', 0) or 0) + int(domain_metrics.get('majesticRefGov', 0) or 0)
        }
    def _get_data_sources(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> List[str]:
        """List every API that returned data, for report attribution"""
sources = []
if individual_backlinks:
sources.append('Best Backlink Checker API')
if majestic_metrics:
sources.append('Majestic RapidAPI')
if domain_metrics:
sources.append('Domain Metrics Check API')
return sources or ['No data sources available']
def _get_primary_data_source(self, individual_backlinks: List, majestic_metrics: Dict, domain_metrics: Dict) -> str:
"""Get primary data source for labeling"""
if domain_metrics:
return 'Domain Metrics Check API'
elif majestic_metrics:
return 'Majestic RapidAPI'
elif individual_backlinks:
return 'Best Backlink Checker API'
        else:
            return 'No data sources available'
    def _generate_no_api_data(self, url: str) -> ModuleResult:
        """Return zeroed placeholder data when no RAPIDAPI_KEY is configured"""
        no_api_data = {
'total_backlinks': 0,
'total_ref_domains': 0,
'domain_rating': 0,
'authority_scores': {
'ahrefs_dr': 0,
'moz_da': 0,
'moz_pa': 0,
'majestic_tf': 0,
'majestic_cf': 0
},
'referring_domains': [],
'anchor_distribution': [],
'monthly_changes': {
'new_backlinks': 0,
'lost_backlinks_30d': None, # Explicit N/A
'net_change': 0
},
'ref_domains': 0,
'new_backlinks_30d': 0,
'lost_backlinks_30d': None,
'top_backlinks': [],
'quality_metrics': {
'follow_ratio': 0,
'avg_authority': 0,
'quality_score': 0
},
'edu_links': 0,
'gov_links': 0,
'estimated_organic_traffic': 0,
'organic_keywords': 0,
'data_sources': ['No API credentials available'],
'data_source': 'No API credentials available',
'api_status': {
'working_apis': [],
'failed_apis': ['Best Backlink Checker', 'Majestic', 'Domain Metrics Check'],
'failed_messages': [
'❌ Best Backlink Checker API failed - no RAPIDAPI_KEY',
'❌ Majestic API failed - no RAPIDAPI_KEY',
'❌ Domain Metrics Check API failed - no RAPIDAPI_KEY'
]
},
'last_updated': datetime.now().isoformat(),
'placeholder': True,
'message': 'Add RAPIDAPI_KEY to your .env file to unlock comprehensive backlinks analysis using Best Backlink Checker, Majestic, and Domain Metrics Check RapidAPIs.'
}
return ModuleResult(success=True, data=no_api_data)
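
# --- Usage sketch -----------------------------------------------------------
# A minimal example of driving this module directly, assuming RAPIDAPI_KEY is
# set in the environment (the domain below is illustrative). This is not part
# of the pipeline; it only shows the call pattern and a few result keys.
if __name__ == '__main__':
    module = BacklinksModule()
    result = module.analyze('https://example.com', quick_scan=True)
    if result.success:
        data = result.data
        print(f"Referring domains: {data['total_ref_domains']}")
        print(f"Total backlinks:   {data['total_backlinks']}")
        print(f"Primary source:    {data['data_source']}")
    else:
        print(f"Analysis failed: {result.error}")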