FakeNews_Detector / setup_google_drive_rag.py
NLong's picture
Upload 12 files
b5fb8d2 verified
#!/usr/bin/env python3
"""
Setup script for Google Drive RAG system
This script helps you set up Google Drive authentication for the RAG news manager
"""
import os
import json
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
# Configuration
# OAuth scope requested both when loading the cached token and when running
# the interactive OAuth flow.
# NOTE(review): per Google's scope documentation, `drive.file` only covers
# files the app itself created or was explicitly given access to — confirm
# this is sufficient for the RAG manager.
SCOPES = ['https://www.googleapis.com/auth/drive.file']
# OAuth client-secrets file (downloaded from Google Cloud Console, see the
# instructions printed by setup_google_drive_credentials). Relative path, so
# it is resolved against the current working directory.
CREDENTIALS_FILE = 'credentials.json'
# Where the authorized-user token is cached between runs (also resolved
# against the current working directory).
TOKEN_FILE = 'token.json'
def _client_config_section(creds_data):
    """Return the mapping that holds ``client_id`` / ``project_id``.

    Google client-secret files nest these fields under an ``"installed"``
    (desktop app) or ``"web"`` key. A flat mapping is returned unchanged so
    files that keep the fields at the top level still work.

    Raises:
        AttributeError: if ``creds_data`` is not a mapping (e.g. the JSON
            document is a list); the caller treats this as an unreadable
            credentials file.
    """
    for section_name in ("installed", "web"):
        section = creds_data.get(section_name)
        if isinstance(section, dict):
            return section
    return creds_data


def setup_google_drive_credentials():
    """Set up Google Drive credentials for local development.

    Steps, in order:
      1. Check that ``CREDENTIALS_FILE`` exists and is readable JSON.
      2. Load a cached token from ``TOKEN_FILE`` if there is one.
      3. Refresh the token, or run the browser-based OAuth flow, when the
         cached credentials are missing or not valid, then cache the result.
      4. Issue a one-item ``files().list`` call to prove Drive access works.

    Returns:
        bool: ``True`` if credentials were obtained and the Drive test call
        succeeded; ``False`` on any failure (a message is printed first).
    """
    print("πŸ”§ Setting up Google Drive credentials for RAG system...")
    print("=" * 60)

    # Check if credentials file exists
    if not os.path.exists(CREDENTIALS_FILE):
        print(f"❌ {CREDENTIALS_FILE} not found!")
        print("\nπŸ“‹ To get Google Drive credentials:")
        print("1. Go to Google Cloud Console: https://console.cloud.google.com/")
        print("2. Create a new project or select existing one")
        print("3. Enable Google Drive API")
        print("4. Go to 'Credentials' β†’ 'Create Credentials' β†’ 'OAuth 2.0 Client IDs'")
        print("5. Choose 'Desktop application'")
        print("6. Download the JSON file and rename it to 'credentials.json'")
        print("7. Place it in this directory")
        return False
    print(f"βœ… Found {CREDENTIALS_FILE}")

    # Load credentials
    try:
        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
            creds_data = json.load(f)
        print("βœ… Credentials file is valid JSON")
        # The identifying fields live one level down in real client-secret
        # files; reading them from the top level always printed "N/A".
        client_info = _client_config_section(creds_data)
        print(f" Client ID: {str(client_info.get('client_id', 'N/A'))[:20]}...")
        print(f" Project ID: {client_info.get('project_id', 'N/A')}")
    except json.JSONDecodeError:
        print("❌ Invalid JSON in credentials file")
        return False
    except Exception as e:
        print(f"❌ Error reading credentials: {e}")
        return False

    # Authenticate
    creds = None

    # Check if token file exists
    if os.path.exists(TOKEN_FILE):
        print(f"βœ… Found existing {TOKEN_FILE}")
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
            print("βœ… Loaded existing credentials")
        except Exception as e:
            print(f"⚠️ Error loading existing credentials: {e}")
            creds = None

    # If no valid credentials, get new ones
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            print("πŸ”„ Refreshing expired credentials...")
            try:
                creds.refresh(Request())
                print("βœ… Credentials refreshed successfully")
            except Exception as e:
                print(f"❌ Error refreshing credentials: {e}")
                creds = None

        # Re-check validity rather than mere presence: a cached token that is
        # invalid but cannot be refreshed (no refresh token, or not flagged
        # as expired) must also go through the interactive flow instead of
        # being saved and used as-is.
        if not creds or not creds.valid:
            print("πŸ” Starting OAuth flow...")
            print(" A browser window will open for authentication")
            print(" Please log in with your Google account and grant permissions")
            try:
                flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
                # port=0 lets the OS pick a free port for the local redirect.
                creds = flow.run_local_server(port=0)
                print("βœ… Authentication successful!")
            except Exception as e:
                print(f"❌ Authentication failed: {e}")
                return False

        # Save credentials for next time (best effort: a failure here only
        # means the user has to authenticate again on the next run).
        try:
            with open(TOKEN_FILE, 'w', encoding='utf-8') as token:
                token.write(creds.to_json())
            print(f"βœ… Credentials saved to {TOKEN_FILE}")
        except Exception as e:
            print(f"⚠️ Warning: Could not save credentials: {e}")

    # Test the credentials
    print("\nπŸ§ͺ Testing Google Drive access...")
    try:
        # Imported lazily so a missing google-api-python-client is reported
        # as a failed access test instead of breaking module import.
        from googleapiclient.discovery import build

        service = build('drive', 'v3', credentials=creds)
        # List files to test access
        results = service.files().list(pageSize=1, fields="files(id, name)").execute()
        files = results.get('files', [])
        print("βœ… Google Drive access successful!")
        print(f" Found {len(files)} file(s) in your Drive")
        if files:
            print(f" Sample file: {files[0]['name']}")
        return True
    except Exception as e:
        print(f"❌ Google Drive access test failed: {e}")
        return False
def test_rag_system():
    """Smoke-test the RAG news manager and print its current statistics.

    ``rag_news_manager`` is imported lazily, initialized, and asked for its
    statistics; the statistics and the matching Google Drive links are
    printed.

    Returns:
        bool: ``False`` if the module cannot be imported, initialization
        reports failure, or any step raises; ``True`` otherwise (including
        the case where no statistics are available).
    """
    print("\nπŸ§ͺ Testing RAG News Manager...")
    print("=" * 40)

    try:
        from rag_news_manager import initialize_rag_system, get_rag_stats

        # Guard clause: nothing else to report if initialization failed.
        if not initialize_rag_system():
            print("❌ RAG system initialization failed")
            return False
        print("βœ… RAG system initialized successfully!")

        # Get statistics
        rag_stats = get_rag_stats()
        if not rag_stats:
            print("⚠️ Could not get RAG statistics")
            return True

        print("πŸ“Š Current RAG Statistics:")
        # (label, stats key, format spec) — printed in this order.
        summary_rows = (
            ("Total entries", 'total_entries', ""),
            ("Real news", 'real_count', ""),
            ("Fake news", 'fake_count', ""),
            ("Average confidence", 'avg_confidence', ".1%"),
            ("Google Drive folder", 'folder_id', ""),
            ("Google Drive file", 'file_id', ""),
        )
        for label, key, spec in summary_rows:
            print(f" {label}: {format(rag_stats[key], spec)}")

        # Provide Google Drive links
        folder_id = rag_stats['folder_id']
        if folder_id:
            folder_link = "https://drive.google.com/drive/folders/" + f"{folder_id}"
            print(f"\nπŸ”— Google Drive RAG Folder: {folder_link}")
        file_id = rag_stats['file_id']
        if file_id:
            file_link = "https://drive.google.com/file/d/" + f"{file_id}" + "/view"
            print(f"πŸ”— Google Drive RAG File: {file_link}")
    except ImportError as import_err:
        print(f"❌ Could not import RAG system: {import_err}")
        return False
    except Exception as err:
        print(f"❌ RAG system test failed: {err}")
        return False

    return True
def main():
    """Run the setup stages in order and report the overall outcome.

    Stage 1 configures Google Drive credentials; stage 2 checks the RAG
    system. The second stage only runs if the first one succeeded.

    Returns:
        bool: ``True`` when both stages succeed, ``False`` as soon as one
        of them fails.
    """
    intro_lines = (
        "πŸš€ Google Drive RAG System Setup",
        "=" * 50,
        "This script will help you set up Google Drive integration",
        "for saving high-confidence news for RAG purposes.",
        "",
    )
    for text in intro_lines:
        print(text)

    # Step 1: Setup credentials
    credentials_ok = setup_google_drive_credentials()
    if not credentials_ok:
        print("\n❌ Setup failed at credentials step")
        return False

    # Step 2: Test RAG system
    rag_ok = test_rag_system()
    if not rag_ok:
        print("\n❌ Setup failed at RAG system test")
        return False

    success_lines = (
        "\nπŸŽ‰ Setup completed successfully!",
        "=" * 50,
        "βœ… Google Drive credentials configured",
        "βœ… RAG system initialized",
        "βœ… Ready to save high-confidence news!",
        "",
        "πŸ“‹ Next steps:",
        "1. Your app will now automatically save news with 95%+ confidence",
        "2. Check your Google Drive for the 'Vietnamese_Fake_News_RAG' folder",
        "3. View saved news in the 'high_confidence_news.json' file",
        "4. The system will use this data for better RAG analysis",
    )
    print("\n".join(success_lines))
    return True
if __name__ == "__main__":
    # main() returns True/False; surface that as the process exit status so
    # shells and CI can detect a failed setup (previously always exited 0).
    raise SystemExit(0 if main() else 1)