#!/usr/bin/env python3
"""
Setup script for the Google Drive RAG system.

Walks through Google Drive OAuth authentication for the RAG news manager:
validates ``credentials.json``, obtains (or refreshes) a user token, stores it
in ``token.json``, checks that the Drive API is reachable, and finally runs a
smoke test of the RAG news manager.
"""

import json
import os
import sys
from typing import Optional

from google.auth.transport.requests import Request
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow

# Configuration
SCOPES = ['https://www.googleapis.com/auth/drive.file']
CREDENTIALS_FILE = 'credentials.json'
TOKEN_FILE = 'token.json'


def _print_credentials_help() -> None:
    """Explain how to obtain the OAuth client-secrets file when it is missing."""
    print(f"❌ {CREDENTIALS_FILE} not found!")
    print("\n📋 To get Google Drive credentials:")
    print("1. Go to Google Cloud Console: https://console.cloud.google.com/")
    print("2. Create a new project or select existing one")
    print("3. Enable Google Drive API")
    print("4. Go to 'Credentials' → 'Create Credentials' → 'OAuth 2.0 Client IDs'")
    print("5. Choose 'Desktop application'")
    print("6. Download the JSON file and rename it to 'credentials.json'")
    print("7. Place it in this directory")


def _client_config(creds_data: dict) -> dict:
    """Return the mapping inside a client-secrets document that holds the client fields.

    Downloaded OAuth client files nest their fields under an ``"installed"``
    (desktop app) or ``"web"`` key. A flat document is returned unchanged so
    that top-level ``client_id`` / ``project_id`` entries are still found.
    """
    for section in ('installed', 'web'):
        nested = creds_data.get(section)
        if isinstance(nested, dict):
            return nested
    return creds_data


def _check_client_secrets() -> bool:
    """Parse ``CREDENTIALS_FILE`` and print a short summary of it.

    Returns:
        True when the file could be read and holds a JSON object, else False
        (after printing the reason).
    """
    try:
        with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
            creds_data = json.load(f)
    except json.JSONDecodeError:
        print("❌ Invalid JSON in credentials file")
        return False
    except Exception as e:
        print(f"❌ Error reading credentials: {e}")
        return False

    if not isinstance(creds_data, dict):
        print("❌ Error reading credentials: expected a JSON object at the top level")
        return False

    config = _client_config(creds_data)
    print("✅ Credentials file is valid JSON")
    # str() guards the slice against a non-string client_id value.
    print(f" Client ID: {str(config.get('client_id', 'N/A'))[:20]}...")
    print(f" Project ID: {config.get('project_id', 'N/A')}")
    return True


def _load_saved_credentials() -> Optional[Credentials]:
    """Load previously saved user credentials from ``TOKEN_FILE``.

    Returns:
        The stored credentials, or None when the token file does not exist or
        cannot be loaded.
    """
    if not os.path.exists(TOKEN_FILE):
        return None

    print(f"✅ Found existing {TOKEN_FILE}")
    try:
        creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
    except Exception as e:
        print(f"⚠️ Error loading existing credentials: {e}")
        return None
    print("✅ Loaded existing credentials")
    return creds


def _refresh_or_authorize(creds: Optional[Credentials]) -> Optional[Credentials]:
    """Turn missing or invalid credentials into valid ones.

    Tries a token refresh first when that is possible, then falls back to the
    interactive browser-based OAuth flow.

    Returns:
        Usable credentials, or None when the OAuth flow failed.
    """
    if creds and creds.expired and creds.refresh_token:
        print("🔄 Refreshing expired credentials...")
        try:
            creds.refresh(Request())
            print("✅ Credentials refreshed successfully")
        except Exception as e:
            print(f"❌ Error refreshing credentials: {e}")
            creds = None

    # Also covers credentials that are invalid but could not be refreshed
    # (e.g. no refresh token); saving those would only defer the failure.
    if not creds or not creds.valid:
        print("🔐 Starting OAuth flow...")
        print(" A browser window will open for authentication")
        print(" Please log in with your Google account and grant permissions")
        try:
            flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
            creds = flow.run_local_server(port=0)
            print("✅ Authentication successful!")
        except Exception as e:
            print(f"❌ Authentication failed: {e}")
            return None

    return creds


def _save_token(creds: Credentials) -> None:
    """Persist *creds* to ``TOKEN_FILE``; a failure is reported but not fatal."""
    try:
        with open(TOKEN_FILE, 'w', encoding='utf-8') as token:
            token.write(creds.to_json())
        print(f"✅ Credentials saved to {TOKEN_FILE}")
    except Exception as e:
        print(f"⚠️ Warning: Could not save credentials: {e}")


def _test_drive_access(creds: Credentials) -> bool:
    """Issue one small Drive API request to confirm that *creds* work.

    Returns:
        True when the request succeeded, False otherwise.
    """
    print("\n🧪 Testing Google Drive access...")
    try:
        # Imported lazily so a missing API client is reported as a failed test.
        from googleapiclient.discovery import build

        service = build('drive', 'v3', credentials=creds)

        # List files to test access (pageSize=1, so at most one file is reported)
        results = service.files().list(pageSize=1, fields="files(id, name)").execute()
        files = results.get('files', [])

        print("✅ Google Drive access successful!")
        print(f" Found {len(files)} file(s) in your Drive")
        if files:
            print(f" Sample file: {files[0]['name']}")
        return True
    except Exception as e:
        print(f"❌ Google Drive access test failed: {e}")
        return False


def setup_google_drive_credentials() -> bool:
    """Set up Google Drive credentials for local development.

    Validates the client-secrets file, reuses, refreshes or newly obtains user
    credentials, saves them for next time, and verifies Drive API access.

    Returns:
        True when credentials are in place and the Drive access test passed,
        False otherwise.
    """
    print("🔧 Setting up Google Drive credentials for RAG system...")
    print("=" * 60)

    # Check if credentials file exists
    if not os.path.exists(CREDENTIALS_FILE):
        _print_credentials_help()
        return False
    print(f"✅ Found {CREDENTIALS_FILE}")

    if not _check_client_secrets():
        return False

    creds = _load_saved_credentials()

    # If no valid credentials, get new ones and save them for next time
    if not creds or not creds.valid:
        creds = _refresh_or_authorize(creds)
        if creds is None:
            return False
        _save_token(creds)

    return _test_drive_access(creds)


def test_rag_system() -> bool:
    """Initialise the RAG news manager and print its current statistics.

    Returns:
        True when the RAG system initialised (even if no statistics were
        available), False on import errors, initialisation failure or any
        other exception.
    """
    print("\n🧪 Testing RAG News Manager...")
    print("=" * 40)

    try:
        from rag_news_manager import initialize_rag_system, get_rag_stats

        if not initialize_rag_system():
            print("❌ RAG system initialization failed")
            return False
        print("✅ RAG system initialized successfully!")

        # Get statistics
        stats = get_rag_stats()
        if stats:
            print("📊 Current RAG Statistics:")
            print(f" Total entries: {stats['total_entries']}")
            print(f" Real news: {stats['real_count']}")
            print(f" Fake news: {stats['fake_count']}")
            print(f" Average confidence: {stats['avg_confidence']:.1%}")
            print(f" Google Drive folder: {stats['folder_id']}")
            print(f" Google Drive file: {stats['file_id']}")

            # Provide Google Drive links
            if stats['folder_id']:
                folder_url = f"https://drive.google.com/drive/folders/{stats['folder_id']}"
                print(f"\n🔗 Google Drive RAG Folder: {folder_url}")
            if stats['file_id']:
                file_url = f"https://drive.google.com/file/d/{stats['file_id']}/view"
                print(f"🔗 Google Drive RAG File: {file_url}")
        else:
            print("⚠️ Could not get RAG statistics")
    except ImportError as e:
        print(f"❌ Could not import RAG system: {e}")
        return False
    except Exception as e:
        print(f"❌ RAG system test failed: {e}")
        return False

    return True


def main() -> bool:
    """Run the full setup: credentials first, then the RAG system test.

    Returns:
        True when both steps succeeded, False as soon as one of them fails.
    """
    print("🚀 Google Drive RAG System Setup")
    print("=" * 50)
    print("This script will help you set up Google Drive integration")
    print("for saving high-confidence news for RAG purposes.")
    print()

    # Step 1: Setup credentials
    if not setup_google_drive_credentials():
        print("\n❌ Setup failed at credentials step")
        return False

    # Step 2: Test RAG system
    if not test_rag_system():
        print("\n❌ Setup failed at RAG system test")
        return False

    print("\n🎉 Setup completed successfully!")
    print("=" * 50)
    print("✅ Google Drive credentials configured")
    print("✅ RAG system initialized")
    print("✅ Ready to save high-confidence news!")
    print()
    print("📋 Next steps:")
    print("1. Your app will now automatically save news with 95%+ confidence")
    print("2. Check your Google Drive for the 'Vietnamese_Fake_News_RAG' folder")
    print("3. View saved news in the 'high_confidence_news.json' file")
    print("4. The system will use this data for better RAG analysis")

    return True


if __name__ == "__main__":
    # Propagate failure to the shell so callers/CI can detect a broken setup.
    sys.exit(0 if main() else 1)