Spaces:
Sleeping
Sleeping
| #!/usr/bin/env python3 | |
| """ | |
| Setup script for Google Drive RAG system | |
| This script helps you set up Google Drive authentication for the RAG news manager | |
| """ | |
| import os | |
| import json | |
| from google.oauth2.credentials import Credentials | |
| from google_auth_oauthlib.flow import InstalledAppFlow | |
| from google.auth.transport.requests import Request | |
# Configuration
# OAuth scopes handed to both the saved-token loader and the interactive flow.
SCOPES = ["https://www.googleapis.com/auth/drive.file"]
# OAuth client secrets file; its presence is checked before anything else runs.
CREDENTIALS_FILE = "credentials.json"
# Location where the authorized-user token is read from and written back to.
TOKEN_FILE = "token.json"
def _client_secrets_section(creds_data):
    """Return the mapping that actually holds client_id / project_id.

    Client secrets files downloaded from Google Cloud Console nest these
    fields under an "installed" (desktop app) or "web" key; a flat mapping
    is returned unchanged so hand-written files keep working.
    """
    for section_name in ("installed", "web"):
        section = creds_data.get(section_name)
        if isinstance(section, dict):
            return section
    return creds_data


def setup_google_drive_credentials():
    """Set up Google Drive credentials for local development.

    Steps, in order:
      1. Verify CREDENTIALS_FILE exists and parses as JSON (printing a short
         summary of the client it describes).
      2. Load a previously saved token from TOKEN_FILE, refreshing it when it
         is expired and a refresh token is available.
      3. Run the interactive OAuth flow whenever no *valid* credentials are
         available after step 2, then save the result to TOKEN_FILE.
      4. Issue a one-item files().list() call to confirm Drive access.

    Returns:
        bool: True when the Drive access test succeeds; False when the
        credentials file is missing/unreadable, authentication fails, or the
        access test raises.
    """
    print("π§ Setting up Google Drive credentials for RAG system...")
    print("=" * 60)

    # Check if credentials file exists
    if not os.path.exists(CREDENTIALS_FILE):
        print(f"β {CREDENTIALS_FILE} not found!")
        print("\nπ To get Google Drive credentials:")
        print("1. Go to Google Cloud Console: https://console.cloud.google.com/")
        print("2. Create a new project or select existing one")
        print("3. Enable Google Drive API")
        print("4. Go to 'Credentials' β 'Create Credentials' β 'OAuth 2.0 Client IDs'")
        print("5. Choose 'Desktop application'")
        print("6. Download the JSON file and rename it to 'credentials.json'")
        print("7. Place it in this directory")
        return False
    print(f"β Found {CREDENTIALS_FILE}")

    # Load credentials (only to validate the file and show a summary; the
    # OAuth flow below re-reads the file itself).
    try:
        with open(CREDENTIALS_FILE, 'r', encoding='utf-8') as f:
            creds_data = json.load(f)
        print("β Credentials file is valid JSON")
        # The identifying fields live one level down ("installed"/"web"), so
        # reading them from the top level would always print 'N/A'.
        client_info = _client_secrets_section(creds_data)
        print(f" Client ID: {str(client_info.get('client_id', 'N/A'))[:20]}...")
        print(f" Project ID: {client_info.get('project_id', 'N/A')}")
    except json.JSONDecodeError:
        print("β Invalid JSON in credentials file")
        return False
    except Exception as e:
        print(f"β Error reading credentials: {e}")
        return False

    # Authenticate
    creds = None

    # Check if token file exists
    if os.path.exists(TOKEN_FILE):
        print(f"β Found existing {TOKEN_FILE}")
        try:
            creds = Credentials.from_authorized_user_file(TOKEN_FILE, SCOPES)
            print("β Loaded existing credentials")
        except Exception as e:
            print(f"β οΈ Error loading existing credentials: {e}")
            creds = None

    # If no valid credentials, get new ones
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            print("π Refreshing expired credentials...")
            try:
                creds.refresh(Request())
                print("β Credentials refreshed successfully")
            except Exception as e:
                print(f"β Error refreshing credentials: {e}")
                creds = None

        # Also covers a token that loaded but is invalid and could not be
        # refreshed (e.g. no refresh token): previously such credentials
        # skipped the OAuth flow and were saved and used while still invalid.
        if not creds or not creds.valid:
            print("π Starting OAuth flow...")
            print(" A browser window will open for authentication")
            print(" Please log in with your Google account and grant permissions")
            try:
                flow = InstalledAppFlow.from_client_secrets_file(CREDENTIALS_FILE, SCOPES)
                creds = flow.run_local_server(port=0)
                print("β Authentication successful!")
            except Exception as e:
                print(f"β Authentication failed: {e}")
                return False

        # Save credentials for next time (best effort: a failure here only
        # means the user has to authenticate again on the next run).
        try:
            with open(TOKEN_FILE, 'w', encoding='utf-8') as token:
                token.write(creds.to_json())
            print(f"β Credentials saved to {TOKEN_FILE}")
        except Exception as e:
            print(f"β οΈ Warning: Could not save credentials: {e}")

    # Test the credentials
    print("\nπ§ͺ Testing Google Drive access...")
    try:
        # Imported lazily so a missing google-api-python-client is reported
        # as a failed access test instead of crashing at module import time.
        from googleapiclient.discovery import build

        service = build('drive', 'v3', credentials=creds)
        # NOTE: pageSize=1 caps the listing at one entry, and per Google's
        # scope documentation drive.file only exposes files this app created
        # or opened, so the count below is a connectivity check, not an
        # inventory of the user's Drive.
        results = service.files().list(pageSize=1, fields="files(id, name)").execute()
        files = results.get('files', [])
        print("β Google Drive access successful!")
        print(f" Found {len(files)} file(s) in your Drive")
        if files:
            print(f" Sample file: {files[0]['name']}")
        return True
    except Exception as e:
        print(f"β Google Drive access test failed: {e}")
        return False
def test_rag_system():
    """Smoke-test the RAG news manager and print its current statistics.

    Returns:
        bool: True when the RAG system initializes (even if no statistics
        could be fetched); False when the module cannot be imported,
        initialization reports failure, or any other exception is raised.
    """
    print("\nπ§ͺ Testing RAG News Manager...")
    print("=" * 40)
    try:
        from rag_news_manager import initialize_rag_system, get_rag_stats

        # Bail out early when initialization reports failure.
        if not initialize_rag_system():
            print("β RAG system initialization failed")
            return False
        print("β RAG system initialized successfully!")

        # Statistics are optional: their absence is only a warning.
        rag_stats = get_rag_stats()
        if not rag_stats:
            print("β οΈ Could not get RAG statistics")
            return True

        print("π Current RAG Statistics:")
        print(f" Total entries: {rag_stats['total_entries']}")
        print(f" Real news: {rag_stats['real_count']}")
        print(f" Fake news: {rag_stats['fake_count']}")
        print(f" Average confidence: {rag_stats['avg_confidence']:.1%}")
        print(f" Google Drive folder: {rag_stats['folder_id']}")
        print(f" Google Drive file: {rag_stats['file_id']}")

        # Provide Google Drive links for whichever ids are set.
        if rag_stats['folder_id']:
            print(
                "\nπ Google Drive RAG Folder: "
                f"https://drive.google.com/drive/folders/{rag_stats['folder_id']}"
            )
        if rag_stats['file_id']:
            print(
                "π Google Drive RAG File: "
                f"https://drive.google.com/file/d/{rag_stats['file_id']}/view"
            )
        return True
    except ImportError as import_err:
        print(f"β Could not import RAG system: {import_err}")
        return False
    except Exception as err:
        print(f"β RAG system test failed: {err}")
        return False
def main():
    """Run the full setup: credentials first, then the RAG system check.

    Returns:
        bool: True when both steps succeed; False as soon as one of them
        fails (a message naming the failed step is printed first).
    """
    intro_lines = (
        "π Google Drive RAG System Setup",
        "=" * 50,
        "This script will help you set up Google Drive integration",
        "for saving high-confidence news for RAG purposes.",
        "",
    )
    for text in intro_lines:
        print(text)

    # Each step is paired with the message printed when it reports failure;
    # steps run in order and the first failure aborts the setup.
    steps = (
        (setup_google_drive_credentials, "\nβ Setup failed at credentials step"),
        (test_rag_system, "\nβ Setup failed at RAG system test"),
    )
    for run_step, failure_message in steps:
        if not run_step():
            print(failure_message)
            return False

    closing_lines = (
        "\nπ Setup completed successfully!",
        "=" * 50,
        "β Google Drive credentials configured",
        "β RAG system initialized",
        "β Ready to save high-confidence news!",
        "",
        "π Next steps:",
        "1. Your app will now automatically save news with 95%+ confidence",
        "2. Check your Google Drive for the 'Vietnamese_Fake_News_RAG' folder",
        "3. View saved news in the 'high_confidence_news.json' file",
        "4. The system will use this data for better RAG analysis",
    )
    for text in closing_lines:
        print(text)
    return True
if __name__ == "__main__":
    # main() returns True on success and False on failure; surface that as
    # the process exit status so shells and CI can detect a failed setup
    # (previously the result was discarded and the script always exited 0).
    raise SystemExit(0 if main() else 1)