Spaces:
Running
Running
| import gradio as gr | |
| from googleapiclient.discovery import build | |
| import google.generativeai as genai | |
| import torch | |
| from transformers import AutoTokenizer, AutoModelForSequenceClassification | |
| import re | |
| import os | |
| import numpy as np | |
# --- CONFIGURATION ---
# Credentials are read from the environment (for example Hugging Face Space
# secrets) instead of being committed to the repository.
# NOTE(review): the API keys that were previously hard-coded here are public
# in the file history and should be revoked and re-issued.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "")
# The Custom Search engine id is an identifier rather than a credential, so
# the previous value is kept as the default.
SEARCH_ENGINE_ID = os.environ.get("SEARCH_ENGINE_ID", "f34f8a4816771488b")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
MODEL_PATH = "./vietnamese_fake_news_model"

if not GOOGLE_API_KEY:
    print("Warning: GOOGLE_API_KEY is not set; Google Search will use fallback results.")
if not GEMINI_API_KEY:
    print("Warning: GEMINI_API_KEY is not set; Gemini analysis will be unavailable.")

genai.configure(api_key=GEMINI_API_KEY)
_FALLBACK_MODEL_NAME = "distilbert-base-multilingual-cased"


def _load_fallback_model():
    """Load the generic multilingual DistilBERT checkpoint as a last resort.

    Returns:
        A ``(tokenizer, model)`` pair, or ``(None, None)`` when the fallback
        checkpoint cannot be loaded either.
    """
    try:
        fallback_tokenizer = AutoTokenizer.from_pretrained(_FALLBACK_MODEL_NAME)
        fallback_model = AutoModelForSequenceClassification.from_pretrained(
            _FALLBACK_MODEL_NAME, num_labels=2
        )
        print("Fallback DistilBERT model loaded successfully!")
        # The fallback is a base checkpoint: its 2-label classification head
        # has not been fine-tuned for fake-news detection, so REAL/FAKE scores
        # produced with it carry no real signal.
        print("Warning: fallback model is not fine-tuned; its predictions are unreliable.")
        return fallback_tokenizer, fallback_model
    except Exception as fallback_error:
        print(f"Fallback model also failed: {fallback_error}")
        return None, None


# Module-level model setup: `tokenizer` and `model` are read by
# predict_with_distilbert(); both are None when nothing could be loaded.
print("Loading the DistilBERT model we trained...")
try:
    if os.path.exists(MODEL_PATH):
        tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
        model = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)
        print("DistilBERT model loaded successfully!")
    else:
        print(f"Model directory '{MODEL_PATH}' not found!")
        print("Our custom model isn't available, trying a backup model...")
        tokenizer, model = _load_fallback_model()
except Exception as e:
    print(f"Error loading DistilBERT model: {e}")
    print("Something went wrong, trying the backup model...")
    tokenizer, model = _load_fallback_model()
# Domain -> credibility weight (0-1) for sources treated as reputable.
# analyze_sources() only checks whether a result's domain is listed here.
CREDIBLE_SOURCES = {
    # Vietnamese news sites
    "vnexpress.net": 0.95,
    "tuoitre.vn": 0.95,
    "thanhnien.vn": 0.90,
    "dantri.com.vn": 0.90,
    "vietnamnet.vn": 0.85,
    "zing.vn": 0.85,
    "kenh14.vn": 0.80,
    "soha.vn": 0.80,
    "baotintuc.vn": 0.85,
    "nhandan.vn": 0.90,
    "laodong.vn": 0.85,
    "congan.com.vn": 0.90,
    # Vietnamese state and ministry sites
    "quochoi.vn": 0.95,
    "chinhphu.vn": 0.95,
    "moh.gov.vn": 0.90,
    "mofa.gov.vn": 0.90,
    "mard.gov.vn": 0.85,
    "moc.gov.vn": 0.85,
    "mof.gov.vn": 0.85,
    "mst.gov.vn": 0.85,
    # International sites
    "wikipedia.org": 0.95,
    "bbc.com": 0.95,
    "bbc.co.uk": 0.95,
    "cnn.com": 0.90,
    "reuters.com": 0.95,
    "ap.org": 0.95,
    "espn.com": 0.85,
    "fifa.com": 0.95,
    "nytimes.com": 0.90,
    "washingtonpost.com": 0.90,
    "theguardian.com": 0.90,
}
def clean_text(text):
    """Normalize raw input into a single-spaced string for the model.

    Non-string input is converted with ``str()``. Leading/trailing whitespace
    is removed and every whitespace run collapses to one space. Inputs shorter
    than 10 characters after cleaning get a fixed Vietnamese prefix.
    """
    normalized = text if isinstance(text, str) else str(text)
    normalized = re.sub(r'\s+', ' ', normalized.strip())
    if len(normalized) >= 10:
        return normalized
    return "Tin tức ngắn: " + normalized
def predict_with_distilbert(text):
    """Classify *text* with the module-level DistilBERT model.

    Returns:
        ``(prediction, confidence, real_score, fake_score)`` where prediction
        is ``"REAL"`` or ``"FAKE"``; all four are ``None`` when the model or
        tokenizer is unavailable or inference raises.
    """
    unavailable = (None, None, None, None)
    if model is None or tokenizer is None:
        return unavailable

    try:
        encoded = tokenizer(
            clean_text(text),
            return_tensors="pt",
            truncation=True,
            padding=True,
            max_length=512,
        )
        with torch.no_grad():
            logits = model(**encoded).logits
        probabilities = torch.nn.functional.softmax(logits, dim=-1)

        # Index 0 is read as the REAL score and index 1 as the FAKE score.
        real_score = probabilities[0][0].item()
        fake_score = probabilities[0][1].item()

        # A tie is reported as FAKE.
        if real_score > fake_score:
            label, confidence = "REAL", real_score
        else:
            label, confidence = "FAKE", fake_score
        return label, confidence, real_score, fake_score
    except Exception as e:
        print(f"DistilBERT prediction error: {e}")
        return unavailable
def process_search_results(items):
    """Reduce raw search items to dicts with ``title``, ``snippet`` and ``link``.

    Missing fields become empty strings; any other keys are dropped.
    """
    wanted_fields = ('title', 'snippet', 'link')
    return [
        {field: entry.get(field, '') for field in wanted_fields}
        for entry in items
    ]
def google_search_fallback(news_text):
    """Return canned search results when the live Google search is unavailable.

    The topic is chosen by case-insensitive keyword matching on *news_text*
    (Argentina + World Cup, COVID, Vietnam, otherwise a generic pair), so
    spellings such as "Covid-19" or "VIỆT NAM" select the intended branch.

    NOTE(review): these are fixed placeholder entries, not real search hits.
    analyze_news() passes them to analyze_sources() like live results, so
    they raise the reported source credibility without any evidence behind
    them — consider marking them as synthetic for callers.

    Returns:
        A list of dicts with ``title``, ``snippet`` and ``link`` keys.
    """
    print("Using fallback search system...")

    lowered = news_text.lower()

    if "argentina" in lowered and "world cup" in lowered:
        mock_results = [
            {
                'title': 'Argentina wins World Cup 2022 - FIFA Official',
                'snippet': 'Argentina defeated France in the 2022 World Cup final to win their third World Cup title.',
                'link': 'https://www.fifa.com/worldcup/news/argentina-wins-world-cup-2022'
            },
            {
                'title': 'World Cup 2022 Final: Argentina vs France - BBC Sport',
                'snippet': 'Argentina won the 2022 FIFA World Cup after defeating France in a thrilling final.',
                'link': 'https://www.bbc.com/sport/football/world-cup-2022'
            },
            {
                'title': 'Lionel Messi leads Argentina to World Cup victory - ESPN',
                'snippet': 'Lionel Messi finally won the World Cup as Argentina defeated France in Qatar 2022.',
                'link': 'https://www.espn.com/soccer/world-cup/story/argentina-messi-world-cup'
            }
        ]
    elif "covid" in lowered:
        mock_results = [
            {
                'title': 'COVID-19 Updates - World Health Organization',
                'snippet': 'Latest updates on COVID-19 pandemic from WHO official sources.',
                'link': 'https://www.who.int/emergencies/diseases/novel-coronavirus-2019'
            },
            {
                'title': 'COVID-19 Vietnam News - Ministry of Health',
                'snippet': 'Official COVID-19 updates from Vietnam Ministry of Health.',
                'link': 'https://moh.gov.vn/covid-19'
            }
        ]
    elif "việt nam" in lowered or "vietnam" in lowered:
        mock_results = [
            {
                'title': 'Vietnam News - VnExpress',
                'snippet': 'Latest news from Vietnam covering politics, economy, and society.',
                'link': 'https://vnexpress.net'
            },
            {
                'title': 'Vietnam News - Tuổi Trẻ',
                'snippet': 'Vietnamese news and current events from Tuổi Trẻ newspaper.',
                'link': 'https://tuoitre.vn'
            }
        ]
    else:
        mock_results = [
            {
                'title': 'News Verification - Fact Check',
                'snippet': 'Fact-checking and news verification from reliable sources.',
                'link': 'https://www.factcheck.org'
            },
            {
                'title': 'News Analysis - Reuters',
                'snippet': 'Professional news analysis and reporting from Reuters.',
                'link': 'https://www.reuters.com'
            }
        ]

    print(f"Generated {len(mock_results)} mock search results")
    return mock_results
def _build_search_queries(news_text):
    """Return the ordered, de-duplicated query strings to try for *news_text*."""
    lowered = news_text.lower()

    # Topic shortcuts, matched case-insensitively ("Covid-19", "argentina", ...).
    if "argentina" in lowered and "world cup" in lowered:
        return [
            "Argentina World Cup 2022 champion winner",
            "Argentina vô địch World Cup 2022",
            "World Cup 2022 Argentina final"
        ]
    if "covid" in lowered:
        return [
            "COVID-19 Vietnam news",
            "COVID Vietnam 2022 2023",
            "dịch COVID Việt Nam"
        ]

    # Tokenize into WHOLE words first. Matching accented and ASCII characters
    # separately splits Vietnamese words at every diacritic ("Việt" -> "Vi",
    # "ệ", "t") and yields meaningless queries.
    words = re.findall(r'[A-Za-zÀ-ỹ]+', news_text)
    ascii_words = [w for w in words if re.fullmatch(r'[A-Za-z]+', w)]
    has_accented_words = len(ascii_words) != len(words)
    numbers = re.findall(r'\d{4}', news_text)  # Years
    keywords = re.findall(r'[A-Za-zÀ-ỹ]+|\b(?:19|20)\d{2}\b|\b\d{1,2}\b', news_text)

    queries = []
    if has_accented_words:
        # Text with diacritics is treated as Vietnamese: keep the leading
        # words in their original order, including unaccented syllables.
        queries.append(' '.join(words[:5]))
    if ascii_words:
        queries.append(' '.join(ascii_words[:5]))
    if numbers:
        queries.append(' '.join(ascii_words[:3] + numbers))
    queries.append(' '.join(keywords[:10]))

    # Drop blank and repeated queries so no API call is wasted on them.
    return list(dict.fromkeys(q for q in queries if q.strip()))


def google_search(news_text):
    """Search Google Custom Search for *news_text*, with canned results as backup.

    Query strategies are tried in order and the first one that yields items
    wins; if none does, a search on the first 30 characters is attempted.
    When every attempt is empty, or any API call raises, the result of
    google_search_fallback() is returned instead.

    Returns:
        A list of dicts with ``title``, ``snippet`` and ``link`` keys.
    """
    try:
        service = build("customsearch", "v1", developerKey=GOOGLE_API_KEY)

        for strategy_number, search_query in enumerate(_build_search_queries(news_text), 1):
            print(f"Strategy {strategy_number}: Searching for '{search_query}'")
            result = service.cse().list(
                q=search_query,
                cx=SEARCH_ENGINE_ID,
                num=10
            ).execute()
            print(f"API Response keys: {list(result.keys())}")
            if 'searchInformation' in result:
                print(f"Total results: {result['searchInformation'].get('totalResults', 'Unknown')}")
            if result.get('items'):
                print(f"Found {len(result['items'])} results with strategy {strategy_number}")
                return process_search_results(result['items'])
            print(f"No results with strategy {strategy_number}")

        print("All strategies failed, trying simple phrase search...")
        simple_query = news_text[:30].strip()  # First 30 characters
        if simple_query:
            result = service.cse().list(
                q=simple_query,
                cx=SEARCH_ENGINE_ID,
                num=5
            ).execute()
            if result.get('items'):
                print(f"Found {len(result['items'])} results with simple search")
                return process_search_results(result['items'])

        print("All search strategies failed, using fallback...")
        return google_search_fallback(news_text)
    except Exception as e:
        # Best-effort boundary: any API/client failure degrades to the
        # canned fallback rather than failing the whole analysis.
        print(f"Google Search error: {e}")
        print(f"Error type: {type(e).__name__}")
        error_str = str(e).lower()
        if any(keyword in error_str for keyword in ["403", "blocked", "quota", "limit", "exceeded"]):
            print("Google Search API blocked/quota exceeded, using fallback...")
        elif "invalid" in error_str or "unauthorized" in error_str:
            print("API key issue, using fallback...")
        return google_search_fallback(news_text)
def analyze_sources(search_results, credible_sources=None):
    """Check how trustworthy the news sources are.

    A result counts as credible when its URL host is a listed domain or a
    subdomain of one. Hosts are compared exactly or by ``"." + domain``
    suffix, so look-alikes such as ``fakebbc.com`` or ``bbc.com.evil.net``
    are not accepted.

    Args:
        search_results: List of result dicts; the ``link`` key is read.
        credible_sources: Optional mapping whose keys are trusted domains.
            Defaults to the module-level ``CREDIBLE_SOURCES``.

    Returns:
        ``(source_credibility, popularity_score, credibility_text)`` where
        credibility is the credible fraction of results and popularity is
        ``min(1.0, result_count / 5)``. With no results the fixed tuple
        ``(0.50, 0.20, "No sources found")`` is returned.
    """
    # Local import keeps this function self-contained in the file.
    from urllib.parse import urlparse

    if not search_results:
        return 0.50, 0.20, "No sources found"
    if credible_sources is None:
        credible_sources = CREDIBLE_SOURCES

    total_sources = len(search_results)
    credible_count = 0
    for result in search_results:
        link = result.get('link', '') or ''
        try:
            # hostname strips scheme, credentials and port, and is None for
            # links without a "//" authority part.
            host = (urlparse(link).hostname or '').lower().rstrip('.')
        except ValueError:
            # Malformed URL (e.g. broken IPv6 literal): treat as unknown.
            host = ''
        if any(host == source or host.endswith('.' + source) for source in credible_sources):
            credible_count += 1

    source_credibility = credible_count / total_sources
    popularity_score = min(1.0, total_sources / 5.0)  # Normalize to 0-1

    # Create a summary of what we found
    if source_credibility > 0.7:
        credibility_text = f"High credibility: {credible_count}/{total_sources} sources from reputable outlets"
    elif source_credibility > 0.4:
        credibility_text = f"Medium credibility: {credible_count}/{total_sources} sources from reputable outlets"
    else:
        credibility_text = f"Low credibility: {credible_count}/{total_sources} sources from reputable outlets"
    return source_credibility, popularity_score, credibility_text
# Keyword patterns are anchored at word starts. English stems may carry a
# suffix ("confirmed", "falsely"); Vietnamese phrases must match whole words.
# Plain substring tests misfire badly here: "incorrect" contains "correct",
# "giải" contains "giả", "saigon" contains "sai".
_SUPPORT_PATTERN = re.compile(
    r'(?<!\w)(?:(?:confirm|verif|true|accurate|correct)\w*'
    # A preceding "không " (not) negates the Vietnamese support phrases.
    r'|(?<!không )(?:xác nhận|chính xác|đúng))(?!\w)'
)
_CONTRADICT_PATTERN = re.compile(
    r'(?<!\w)(?:(?:false|fake|incorrect|inaccurate|untrue|wrong)\w*'
    r'|sai|giả|không đúng|không chính xác)(?!\w)'
)
_YEAR_PATTERN = re.compile(r'\b(20\d{2})\b')


def analyze_source_support(news_text, search_results):
    """Check if the search results agree or disagree with the news.

    Each result's title and snippet are scanned for support/contradiction
    keywords. A result that mentions 20xx years but not the first 20xx year
    found in *news_text* is counted as a contradiction. Results with no
    clear signal count as half a supporting source.

    Returns:
        ``(support_ratio, support_text)`` where the ratio is the supporting
        count divided by the number of results, or
        ``(0.5, "No sources to analyze")`` when there are no results.
    """
    if not search_results:
        return 0.5, "No sources to analyze"

    support_count = 0
    contradict_count = 0
    total_sources = len(search_results)

    # Look for years mentioned in the news
    news_years = _YEAR_PATTERN.findall(news_text)
    news_year = news_years[0] if news_years else None

    for result in search_results:
        title_snippet = (result.get('title', '') + ' ' + result.get('snippet', '')).lower()

        # See if the years match up
        if news_year:
            source_years = _YEAR_PATTERN.findall(title_snippet)
            if source_years and news_year not in source_years:
                contradict_count += 1
                continue

        # Count distinct keyword hits on each side.
        support_score = len(set(_SUPPORT_PATTERN.findall(title_snippet)))
        contradict_score = len(set(_CONTRADICT_PATTERN.findall(title_snippet)))

        if contradict_score > support_score:
            contradict_count += 1
        elif support_score > contradict_score:
            support_count += 1
        else:
            # If unclear, assume slight support
            support_count += 0.5

    support_ratio = support_count / total_sources
    if support_ratio > 0.7:
        support_text = f"Sources strongly support the news: {support_count:.1f}/{total_sources} sources confirm"
    elif support_ratio > 0.4:
        support_text = f"Sources mixed: {support_count:.1f}/{total_sources} sources support, {contradict_count} contradict"
    else:
        support_text = f"Sources contradict the news: {contradict_count}/{total_sources} sources contradict"
    return support_ratio, support_text
def analyze_with_gemini(news_text, search_results, distilbert_prediction, distilbert_confidence):
    """Use Gemini AI to analyze the news and compare with our model results.

    Builds a Vietnamese prompt from the news text, up to five search results
    and the DistilBERT verdict, then tries each candidate Gemini model in
    order until one returns a response.

    Returns:
        The model's text answer. On a quota error (429/"quota") a basic
        fallback summary is returned; on any other failure an error string
        starting with "Lỗi phân tích Gemini:" is returned. This function does
        not raise.
    """
    # Preferred model first. The fallback happens on the request, not on
    # construction (assumed not to validate the model name — TODO confirm).
    candidate_models = (
        'gemini-2.0-flash-exp',
        'gemini-2.5-flash',
        'gemini-1.5-pro',
        'gemini-1.5-flash',
    )

    try:
        results = search_results or []

        # Format the search results for Gemini
        if results:
            search_summary = "Kết quả tìm kiếm Google:\n" + "".join(
                f"{i}. {result['title']}\n {result['snippet']}\n Nguồn: {result['link']}\n\n"
                for i, result in enumerate(results[:5], 1)
            )
        else:
            search_summary = "Không tìm thấy kết quả tìm kiếm Google cho tin tức này. Điều này có thể do API bị giới hạn hoặc tin tức quá mới/chưa được đăng tải."

        # Include our model results in the analysis
        if distilbert_prediction and distilbert_confidence is not None:
            distilbert_analysis = f"Phân tích DistilBERT: Dự đoán '{distilbert_prediction}' với độ tin cậy {distilbert_confidence:.3f}"
        else:
            distilbert_analysis = "DistilBERT: Không thể phân tích"

        prompt = f"""
Hãy phân tích tin tức sau và đánh giá độ tin cậy của nó một cách đơn giản, dễ hiểu:
"{news_text}"
{search_summary}
{distilbert_analysis}
Hãy trả lời bằng tiếng Việt, ngắn gọn và dễ hiểu cho người dùng bình thường:
1. Tin tức này có vẻ THẬT hay GIẢ? (Chỉ trả lời THẬT hoặc GIẢ)
2. Tại sao bạn nghĩ vậy? (Giải thích ngắn gọn, dễ hiểu)
3. Người đọc nên làm gì? (Lời khuyên đơn giản)
Tránh dùng thuật ngữ kỹ thuật, hãy viết như đang nói chuyện với bạn bè.
"""
        print("Calling Gemini API...")
        print(f"DEBUG - News text being analyzed: {news_text}")
        print(f"DEBUG - Search results count: {len(results)}")
        if results:
            print(f"DEBUG - First search result title: {results[0].get('title', 'No title')}")

        # Use consistent settings to get reliable results
        generation_config = genai.types.GenerationConfig(
            temperature=0.1,  # Low temperature for more consistent results
            top_p=0.8,  # Focus on most likely tokens
            top_k=20,  # Limit vocabulary choices
            max_output_tokens=1000
        )

        first_error = None
        for model_name in candidate_models:
            try:
                gemini_model = genai.GenerativeModel(model_name)
                response = gemini_model.generate_content(prompt, generation_config=generation_config)
                answer = response.text
            except Exception as model_error:
                print(f"Gemini model '{model_name}' failed: {model_error}")
                if first_error is None:
                    first_error = model_error
                continue
            print("Gemini API response received successfully")
            return answer

        # Every candidate failed: report the preferred model's error.
        raise first_error
    except Exception as e:
        print(f"Gemini analysis error: {e}")
        print(f"Error type: {type(e).__name__}")

        # If we hit the API limit, provide a basic analysis
        if "429" in str(e) or "quota" in str(e).lower():
            print("Gemini API quota exceeded, providing fallback analysis...")
            if distilbert_prediction == 'REAL':
                verdict_text = 'Tin tức này có vẻ THẬT'
            elif distilbert_prediction == 'FAKE':
                verdict_text = 'Tin tức này có vẻ GIẢ'
            else:
                verdict_text = 'Không thể xác định'
            confidence_text = f"{distilbert_confidence:.0%}" if distilbert_confidence else 'Không có'
            source_count = len(search_results) if search_results else 0
            fallback_analysis = f"""
**Phân tích cơ bản (do giới hạn API):**
🤖 **Kết quả AI:** {verdict_text}
📊 **Độ tin cậy:** {confidence_text}
🌐 **Nguồn tin:** {source_count} nguồn được tìm thấy
💡 **Khuyến nghị:** Hãy kiểm tra thêm từ các nguồn tin chính thống trước khi tin tưởng hoàn toàn.
"""
            return fallback_analysis

        # For other errors, see what models are available
        try:
            print("Available models:")
            for available_model in genai.list_models():
                if 'gemini' in available_model.name.lower():
                    print(f" - {available_model.name}")
        except Exception as list_error:
            print(f"Could not list models: {list_error}")
        return f"Lỗi phân tích Gemini: {e}"
def _mentions(text, *phrases):
    """Return True if any of *phrases* occurs in *text* as a whole word/phrase."""
    return any(
        re.search(r'(?<!\w)' + re.escape(phrase) + r'(?!\w)', text)
        for phrase in phrases
    )


def calculate_combined_confidence(distilbert_prediction, distilbert_confidence, source_credibility, popularity_score, gemini_analysis, source_support=0.5):
    """Calculate combined confidence from all three tools.

    The result is the estimated probability that the news is REAL: a base
    value from DistilBERT plus fixed adjustments for source credibility,
    popularity, source support and the Gemini text, clamped to [0.05, 0.95].

    Gemini keywords are matched as whole words. A substring test for "giả"
    also fires on "giải thích", which the prompt asks Gemini to write, so it
    wrongly triggered the FAKE override and blocked the REAL override.

    Args:
        distilbert_prediction: "REAL", "FAKE" or None.
        distilbert_confidence: Confidence of that prediction, or None when
            the model is unavailable (a neutral base of 0.5 is then used).
        source_credibility: Credible fraction of sources (0-1).
        popularity_score: Normalized result count (0-1).
        gemini_analysis: Gemini's text answer; None is treated as empty.
        source_support: Fraction of sources supporting the news (0-1).

    Returns:
        A float in [0.05, 0.95].
    """
    gemini_lower = (gemini_analysis or "").lower()
    says_high = _mentions(gemini_lower, "cao")
    says_low = _mentions(gemini_lower, "thấp")
    says_fake = says_low or _mentions(gemini_lower, "giả", "fake")
    says_reliable = says_high or _mentions(gemini_lower, "chính xác", "đáng tin cậy")

    # Base confidence from DistilBERT
    if distilbert_confidence is None:
        base_confidence = 0.5  # No model verdict: start neutral
    elif distilbert_prediction == "REAL":
        base_confidence = distilbert_confidence
    else:
        base_confidence = 1 - distilbert_confidence

    # Adjust based on source credibility
    if source_credibility > 0.7:
        credibility_adjustment = 0.2
    elif source_credibility > 0.4:
        credibility_adjustment = 0.05
    else:
        credibility_adjustment = -0.1

    # Adjust based on popularity
    if popularity_score > 0.7:
        popularity_adjustment = 0.1
    elif popularity_score > 0.4:
        popularity_adjustment = 0.0
    else:
        popularity_adjustment = -0.05

    # Adjust based on source support (whether sources support or contradict the news)
    if source_support > 0.7:
        support_adjustment = 0.15  # Sources strongly support
    elif source_support > 0.4:
        support_adjustment = 0.0  # Sources are neutral
    else:
        support_adjustment = -0.15  # Sources contradict

    # Adjust based on Gemini analysis
    if says_high:
        gemini_adjustment = 0.2
    elif says_low:
        gemini_adjustment = -0.2
    else:
        gemini_adjustment = 0.0

    # Special case: DistilBERT is unsure but sources and Gemini agree it's real
    if (distilbert_confidence is not None and distilbert_confidence < 0.6 and
            source_credibility > 0.6 and says_reliable and not says_fake):
        base_confidence = 0.8
        print("Overriding low DistilBERT confidence due to strong source and Gemini agreement for REAL")
    # Special case: If DistilBERT and Gemini both say FAKE, respect that
    elif distilbert_prediction == "FAKE" and says_fake:
        base_confidence = 0.2
        print("Overriding confidence due to DistilBERT and Gemini agreement for FAKE")

    # Calculate final confidence
    final_confidence = base_confidence + credibility_adjustment + popularity_adjustment + gemini_adjustment + support_adjustment
    final_confidence = max(0.05, min(0.95, final_confidence))
    return final_confidence
def analyze_news(news_text):
    """Main analysis function combining all three tools.

    Runs Google search, DistilBERT, source analysis and Gemini in sequence
    (each step degrades to a neutral default if it raises), combines them
    into one confidence value and renders an HTML/Markdown report.

    Returns:
        A 4-tuple for the Gradio outputs: an update for the detailed report,
        the "real" label text, the "fake" label text and a visibility update
        for the results column. Blank input returns a usage guide; an
        unexpected error returns an error panel.
    """
    try:
        # Guard None as well as blank input before calling .strip().
        if not news_text or not news_text.strip():
            empty_message = """
<div style="font-family: 'Segoe UI', Arial, sans-serif; line-height: 1.6; color: #333;">
## 📝 **HƯỚNG DẪN SỬ DỤNG**
<div style="background: linear-gradient(135deg, #74b9ff 0%, #0984e3 100%); color: white; padding: 20px; border-radius: 10px; margin: 20px 0; text-align: center;">
<h2 style="margin: 0; font-size: 24px;">💡 Vui lòng nhập tin tức</h2>
<p style="margin: 10px 0 0 0; font-size: 16px; opacity: 0.9;">Để bắt đầu phân tích</p>
</div>
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #17a2b8; margin: 10px 0;">
<p><strong>Hướng dẫn:</strong></p>
<ul>
<li>Nhập tin tức tiếng Việt cần kiểm tra vào ô trên</li>
<li>Nhấn nút "Phân tích với AI nâng cao"</li>
<li>Chờ hệ thống phân tích (có thể mất 10-30 giây)</li>
<li>Xem kết quả phân tích chi tiết</li>
</ul>
</div>
</div>
"""
            return gr.update(value=empty_message, visible=True), "Độ chắc chắn là tin thật: 0%", "Độ chắc chắn là tin giả: 0%", gr.update(visible=False)

        print(f"Analyzing: {news_text[:50]}...")

        # Step 1: Search Google for related information
        print("1. Running Google Search...")
        try:
            search_results = google_search(news_text)
        except Exception as e:
            print(f"Google Search error: {e}")
            search_results = []

        # Step 2: Run our trained model
        print("2. Running DistilBERT analysis...")
        try:
            distilbert_prediction, distilbert_confidence, real_score, fake_score = predict_with_distilbert(news_text)
        except Exception as e:
            print(f"DistilBERT analysis error: {e}")
            distilbert_prediction, distilbert_confidence, real_score, fake_score = None, None, None, None

        # Step 3: Check the sources we found
        print("3. Analyzing sources and popularity...")
        try:
            source_credibility, popularity_score, credibility_text = analyze_sources(search_results)
            source_support, support_text = analyze_source_support(news_text, search_results)
        except Exception as e:
            print(f"Source analysis error: {e}")
            source_credibility, popularity_score, credibility_text = 0.5, 0.2, "Lỗi phân tích nguồn"
            source_support, support_text = 0.5, "Lỗi phân tích hỗ trợ nguồn"

        # Step 4: Get Gemini AI analysis
        print("4. Running Gemini analysis...")
        try:
            gemini_analysis = analyze_with_gemini(news_text, search_results, distilbert_prediction, distilbert_confidence)
        except Exception as e:
            print(f"Gemini analysis error: {e}")
            gemini_analysis = f"Lỗi phân tích Gemini: {str(e)}"

        # Step 5: Combine everything into final result
        print("5. Calculating combined confidence...")
        # The model may be unavailable (confidence is None); formatting None
        # with ".3f" would abort the whole analysis.
        confidence_debug = f"{distilbert_confidence:.3f}" if distilbert_confidence is not None else "N/A"
        print(f" DistilBERT: {distilbert_prediction} ({confidence_debug})")
        print(f" Source credibility: {source_credibility:.3f}")
        print(f" Source support: {source_support:.3f}")
        print(f" Popularity: {popularity_score:.3f}")
        try:
            combined_confidence = calculate_combined_confidence(
                distilbert_prediction, distilbert_confidence,
                source_credibility, popularity_score, gemini_analysis, source_support
            )
            print(f" Final combined confidence: {combined_confidence:.3f}")
        except Exception as e:
            print(f"Confidence calculation error: {e}")
            combined_confidence = 0.5  # Default to neutral

        # Step 6: Format the final results
        real_confidence = combined_confidence
        fake_confidence = 1 - combined_confidence

        # NOTE(review): the headline verdict below comes from DistilBERT
        # alone, while the percentages and the final conclusion use the
        # combined score, so the two can disagree in the same report.
        if distilbert_prediction == "REAL":
            prediction_emoji = "✅"
            verdict_headline = 'TIN THẬT'
            verdict_sentence = 'Tin tức này có vẻ THẬT'
        elif distilbert_prediction == "FAKE":
            prediction_emoji = "❌"
            verdict_headline = 'TIN GIẢ'
            verdict_sentence = 'Tin tức này có vẻ GIẢ'
        else:
            prediction_emoji = "❓"
            verdict_headline = 'KHÔNG XÁC ĐỊNH'
            verdict_sentence = 'Không thể xác định'

        confidence_level = "Cao" if combined_confidence > 0.7 else "Trung bình" if combined_confidence > 0.4 else "Thấp"
        confidence_emoji = "🟢" if combined_confidence > 0.7 else "🟡" if combined_confidence > 0.4 else "🔴"

        # DistilBERT's own confidence, shown in the "AI analysis" panel.
        model_confidence_text = f"{distilbert_confidence:.0%}" if distilbert_confidence else 'Không có'
        if distilbert_confidence and distilbert_confidence > 0.8:
            model_confidence_level = 'Rất cao'
        elif distilbert_confidence and distilbert_confidence > 0.6:
            model_confidence_level = 'Cao'
        elif distilbert_confidence and distilbert_confidence > 0.4:
            model_confidence_level = 'Trung bình'
        else:
            model_confidence_level = 'Thấp'

        # Convert technical metrics to user-friendly Vietnamese
        source_quality = "Tốt" if source_credibility > 0.7 else "Trung bình" if source_credibility > 0.4 else "Kém"
        source_count_text = f"{len(search_results)} nguồn tin" if len(search_results) > 0 else "Không tìm thấy nguồn"

        # Simplify credibility text
        if "High credibility" in credibility_text:
            credibility_summary = "✅ Nguồn tin đáng tin cậy"
        elif "Medium credibility" in credibility_text:
            credibility_summary = "⚠️ Nguồn tin trung bình"
        else:
            credibility_summary = "❌ Nguồn tin kém tin cậy"

        # Simplify support text. The "Sources mixed: ..." message also
        # contains the word "contradict", so the contradiction case is
        # recognized by its prefix rather than by substring.
        support_lower = support_text.lower()
        if "strongly support" in support_lower:
            support_summary = "✅ Các nguồn ủng hộ tin tức này"
        elif support_lower.startswith("sources contradict"):
            support_summary = "❌ Các nguồn phản bác tin tức này"
        else:
            support_summary = "⚠️ Các nguồn có ý kiến trái chiều"

        final_verdict = 'THẬT' if real_confidence > fake_confidence else 'GIẢ'
        final_verdict_confidence = max(real_confidence, fake_confidence)

        detailed_analysis = f"""
<div style="font-family: 'Segoe UI', Arial, sans-serif; line-height: 1.6; color: #333;">
## 🔍 **KẾT QUẢ PHÂN TÍCH TIN TỨC**
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 10px; margin: 20px 0; text-align: center;">
<h2 style="margin: 0; font-size: 24px;">{prediction_emoji} {verdict_headline}</h2>
<p style="margin: 10px 0 0 0; font-size: 18px; opacity: 0.9;">{confidence_emoji} Độ tin cậy: {confidence_level} ({combined_confidence:.0%})</p>
</div>
### 🤖 **Phân tích bằng AI**
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #007bff; margin: 10px 0;">
<p><strong>Kết quả:</strong> {prediction_emoji} {verdict_sentence}</p>
<p><strong>Độ chắc chắn:</strong> {model_confidence_text} - {model_confidence_level}</p>
</div>
### 🌐 **Kiểm tra nguồn tin**
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #28a745; margin: 10px 0;">
<p><strong>Tìm thấy:</strong> {source_count_text}</p>
<p><strong>Chất lượng nguồn:</strong> {source_quality} ({source_credibility:.0%})</p>
<p><strong>Đánh giá:</strong> {credibility_summary}</p>
<p><strong>Hỗ trợ:</strong> {support_summary}</p>
</div>
### 🧠 **Phân tích thông minh**
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #ffc107; margin: 10px 0;">
{gemini_analysis}
</div>
### 📊 **KẾT LUẬN CUỐI CÙNG**
<div style="background: linear-gradient(135deg, #f093fb 0%, #f5576c 100%); color: white; padding: 15px; border-radius: 8px; margin: 20px 0;">
<p style="margin: 0; font-size: 16px;"><strong>Tin tức này có khả năng {final_verdict} với độ tin cậy {final_verdict_confidence:.0%}</strong></p>
<p style="margin: 5px 0 0 0; font-size: 14px; opacity: 0.9;">Dựa trên phân tích AI, kiểm tra nguồn tin và đánh giá thông minh</p>
</div>
</div>
"""
        return gr.update(value=detailed_analysis, visible=True), f"Độ chắc chắn là tin thật: {real_confidence:.1%}", f"Độ chắc chắn là tin giả: {fake_confidence:.1%}", gr.update(visible=True)
    except Exception as e:
        # Last-resort boundary so the UI always gets a renderable result.
        error_message = f"""
<div style="font-family: 'Segoe UI', Arial, sans-serif; line-height: 1.6; color: #333;">
## ❌ **LỖI PHÂN TÍCH**
<div style="background: linear-gradient(135deg, #ff6b6b 0%, #ee5a24 100%); color: white; padding: 20px; border-radius: 10px; margin: 20px 0; text-align: center;">
<h2 style="margin: 0; font-size: 24px;">⚠️ Có lỗi xảy ra</h2>
<p style="margin: 10px 0 0 0; font-size: 16px; opacity: 0.9;">Vui lòng thử lại sau</p>
</div>
<div style="background: #f8f9fa; padding: 15px; border-radius: 8px; border-left: 4px solid #dc3545; margin: 10px 0;">
<p><strong>Chi tiết lỗi:</strong> {str(e)}</p>
<p><strong>Gợi ý:</strong> Kiểm tra kết nối internet và thử lại</p>
</div>
</div>
"""
        print(f"Analysis error: {e}")
        return gr.update(value=error_message, visible=True), "Độ chắc chắn là tin thật: 0%", "Độ chắc chắn là tin giả: 0%", gr.update(visible=True)
| # --- GRADIO INTERFACE --- | |
def create_interface():
    """Build and return the Gradio Blocks UI (not yet launched).

    Layout: an HTML banner, a row with the input column and an initially
    hidden results column, and an initially hidden detailed-report area.
    The analyze button calls analyze_news() with the textbox content.
    """
    banner_html = """
<div style="text-align: center; padding: 20px;">
<h1 style="color: #2c3e50; margin-bottom: 10px;">🔍 Vietnamese Fake News Detection System</h1>
<p style="color: #7f8c8d; font-size: 16px; margin-bottom: 30px;">Powered by Google Search + Gemini AI + DistilBERT</p>
<div style="background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border-radius: 15px; margin: 20px 0;">
<h3 style="margin: 0 0 15px 0;">🛡️ Hệ thống phát hiện tin giả tiếng Việt</h3>
<div style="display: flex; justify-content: space-around; flex-wrap: wrap; gap: 15px;">
<div style="text-align: center;">
<div style="font-size: 24px; margin-bottom: 5px;">🌐</div>
<strong>Google Search</strong><br>
<small>Tìm kiếm thông tin thực tế</small>
</div>
<div style="text-align: center;">
<div style="font-size: 24px; margin-bottom: 5px;">🧠</div>
<strong>Gemini AI</strong><br>
<small>Phân tích thông minh</small>
</div>
<div style="text-align: center;">
<div style="font-size: 24px; margin-bottom: 5px;">🤖</div>
<strong>DistilBERT</strong><br>
<small>AI chuyên tiếng Việt</small>
</div>
</div>
</div>
<div style="background: #f8f9fa; padding: 15px; border-radius: 10px; border-left: 4px solid #17a2b8; margin: 20px 0;">
<p style="margin: 0; color: #495057;"><strong>💡 Lưu ý:</strong> Kết quả có thể thay đổi nhẹ giữa các lần phân tích do tính chất AI của Gemini, nhưng độ chính xác tổng thể vẫn được đảm bảo.</p>
</div>
</div>
"""
    with gr.Blocks(title="Vietnamese Fake News Detection System", theme=gr.themes.Soft()) as app:
        gr.Markdown(banner_html)

        with gr.Row():
            # Left: input box and trigger button.
            with gr.Column(scale=2):
                gr.Markdown("### 📝 Nhập tin tức cần kiểm tra")
                news_box = gr.Textbox(
                    placeholder="Nhập tin tức tiếng Việt cần kiểm tra...",
                    lines=4,
                    show_label=False,
                )
                run_button = gr.Button("🔍 Phân tích với AI nâng cao", variant="primary", size="lg")
            # Right: score labels, hidden until an analysis has run.
            with gr.Column(scale=1, visible=False) as results_panel:
                gr.Markdown("### 📊 Kết quả phân tích")
                real_label = gr.Label(value="Độ chắc chắn là tin thật: 0%")
                fake_label = gr.Label(value="Độ chắc chắn là tin giả: 0%")

        report_area = gr.Markdown("### 📋 Phân tích chi tiết sẽ hiển thị ở đây...", visible=False)

        # Output order matches the 4-tuple returned by analyze_news().
        run_button.click(
            fn=analyze_news,
            inputs=[news_box],
            outputs=[report_area, real_label, fake_label, results_panel],
        )
    return app
def test_google_search():
    """Test Google Search API functionality.

    Runs a few sample queries through google_search() and prints a summary.
    google_search() silently substitutes canned results whenever the live
    API fails, so a non-empty result alone proves nothing. A query passes
    only when its results differ from what google_search_fallback() returns
    for the same text.

    Returns:
        True if at least one query returned live results, else False.
    """
    print("Testing Google Search API...")
    print("=" * 50)

    # Test queries
    test_queries = [
        "Argentina World Cup 2022",
        "Vietnam COVID-19 news",
        "Tin tức Việt Nam"
    ]

    results_found = 0
    for i, query in enumerate(test_queries, 1):
        print(f"\nTest {i}: '{query}'")
        print("-" * 30)
        try:
            results = google_search(query)
            print(f"Results: {len(results)} found")
            if not results:
                print("No results found")
            elif results == google_search_fallback(query):
                # The fallback is deterministic, so equality identifies it.
                print("Only canned fallback results returned - live search failed")
            else:
                results_found += 1
                print(f"First result: {results[0]['title'][:50]}...")
                print(f" Link: {results[0]['link']}")
        except Exception as e:
            print(f"Error: {e}")

    print(f"\nTest Summary: {results_found}/{len(test_queries)} tests passed")
    if results_found == 0:
        print("\nGoogle Search is not working!")
        print("Possible solutions:")
        print(" 1. Check API quota in Google Cloud Console")
        print(" 2. Verify API keys are correct")
        print(" 3. Ensure Custom Search API is enabled")
        print(" 4. Check Search Engine ID is valid")
    elif results_found < len(test_queries):
        print("\nGoogle Search partially working")
        print("Some queries work, others don't - check query formatting")
    else:
        print("\nGoogle Search is working perfectly!")
    return results_found > 0
def test_complete_system():
    """Smoke-test analyze_news() on a few sample headlines.

    Prints whether each call completed and the type of its result; nothing
    is asserted or returned.
    """
    print("Testing Complete Vietnamese Fake News Detection System")
    print("=" * 60)

    sample_headlines = (
        "Argentina vô địch World Cup 2022",
        "Hôm nay trời mưa ở Hà Nội",
        "COVID-19 đã được chữa khỏi hoàn toàn",
    )

    case_number = 0
    for headline in sample_headlines:
        case_number += 1
        print(f"\nTest Case {case_number}: '{headline}'")
        print("-" * 40)
        try:
            outcome = analyze_news(headline)
        except Exception as e:
            print(f"Analysis failed: {e}")
            continue
        try:
            print("Analysis completed successfully")
            print(f"Result type: {type(outcome)}")
        except Exception as e:
            print(f"Analysis failed: {e}")
| # --- LAUNCH APP --- | |
if __name__ == "__main__":
    # Script entry point: announce startup, build the UI and serve it.
    for startup_line in (
        "Starting Vietnamese Fake News Detection System...",
        "Tools integrated: Google Search + Gemini AI + DistilBERT",
    ):
        print(startup_line)

    # Uncomment the line below to run tests first
    # test_google_search()

    launch_options = {
        "server_name": "0.0.0.0",
        "server_port": 7860,  # Standard port for Hugging Face Spaces
        "share": False,  # Not needed for Hugging Face Spaces
        "show_error": True,
    }
    interface = create_interface()
    interface.launch(**launch_options)