import pandas as pd
import faiss
import os
import json


def run_text_pipeline(claim: str, state: dict):
    """
    Executes the text analysis pipeline using pre-loaded tools.

    Args:
        claim: The claim text to verify.
        state: Dict of pre-loaded models and data (retriever, reranker,
            classifier, summarizer, fact_checker, df, evidence_corpus,
            faiss_index).

    Returns:
        A dict with "final_verdict", "explanation", and "source" keys.
    """
    # Unpack all the necessary tools and data from the state dictionary
    retriever = state['retriever']
    reranker = state['reranker']
    classifier = state['classifier']
    summarizer = state['summarizer']
    fact_checker = state['fact_checker']
    df = state['df']
    evidence_corpus = state['evidence_corpus']
    faiss_index = state['faiss_index']

    # --- RAG Pipeline ---
    # Dense retrieval over the FAISS index, then reranking of the hits
    retrieved_docs, indices = retriever.retrieve_evidence(claim, faiss_index, evidence_corpus)
    reranked_docs = reranker.rerank_evidence(claim, retrieved_docs)

    if not reranked_docs:
        # --- Fallback to Google Fact Check ---
        print("No results from RAG, trying Google Fact Check...")
        result = fact_checker.check_claim(claim)
        return {
            "final_verdict": result.get('verdict', 'NEUTRAL'),
            "explanation": result.get('summary', 'Could not verify claim.'),
            "source": {result.get('source'): result.get('URLs', ['#'])[0]} if result else {}
        }

    final_verdict, _ = classifier(claim, reranked_docs)
    top_evidence_for_summary = reranked_docs[:3]
    _, explanation = summarizer(claim, top_evidence_for_summary, final_verdict)

    # Get sources from the original dataframe
    sources_dict = {}
    if len(indices) > 0 and 'source' in df.columns and 'url' in df.columns:
        df_rel = df.iloc[indices]
        # Handle potential duplicate sources by taking the first URL for each source
        sources_dict = df_rel.groupby('source')['url'].first().to_dict()

    return {
        "final_verdict": final_verdict,
        "explanation": explanation,
        "source": sources_dict
    }
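
# For reference, the dict returned by run_text_pipeline is shaped like this
# (values are illustrative examples, not real output):
#
#     {
#         "final_verdict": "REFUTED",
#         "explanation": "Retrieved evidence places the Eiffel Tower in Paris...",
#         "source": {"Wikipedia": "https://en.wikipedia.org/wiki/Eiffel_Tower"},
#     }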
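
# ---------------------------------------------------------------------------
# Sketch: what the dense-retrieval step inside retriever.retrieve_evidence()
# plausibly looks like. This is a minimal, illustrative version only; the
# encoder model, top-k value, and helper name are assumptions, not taken
# from this Space's actual retriever implementation.
# ---------------------------------------------------------------------------
def _demo_dense_retrieval():
    import numpy as np
    from sentence_transformers import SentenceTransformer  # assumed encoder library

    corpus = [
        "The Eiffel Tower is located in Paris, France.",
        "Berlin is the capital of Germany.",
    ]
    encoder = SentenceTransformer("all-MiniLM-L6-v2")
    # Normalized embeddings make inner product equivalent to cosine similarity
    embeddings = encoder.encode(corpus, normalize_embeddings=True)

    index = faiss.IndexFlatIP(embeddings.shape[1])
    index.add(np.asarray(embeddings, dtype="float32"))

    query = encoder.encode(["The Eiffel Tower is in Berlin."], normalize_embeddings=True)
    scores, indices = index.search(np.asarray(query, dtype="float32"), 2)

    # Map FAISS row indices back to corpus passages, as the pipeline does
    return [corpus[i] for i in indices[0]], scores[0]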