Spaces:

TanmayTomar
/

FORENSIC-TOOLKIT

Sleeping

File size: 1,897 Bytes

0cec2c9

import pandas as pd
import faiss
import os
import json

def run_text_pipeline(claim: str, state: dict) -> dict:
    """Run the text-claim analysis pipeline using pre-loaded tools.

    Evidence is retrieved and reranked for ``claim``. When reranking yields
    nothing, the claim is handed to the external fact checker instead;
    otherwise it is classified against the reranked evidence and an
    explanation is generated from the top three reranked documents.

    Args:
        claim: The textual claim to verify.
        state: Mapping of pre-loaded tools and data. Required keys:
            ``retriever``, ``reranker``, ``classifier``, ``summarizer``,
            ``fact_checker``, ``df``, ``evidence_corpus`` and
            ``faiss_index``. Every key is read up front, so a missing one
            raises ``KeyError`` regardless of which path is taken.

    Returns:
        A dict with the keys ``final_verdict``, ``explanation`` and
        ``source`` (a mapping of source name to URL, possibly empty).
    """
    # Unpack all the necessary tools and data from the state dictionary
    retriever = state['retriever']
    reranker = state['reranker']
    classifier = state['classifier']
    summarizer = state['summarizer']
    fact_checker = state['fact_checker']
    df = state['df']
    evidence_corpus = state['evidence_corpus']
    faiss_index = state['faiss_index']

    # --- RAG Pipeline ---
    retrieved_docs, indices = retriever.retrieve_evidence(claim, faiss_index, evidence_corpus)
    # NOTE(review): the method name is spelled "rerank_evidendce" here; confirm
    # it matches the reranker's actual definition before renaming anything.
    reranked_docs = reranker.rerank_evidendce(claim, retrieved_docs)

    if not reranked_docs:
        # --- Fallback to Google Fact Check ---
        print("No results from RAG, trying Google Fact Check...")
        # Normalise a missing result (None) to an empty dict so the .get()
        # defaults below apply instead of raising AttributeError.
        result = fact_checker.check_claim(claim) or {}
        source = {}
        if result:
            # `or ['#']` also covers a URL list that is present but empty/None,
            # which a plain .get() default would not catch.
            urls = result.get('URLs') or ['#']
            source = {result.get('source'): urls[0]}
        return {
            "final_verdict": result.get('verdict', 'NEUTRAL'),
            "explanation": result.get('summary', 'Could not verify claim.'),
            "source": source
        }

    final_verdict, _ = classifier(claim, reranked_docs)
    top_evidence_for_summary = reranked_docs[:3]
    _, explanation = summarizer(claim, top_evidence_for_summary, final_verdict)

    # Get sources from the original dataframe
    sources_dict = {}
    # len() rather than truthiness: `indices` may be an array-like whose
    # boolean value is ambiguous.
    if len(indices) > 0 and 'source' in df.columns and 'url' in df.columns:
        # NOTE(review): assumes `indices` holds valid positional row numbers of
        # `df` -- TODO confirm the retriever never forwards a -1 "no result"
        # label, which iloc would silently read as the last row.
        df_rel = df.iloc[indices]
        # Handle potential duplicate sources by taking the first URL for each source
        sources_dict = df_rel.groupby('source')['url'].first().to_dict()

    return {
        "final_verdict": final_verdict,
        "explanation": explanation,
        "source": sources_dict
    }