Spaces:

TanmayTomar
/

FORENSIC-TOOLKIT

Running

App Files Files Community

FORENSIC-TOOLKIT / TEXT_PIPELINE.py

TanmayTomar

Upload 8 files

0cec2c9 verified about 2 months ago

raw

history blame contribute delete

1.9 kB

	import pandas as pd
	import faiss
	import os
	import json

	def run_text_pipeline(claim: str, state: dict):
	    """
	    Executes the text analysis pipeline using pre-loaded tools.

	    The claim is first run through a retrieve -> rerank stage. If that stage
	    yields no evidence, the external fact checker is consulted instead;
	    otherwise the claim is classified against the reranked evidence and an
	    explanation is produced from the top three evidence items.

	    Args:
	        claim: The textual claim to verify.
	        state: Dictionary of pre-loaded tools and data. The keys read here are
	            'retriever', 'reranker', 'classifier', 'summarizer',
	            'fact_checker', 'df', 'evidence_corpus' and 'faiss_index'.

	    Returns:
	        A dict with the keys:
	            "final_verdict": verdict from the classifier, or from the fact
	                checker on the fallback path ('NEUTRAL' if it gives none).
	            "explanation": text from the summarizer, or the fact checker's
	                summary ('Could not verify claim.' if it gives none).
	            "source": mapping of source name to a single URL (may be empty).

	    Raises:
	        KeyError: If a required key is missing from ``state``.
	    """
	    # Unpack all the necessary tools and data from the state dictionary
	    retriever = state['retriever']
	    reranker = state['reranker']
	    classifier = state['classifier']
	    summarizer = state['summarizer']
	    fact_checker = state['fact_checker']
	    df = state['df']
	    evidence_corpus = state['evidence_corpus']
	    faiss_index = state['faiss_index']

	    # --- RAG Pipeline ---
	    retrieved_docs, indices = retriever.retrieve_evidence(claim, faiss_index, evidence_corpus)
	    # NOTE(review): 'rerank_evidendce' looks like a misspelling, but it must
	    # match the method name on the reranker object - confirm before renaming.
	    reranked_docs = reranker.rerank_evidendce(claim, retrieved_docs)

	    if not reranked_docs:
	        # --- Fallback to Google Fact Check ---
	        print("No results from RAG, trying Google Fact Check...")
	        # Normalise a None/empty answer to {} so the .get() calls below are
	        # safe and the neutral defaults are returned instead of crashing.
	        result = fact_checker.check_claim(claim) or {}

	        fallback_source = {}
	        if result:
	            # 'URLs' may be missing, None or an empty list; use the '#'
	            # placeholder in all of those cases rather than indexing blindly.
	            urls = result.get('URLs') or ['#']
	            fallback_source = {result.get('source'): urls[0]}

	        return {
	            "final_verdict": result.get('verdict', 'NEUTRAL'),
	            "explanation": result.get('summary', 'Could not verify claim.'),
	            "source": fallback_source,
	        }

	    final_verdict, _ = classifier(claim, reranked_docs)
	    # Only the three highest-ranked evidence items are passed to the summarizer.
	    top_evidence_for_summary = reranked_docs[:3]
	    _, explanation = summarizer(claim, top_evidence_for_summary, final_verdict)

	    # Get sources from the original dataframe
	    sources_dict = {}
	    # len() is used instead of truthiness so array-like `indices` also work.
	    if len(indices) > 0 and 'source' in df.columns and 'url' in df.columns:
	        df_rel = df.iloc[indices]
	        # Handle potential duplicate sources by taking the first URL for each source
	        sources_dict = df_rel.groupby('source')['url'].first().to_dict()

	    return {
	        "final_verdict": final_verdict,
	        "explanation": explanation,
	        "source": sources_dict,
	    }