Spaces: Running
anakin87 committed · Commit 5b26a96
1 Parent(s): 5fe5c67

add LLM explanation feat

Browse files:
- Rock_fact_checker.py +9 -2
- app_utils/backend_utils.py +29 -4
- app_utils/config.py +9 -0

Rock_fact_checker.py
CHANGED
@@ -5,7 +5,7 @@ from json import JSONDecodeError
 
 import streamlit as st
 
-from app_utils.backend_utils import load_statements,
+from app_utils.backend_utils import load_statements, check_statement, explain_using_llm
 from app_utils.frontend_utils import (
     set_state_if_absent,
     reset_results,
@@ -80,7 +80,7 @@ def main():
         st.session_state.statement = statement
         with st.spinner("🧠 Performing neural search on documents..."):
             try:
-                st.session_state.results =
+                st.session_state.results = check_statement(statement, RETRIEVER_TOP_K)
                 print(f"S: {statement}")
                 time_end = time.time()
                 print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
@@ -121,5 +121,12 @@ def main():
         str_wiki_pages += f"[{doc}]({url}) "
         st.markdown(str_wiki_pages)
 
+        if max_key != "neutral":
+            explanation = explain_using_llm(
+                statement=statement, documents=docs, entailment_or_contradiction=max_key
+            )
+            explanation = "#### Explanation 🧠 (experimental):\n" + explanation
+            st.markdown(explanation)
+
 
 main()
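With this change the page first runs the retrieval + entailment pipeline via check_statement and then, whenever the aggregated verdict is not "neutral", asks the LLM for an explanation via explain_using_llm. Below is a minimal sketch of that flow outside Streamlit, assuming the pipeline result exposes the retrieved documents and an aggregated score per entailment label; the example statement and the aggregate_entailment_info key are illustrative, not taken from this commit.

# Hedged sketch of the new flow, not the Space's exact code.
from app_utils.backend_utils import check_statement, explain_using_llm

statement = "The Beatles were formed in Liverpool"        # illustrative input
results = check_statement(statement, retriever_top_k=5)   # retrieval + entailment pipeline

docs = results["documents"]                  # retrieved Wikipedia passages
agg = results["aggregate_entailment_info"]   # assumed key: scores for entailment/contradiction/neutral
max_key = max(agg, key=agg.get)              # label with the highest aggregated score

if max_key != "neutral":                     # only explain clear verdicts, as in the new UI block
    explanation = explain_using_llm(
        statement=statement, documents=docs, entailment_or_contradiction=max_key
    )
    print(explanation)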
app_utils/backend_utils.py
CHANGED
@@ -1,7 +1,9 @@
 import shutil
+from typing import List
 
+from haystack import Document
 from haystack.document_stores import FAISSDocumentStore
-from haystack.nodes import EmbeddingRetriever
+from haystack.nodes import EmbeddingRetriever, PromptNode
 from haystack.pipelines import Pipeline
 import streamlit as st
 
@@ -12,6 +14,7 @@ from app_utils.config import (
     RETRIEVER_MODEL,
     RETRIEVER_MODEL_FORMAT,
     NLI_MODEL,
+    PROMPT_MODEL,
 )
 
 
@@ -53,15 +56,37 @@ def start_haystack():
     pipe = Pipeline()
     pipe.add_node(component=retriever, name="retriever", inputs=["Query"])
     pipe.add_node(component=entailment_checker, name="ec", inputs=["retriever"])
-    return pipe
 
+    prompt_node = PromptNode(model_name_or_path=PROMPT_MODEL, max_length=150)
 
+    return pipe, prompt_node
+
+
-pipe = start_haystack()
+pipe, prompt_node = start_haystack()
 
 # the pipeline is not included as parameter of the following function,
 # because it is difficult to cache
 @st.cache(allow_output_mutation=True)
-def
+def check_statement(statement: str, retriever_top_k: int = 5):
     """Run query and verify statement"""
     params = {"retriever": {"top_k": retriever_top_k}}
     return pipe.run(statement, params=params)
+
+
+@st.cache(
+    hash_funcs={"tokenizers.Tokenizer": lambda _: None}, allow_output_mutation=True
+)
+def explain_using_llm(
+    statement: str, documents: List[Document], entailment_or_contradiction: str
+) -> str:
+    """Explain entailment/contradiction, by prompting a LLM"""
+    premise = " \n".join([doc.content.replace("\n", ". ") for doc in documents])
+    if entailment_or_contradiction == "entailment":
+        verb = "entails"
+    elif entailment_or_contradiction == "contradiction":
+        verb = "contradicts"
+
+    prompt = f"Premise: {premise}; Hypothesis: {statement}; Please explain in detail why the Premise {verb} the Hypothesis. Step by step Explanation:"
+
+    print(prompt)
+    return prompt_node(prompt)[0]
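The new explain_using_llm reuses the documents already retrieved for the entailment check and sends a single free-form prompt to Haystack's PromptNode. A standalone sketch of what that call looks like, assuming a flan-t5 checkpoint and one illustrative document (on the Space, PROMPT_MODEL comes from the secrets instead):

from haystack import Document
from haystack.nodes import PromptNode

# Illustrative model choice; the Space reads PROMPT_MODEL from st.secrets.
prompt_node = PromptNode(model_name_or_path="google/flan-t5-small", max_length=150)

documents = [
    Document(content="The Beatles were an English rock band formed in Liverpool in 1960.")
]
statement = "The Beatles were formed in Liverpool"

# Same prompt shape that explain_using_llm builds for an "entailment" verdict.
premise = " \n".join(doc.content.replace("\n", ". ") for doc in documents)
prompt = (
    f"Premise: {premise}; Hypothesis: {statement}; "
    "Please explain in detail why the Premise entails the Hypothesis. "
    "Step by step Explanation:"
)

print(prompt_node(prompt)[0])  # first generated explanation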
app_utils/config.py
CHANGED
@@ -14,3 +14,12 @@ try:
 except:
     NLI_MODEL = "valhalla/distilbart-mnli-12-1"
 print(f"Used NLI model: {NLI_MODEL}")
+
+
+# In HF Space, we use google/flan-t5-large
+# for local testing, a smaller model is better
+try:
+    PROMPT_MODEL = st.secrets["PROMPT_MODEL"]
+except:
+    PROMPT_MODEL = "google/flan-t5-small"
+print(f"Used Prompt model: {PROMPT_MODEL}")
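The prompt model is resolved the same way as the NLI model above: a PROMPT_MODEL secret wins, otherwise the small flan-t5 fallback is used for local testing. A minimal sketch of overriding it locally, assuming Streamlit's standard secrets file (the path and model name below are illustrative):

# .streamlit/secrets.toml (local) could contain, for example:
#   PROMPT_MODEL = "google/flan-t5-base"
import streamlit as st

try:
    PROMPT_MODEL = st.secrets["PROMPT_MODEL"]  # HF Space secret or local secrets.toml entry
except Exception:                              # no secret configured: small local fallback
    PROMPT_MODEL = "google/flan-t5-small"
print(f"Used Prompt model: {PROMPT_MODEL}")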