Spaces:
Running
Running
anakin87
committed on
Commit
·
408dd7e
1
Parent(s):
274b354
rebuild the ternary plot w cliponaxis=False
Browse files
- Rock_fact_checker.py +11 -16
- app_utils/config.py +5 -4
- app_utils/frontend_utils.py +53 -0
- data/statements.txt +1 -1
Rock_fact_checker.py
CHANGED
|
@@ -4,8 +4,6 @@ import logging
|
|
| 4 |
from json import JSONDecodeError
|
| 5 |
|
| 6 |
import streamlit as st
|
| 7 |
-
import pandas as pd
|
| 8 |
-
import plotly.express as px
|
| 9 |
|
| 10 |
from app_utils.backend_utils import load_statements, query
|
| 11 |
from app_utils.frontend_utils import (
|
|
@@ -13,6 +11,7 @@ from app_utils.frontend_utils import (
|
|
| 13 |
reset_results,
|
| 14 |
entailment_html_messages,
|
| 15 |
create_df_for_relevant_snippets,
|
|
|
|
| 16 |
)
|
| 17 |
from app_utils.config import RETRIEVER_TOP_K
|
| 18 |
|
|
@@ -59,12 +58,14 @@ def main():
|
|
| 59 |
# Re-runs the script setting the random statement as the textbox value
|
| 60 |
# Unfortunately necessary as the Random statement button is _below_ the textbox
|
| 61 |
# Adapted for Streamlit>=1.12
|
| 62 |
-
if hasattr(st,
|
| 63 |
-
raise st.scriptrunner.script_runner.RerunException(
|
|
|
|
|
|
|
| 64 |
else:
|
| 65 |
raise st.runtime.scriptrunner.script_runner.RerunException(
|
| 66 |
-
|
| 67 |
-
|
| 68 |
else:
|
| 69 |
st.session_state.random_statement_requested = False
|
| 70 |
run_query = (
|
|
@@ -79,7 +80,7 @@ def main():
|
|
| 79 |
with st.spinner("🧠 Performing neural search on documents..."):
|
| 80 |
try:
|
| 81 |
st.session_state.results = query(statement, RETRIEVER_TOP_K)
|
| 82 |
-
print(
|
| 83 |
time_end = time.time()
|
| 84 |
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
|
| 85 |
print(f"elapsed time: {time_end - time_start}")
|
|
@@ -105,20 +106,14 @@ def main():
|
|
| 105 |
|
| 106 |
st.markdown(f"###### Aggregate entailment information:")
|
| 107 |
col1, col2 = st.columns([2, 1])
|
| 108 |
-
|
| 109 |
-
fig =
|
| 110 |
-
df_agg_entailment_info,
|
| 111 |
-
a="contradiction",
|
| 112 |
-
b="neutral",
|
| 113 |
-
c="entailment",
|
| 114 |
-
size="contradiction",
|
| 115 |
-
)
|
| 116 |
with col1:
|
| 117 |
st.plotly_chart(fig, use_container_width=True)
|
| 118 |
with col2:
|
| 119 |
st.write(results["agg_entailment_info"])
|
| 120 |
|
| 121 |
-
st.markdown(f"###### Relevant snippets:")
|
| 122 |
df, urls = create_df_for_relevant_snippets(docs)
|
| 123 |
st.dataframe(df)
|
| 124 |
|
|
|
|
| 4 |
from json import JSONDecodeError
|
| 5 |
|
| 6 |
import streamlit as st
|
|
|
|
|
|
|
| 7 |
|
| 8 |
from app_utils.backend_utils import load_statements, query
|
| 9 |
from app_utils.frontend_utils import (
|
|
|
|
| 11 |
reset_results,
|
| 12 |
entailment_html_messages,
|
| 13 |
create_df_for_relevant_snippets,
|
| 14 |
+
create_ternary_plot,
|
| 15 |
)
|
| 16 |
from app_utils.config import RETRIEVER_TOP_K
|
| 17 |
|
|
|
|
| 58 |
# Re-runs the script setting the random statement as the textbox value
|
| 59 |
# Unfortunately necessary as the Random statement button is _below_ the textbox
|
| 60 |
# Adapted for Streamlit>=1.12
|
| 61 |
+
if hasattr(st, "scriptrunner"):
|
| 62 |
+
raise st.scriptrunner.script_runner.RerunException(
|
| 63 |
+
st.scriptrunner.script_requests.RerunData("")
|
| 64 |
+
)
|
| 65 |
else:
|
| 66 |
raise st.runtime.scriptrunner.script_runner.RerunException(
|
| 67 |
+
st.runtime.scriptrunner.script_requests.RerunData("")
|
| 68 |
+
)
|
| 69 |
else:
|
| 70 |
st.session_state.random_statement_requested = False
|
| 71 |
run_query = (
|
|
|
|
| 80 |
with st.spinner("🧠 Performing neural search on documents..."):
|
| 81 |
try:
|
| 82 |
st.session_state.results = query(statement, RETRIEVER_TOP_K)
|
| 83 |
+
print(statement)
|
| 84 |
time_end = time.time()
|
| 85 |
print(time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()))
|
| 86 |
print(f"elapsed time: {time_end - time_start}")
|
|
|
|
| 106 |
|
| 107 |
st.markdown(f"###### Aggregate entailment information:")
|
| 108 |
col1, col2 = st.columns([2, 1])
|
| 109 |
+
agg_entailment_info = results["agg_entailment_info"]
|
| 110 |
+
fig = create_ternary_plot(agg_entailment_info)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 111 |
with col1:
|
| 112 |
st.plotly_chart(fig, use_container_width=True)
|
| 113 |
with col2:
|
| 114 |
st.write(results["agg_entailment_info"])
|
| 115 |
|
| 116 |
+
st.markdown(f"###### Most Relevant snippets:")
|
| 117 |
df, urls = create_df_for_relevant_snippets(docs)
|
| 118 |
st.dataframe(df)
|
| 119 |
|
app_utils/config.py
CHANGED
|
@@ -11,7 +11,8 @@ RETRIEVER_TOP_K = 5
|
|
| 11 |
|
| 12 |
# In HF Space, we use microsoft/deberta-v2-xlarge-mnli
|
| 13 |
# for local testing, a smaller model is better
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
|
|
|
|
|
| 11 |
|
| 12 |
# In HF Space, we use microsoft/deberta-v2-xlarge-mnli
|
| 13 |
# for local testing, a smaller model is better
|
| 14 |
+
try:
|
| 15 |
+
NLI_MODEL = st.secrets["NLI_MODEL"]
|
| 16 |
+
except:
|
| 17 |
+
NLI_MODEL = "valhalla/distilbart-mnli-12-1"
|
| 18 |
+
print(f"Used NLI model: {NLI_MODEL}")
|
app_utils/frontend_utils.py
CHANGED
|
@@ -1,5 +1,7 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
|
|
|
|
|
|
| 3 |
|
| 4 |
entailment_html_messages = {
|
| 5 |
"entailment": 'The knowledge base seems to <span style="color:green">confirm</span> your statement',
|
|
@@ -20,6 +22,57 @@ def reset_results(*args):
|
|
| 20 |
st.session_state.raw_json = None
|
| 21 |
|
| 22 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
def highlight_cols(s):
|
| 24 |
coldict = {"con": "#FFA07A", "neu": "#E5E4E2", "ent": "#a9d39e"}
|
| 25 |
if s.name in coldict.keys():
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
+
import plotly.graph_objects as go
|
| 4 |
+
|
| 5 |
|
| 6 |
entailment_html_messages = {
|
| 7 |
"entailment": 'The knowledge base seems to <span style="color:green">confirm</span> your statement',
|
|
|
|
| 22 |
st.session_state.raw_json = None
|
| 23 |
|
| 24 |
|
| 25 |
+
def create_ternary_plot(entailment_data):
|
| 26 |
+
hover_text = ""
|
| 27 |
+
for label, value in entailment_data.items():
|
| 28 |
+
hover_text += f"{label}: {value}<br>"
|
| 29 |
+
|
| 30 |
+
fig = go.Figure(
|
| 31 |
+
go.Scatterternary(
|
| 32 |
+
{
|
| 33 |
+
"cliponaxis": False,
|
| 34 |
+
"mode": "markers",
|
| 35 |
+
"a": [i for i in map(lambda x: x["entailment"], [entailment_data])],
|
| 36 |
+
"b": [i for i in map(lambda x: x["contradiction"], [entailment_data])],
|
| 37 |
+
"c": [i for i in map(lambda x: x["neutral"], [entailment_data])],
|
| 38 |
+
"hoverinfo": "text",
|
| 39 |
+
"text": hover_text,
|
| 40 |
+
"marker": {
|
| 41 |
+
"color": "#636efa",
|
| 42 |
+
"size": [0.01],
|
| 43 |
+
"sizemode": "area",
|
| 44 |
+
"sizeref": 2.5e-05,
|
| 45 |
+
"symbol": "circle",
|
| 46 |
+
},
|
| 47 |
+
}
|
| 48 |
+
)
|
| 49 |
+
)
|
| 50 |
+
|
| 51 |
+
fig.update_layout(
|
| 52 |
+
{
|
| 53 |
+
"ternary": {
|
| 54 |
+
"sum": 1,
|
| 55 |
+
"aaxis": makeAxis("Entailment", 0),
|
| 56 |
+
"baxis": makeAxis("<br>Contradiction", 45),
|
| 57 |
+
"caxis": makeAxis("<br>Neutral", -45),
|
| 58 |
+
}
|
| 59 |
+
}
|
| 60 |
+
)
|
| 61 |
+
return fig
|
| 62 |
+
|
| 63 |
+
|
| 64 |
+
def makeAxis(title, tickangle):
|
| 65 |
+
return {
|
| 66 |
+
"title": title,
|
| 67 |
+
"titlefont": {"size": 20},
|
| 68 |
+
"tickangle": tickangle,
|
| 69 |
+
"tickcolor": "rgba(0,0,0,0)",
|
| 70 |
+
"ticklen": 5,
|
| 71 |
+
"showline": False,
|
| 72 |
+
"showgrid": True,
|
| 73 |
+
}
|
| 74 |
+
|
| 75 |
+
|
| 76 |
def highlight_cols(s):
|
| 77 |
coldict = {"con": "#FFA07A", "neu": "#E5E4E2", "ent": "#a9d39e"}
|
| 78 |
if s.name in coldict.keys():
|
data/statements.txt
CHANGED
|
@@ -29,7 +29,7 @@ Sum 41 were originally called Kaspir
|
|
| 29 |
Bruce Springsteen has been named "the buzz"
|
| 30 |
Talking Heads collaborated with Brian Eno
|
| 31 |
Chris Cornell took part in Soundgarden and Audioslave
|
| 32 |
-
Chris Cornell
|
| 33 |
"There is a light that never goes out" is a song by The Smiths
|
| 34 |
Guns N' Roses formed in 1987
|
| 35 |
Izzy Stradlin took part in Guns N' Roses
|
|
|
|
| 29 |
Bruce Springsteen has been named "the buzz"
|
| 30 |
Talking Heads collaborated with Brian Eno
|
| 31 |
Chris Cornell took part in Soundgarden and Audioslave
|
| 32 |
+
Chris Cornell was a member of Pearl Jam
|
| 33 |
"There is a light that never goes out" is a song by The Smiths
|
| 34 |
Guns N' Roses formed in 1987
|
| 35 |
Izzy Stradlin took part in Guns N' Roses
|