theolex_streamlit / score_app.py
Jawad's picture
add fields
a67c43f
raw
history blame
2.92 kB
# -*- coding: utf-8 -*-
import streamlit as st
import requests
import pandas as pd
from data import headers
# Page header and sidebar controls.
st.title("Production scores")
st.sidebar.title("Parameters")

# 'all' is a sentinel: load_data only adds a source_type filter for the
# other choices.
authority_choices = ['all', 'cftc', 'doj', 'cfbp', 'sec']
source_type_value = st.sidebar.selectbox('Authority country',
                                         options=authority_choices)

# Lower bound applied to the created_at column of the data sources
# (compared with >= inside load_data).
created_at = '2021-01-01'
# load data
@st.cache
def load_data(source_type):
    """Download the data sources and the history of each linked decision.

    Args:
        source_type: Value sent as the ``source_type`` query parameter to
            filter data sources, or ``'all'`` to request them unfiltered.

    Returns:
        A list of ``(decision_id, history)`` tuples, one for every decision
        whose history response is non-empty. ``history`` is a DataFrame
        built from the ``fields`` entry of each history record, one row per
        record (assumes each ``fields`` entry is a mapping -- TODO confirm
        against the API).

    Raises:
        requests.HTTPError: If the data-source listing request returns an
            error status.
    """
    # Build the query from a dict so the URL is well formed whether or not
    # the optional filter is present (no dangling or doubled '&').
    query = {"per_page": 4000}
    if source_type != 'all':
        query["source_type"] = source_type

    # A single session serves the listing and every per-decision request,
    # so the connection to the host is reused instead of reopened each time.
    with requests.Session() as session:
        response = session.get("https://www.theolex.io/data/data_source/",
                               params=query, headers=headers)
        # Fail here with a clear HTTP error rather than later with a
        # KeyError on 'data_sources'.
        response.raise_for_status()
        data_sources = pd.DataFrame(response.json()['data_sources'])

        # An empty listing has no created_at column to filter on.
        if data_sources.empty:
            return []

        # filter per date -- created_at is the module-level threshold.
        # NOTE(review): plain >= comparison; presumably both sides are
        # ISO-formatted date strings -- confirm.
        data_sources = data_sources[data_sources.created_at >= created_at]

        def get_decision_hist(d_id):
            """Return the parsed JSON history of decision ``d_id``."""
            url = f"https://www.theolex.io/data/decisions/{d_id}/return_hist/"
            # Parsed as-is: empty histories are dropped further down.
            return session.get(url, headers=headers).json()

        # get decisions history
        histories = [(_id, get_decision_hist(_id))
                     for _id in data_sources['decision_id']]

    # Keep only decisions with at least one history record and turn the
    # per-record 'fields' entries into rows.
    return [(_id, pd.DataFrame(pd.DataFrame(records).fields.to_dict()).T)
            for _id, records in histories if len(records) > 0]
# (decision_id, history DataFrame) pairs for the selected authority.
df_list = load_data(source_type_value)


def _matching_rows_by_decision(pairs, row_mask):
    """Map each decision id to its rows selected by ``row_mask``.

    Decisions for which no row is selected are left out.
    """
    selected = {}
    for key, frame in pairs:
        subset = frame[row_mask(frame)]
        if len(subset) > 0:
            selected[key] = subset
    return selected


# Rows with status 'P' written by history user 45 -- 45 is the airflow
# user id.
processed_decisions = _matching_rows_by_decision(
    df_list,
    lambda frame: (frame.status == 'P') & (frame.history_user == 45),
)

# Rows with status 'V' (validated), whoever wrote them.
validated_decisions = _matching_rows_by_decision(
    df_list,
    lambda frame: (frame.status == 'V'),
)

# Decisions that have both a processed and a validated version.
common_ids = set(processed_decisions) & set(validated_decisions)
scope = list(common_ids)
st.metric(label="Number of elements", value=len(scope))
# Fields that can be compared between the processed and the validated
# version of a decision.
all_fields = [
    'monetary_sanction',
    'currency',
    'justice_type',
    'decision_date',
    'defendant',
    'monitor',
    'nature_de_sanction',
    'nature_of_violations',
    'reference',
    'type',
    'country_of_violation',
]
# Sidebar selector; every field is selected by default.
compare_list = st.sidebar.multiselect('Fields to evaluate',
                                      options=all_fields,
                                      default=all_fields)
# result:  decision id -> {field: True when both versions hold equal values}
# details: decision id -> {field: (processed value, validated value)}
result = {}
details = {}
for decision_id in scope:
    # Final row of each filtered history.
    # NOTE(review): treated as the latest version, which presumes the
    # history rows arrive oldest-first -- confirm against the API.
    last_processed = processed_decisions[decision_id].iloc[-1].to_dict()
    last_validated = validated_decisions[decision_id].iloc[-1].to_dict()

    value_pairs = {field: (last_processed[field], last_validated[field])
                   for field in compare_list}
    details[decision_id] = value_pairs
    result[decision_id] = {field: processed_value == validated_value
                           for field, (processed_value, validated_value)
                           in value_pairs.items()}

st.subheader("Accuracy scores:")
# One row per decision, one column per field; the column mean is the share
# of decisions whose processed value equals the validated one.
matches = pd.DataFrame(result).T
st.dataframe(matches.mean())

st.subheader("fields results:")
st.json(details)
# st.dataframe(pd.DataFrame(details))