# -*- coding: utf-8 -*-
"""Streamlit dashboard comparing automatically processed decisions with
their validated versions, field by field, and reporting per-field accuracy.
"""
import streamlit as st
import requests
import pandas as pd

from data import headers

API_ROOT = "https://www.theolex.io/data"
# Seconds to wait for the API before giving up instead of hanging the app.
REQUEST_TIMEOUT = 30
# History rows written by the airflow user are the automated processing runs.
AIRFLOW_USER_ID = 45

st.title("Production scores")

st.sidebar.title("Parameters")
source_type_value = st.sidebar.selectbox(
    'Authority country', ['all', 'cftc', 'doj', 'cfbp', 'sec'])
# Only data sources created on or after this date are evaluated.
created_at = '2021-01-01'


# load data
@st.cache
def load_data(source_type):
    """Download the decision histories of the data sources of one authority.

    Args:
        source_type: value of the ``source_type`` API filter, or ``'all'``
            to request data sources without that filter.

    Returns:
        A list of ``(decision_id, history)`` tuples, where ``history`` is a
        DataFrame with one row per entry of the decision's history payload,
        built from each entry's ``fields`` mapping. Decisions whose history
        payload is empty are left out.

    Raises:
        requests.HTTPError: if the data source listing request fails.
    """

    def get_decision_hist(d_id):
        """Return the JSON history payload of decision ``d_id``."""
        url = f"{API_ROOT}/decisions/{d_id}/return_hist/"
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        return res.json()

    # Let requests build the query string; manual concatenation used to
    # produce a doubled separator ("per_page=4000&&source_type=...").
    params = {'per_page': 4000}
    if source_type != 'all':
        params['source_type'] = source_type
    response = requests.get(f"{API_ROOT}/data_source/", headers=headers,
                            params=params, timeout=REQUEST_TIMEOUT)
    # Fail with the HTTP error rather than a KeyError on an error payload.
    response.raise_for_status()
    data_sources = pd.DataFrame(response.json()['data_sources'])
    if data_sources.empty:
        # No rows means no `created_at` column to filter on.
        return []

    # filter per date
    # NOTE(review): this is a plain comparison against a 'YYYY-MM-DD' string,
    # so it presumably relies on ISO-formatted timestamps -- confirm.
    data_sources = data_sources[data_sources.created_at >= created_at]

    # get decisions history -- each decision is fetched once even when
    # several data sources point to it; rows without a decision id are
    # skipped (presumably sources not linked to a decision yet).
    decision_ids = (data_sources['decision_id']
                    .dropna()
                    .drop_duplicates()
                    .tolist())
    histories = [(_id, get_decision_hist(_id)) for _id in decision_ids]
    return [(_id, pd.DataFrame(pd.DataFrame(history).fields.to_dict()).T)
            for _id, history in histories
            if len(history) > 0]


def _values_match(left, right):
    """Return whether two field values are equal, treating two missing
    scalar values (None / NaN) as equal to each other."""
    both_scalar = (pd.api.types.is_scalar(left)
                   and pd.api.types.is_scalar(right))
    if both_scalar and pd.isna(left) and pd.isna(right):
        return True
    return left == right


df_list = load_data(source_type_value)

# Split each history into its processing rows (status 'P' written by the
# airflow user) and its validated rows (status 'V').
processed_decisions = {}
validated_decisions = {}
for decision_id, decision in df_list:
    processed = decision[(decision.status == 'P')
                         & (decision.history_user == AIRFLOW_USER_ID)]
    if not processed.empty:
        processed_decisions[decision_id] = processed

    validated = decision[decision.status == 'V']
    if not validated.empty:
        validated_decisions[decision_id] = validated

# Intersection of the processed and validated decisions
scope = list(set(processed_decisions) & set(validated_decisions))

st.metric(label="Number of elements", value=len(scope))

# compare fields between processing and validation
all_fields = ['monetary_sanction', 'currency', 'justice_type',
              'decision_date', 'defendant', 'monitor', 'nature_de_sanction',
              'nature_of_violations', 'reference', 'type',
              'country_of_violation']
compare_list = st.sidebar.multiselect('Fields to evaluate', all_fields,
                                      all_fields)

result = {}
details = {}
for decision_id in scope:
    # last processed version
    p = processed_decisions[decision_id].iloc[-1].to_dict()
    # last validated version
    v = validated_decisions[decision_id].iloc[-1].to_dict()
    # .get() keeps a field that is absent from a history frame from
    # aborting the whole report; it is then handled as a missing value.
    details[decision_id] = {col: (p.get(col), v.get(col))
                            for col in compare_list}
    result[decision_id] = {col: _values_match(p.get(col), v.get(col))
                           for col in compare_list}

st.subheader("Accuracy scores:")
# Rows are decisions and columns are fields, so the mean of each boolean
# column is the share of decisions where processing matched validation.
st.dataframe(pd.DataFrame(result).T.mean())

st.subheader("fields results:")
st.json(details)
# Alternative tabular view:
# st.dataframe(pd.DataFrame(details))