Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| import streamlit as st | |
| import requests | |
| import pandas as pd | |
| import numpy as np | |
| import datetime | |
| from data import headers | |
# Page header and sidebar controls.
st.title("Production scores")
st.sidebar.title("Parameters")

# Authority whose data sources are evaluated; 'all' means no authority filter.
authority_choices = ['all', 'cftc', 'doj', 'cfbp', 'sec']
source_type_value = st.sidebar.selectbox('Authority country', authority_choices)

# Data sources created before the selected date are ignored.
created_at = st.sidebar.date_input(
    'Date input',
    value=datetime.date(2021, 1, 1),
    min_value=datetime.date(2020, 1, 1),
    max_value=datetime.date(2022, 1, 1),
)
# load data
def load_data(source_type, start_date, timeout=60):
    """Fetch data sources and the edit history of every linked decision.

    Args:
        source_type: Authority code sent as the ``source_type`` query
            parameter, or ``'all'`` to request every authority.
        start_date: ``datetime.date``; data sources whose ``created_at`` day
            is earlier than this are dropped.
        timeout: Seconds to wait on each HTTP request before giving up, so a
            stalled server cannot block the app forever.

    Returns:
        A list of ``(decision_id, history)`` tuples. ``history`` is a
        DataFrame with one row per history entry and one column per key of
        that entry's ``fields`` mapping. Decisions whose history is empty are
        left out; an empty listing yields ``[]``.
    """
    def get_decision_hist(d_id):
        # Ids can arrive as floats (pandas upcasts when some are missing),
        # hence the int() before building the URL.
        url = f"https://www.theolex.io/data/decisions/{int(d_id)}/return_hist/"
        res = requests.get(url, headers=headers, timeout=timeout)
        return res.json()

    # Let requests build the query string instead of concatenating '&' by hand.
    params = {'per_page': 4000}
    if source_type != 'all':
        params['source_type'] = source_type
    response = requests.get(
        "https://www.theolex.io/data/data_source/",
        params=params,
        headers=headers,
        timeout=timeout,
    )
    # Fail loudly if the listing request itself was rejected.
    response.raise_for_status()
    data_sources = pd.DataFrame(response.json()['data_sources'])
    if data_sources.empty:
        # No rows means no 'created_at' column to filter on; nothing to load.
        return []

    # filter per date
    data_sources['created_at'] = pd.to_datetime(data_sources['created_at']).dt.date
    data_sources = data_sources[data_sources.created_at >= start_date]

    # get decisions history
    # can be optimized by filtering first on validated decision for decision table
    # dropna() skips data sources without a decision, whatever the column dtype.
    histories = [
        (_id, get_decision_hist(_id))
        for _id in data_sources['decision_id'].dropna()
    ]

    # Each history entry is expected to carry a 'fields' mapping (TODO confirm
    # against the API); transposing puts entries on rows and fields on columns.
    return [
        (_id, pd.DataFrame(pd.DataFrame(hist).fields.to_dict()).T)
        for _id, hist in histories
        if len(hist) > 0
    ]
df_list = load_data(source_type_value, start_date=created_at)

# Split every decision history in one pass:
#   - processed: rows with status 'P' written by user 45 (the airflow user id)
#   - validated: rows with status 'V'
# A decision is only recorded in a mapping when it has at least one such row.
processed_decisions = {}
validated_decisions = {}
for decision_id, decision in df_list:
    is_processed = (decision.status == 'P') & (decision.history_user == 45)
    processed_rows = decision[is_processed]
    if len(processed_rows) > 0:
        processed_decisions[decision_id] = processed_rows

    validated_rows = decision[decision.status == 'V']
    if len(validated_rows) > 0:
        validated_decisions[decision_id] = validated_rows

# Decisions that have both a processed and a validated version.
scope = list(set(processed_decisions) & set(validated_decisions))
st.metric(label="Number of elements", value=len(scope))
# Fields that can be compared between the processed and validated versions.
all_fields = [
    'monetary_sanction',
    'currency',
    'justice_type',
    'decision_date',
    'defendant',
    'monitor',
    'nature_de_sanction',
    'nature_of_violations',
    'reference',
    'type',
    'country_of_violation',
]
# Every field is selected by default; the user can narrow the evaluation.
compare_list = st.sidebar.multiselect(
    'Fields to evaluate',
    options=all_fields,
    default=all_fields,
)

# result:  decision id -> {field: True when both versions hold equal values}
# details: decision id -> {field: (processed value, validated value)}
result = {}
details = {}
for decision_id in scope:
    # Only the last row of each history subset is compared.
    last_processed = processed_decisions[decision_id].iloc[-1].to_dict()
    last_validated = validated_decisions[decision_id].iloc[-1].to_dict()

    value_pairs = {}
    matches = {}
    for col in compare_list:
        processed_value = last_processed[col]
        validated_value = last_validated[col]
        value_pairs[col] = (processed_value, validated_value)
        matches[col] = processed_value == validated_value

    details[decision_id] = value_pairs
    result[decision_id] = matches
st.subheader("Accuracy scores:")
# One row per decision, one column per field; the column mean is the share of
# decisions whose processed value equals the validated one.
matches_per_decision = pd.DataFrame(result).T
accuracy_by_field = matches_per_decision.mean().sort_values(ascending=False)
tab_accuracy = accuracy_by_field.to_frame()
tab_accuracy = tab_accuracy.rename(columns={0: "Accuracy"})
tab_accuracy = tab_accuracy.rename_axis("fields")
st.dataframe(tab_accuracy, width=400, height=1000)

st.subheader("fields results:")
# Raw (processed, validated) value pairs for each decision and field.
st.json(details)