Spaces:
Runtime error
Runtime error
| # -*- coding: utf-8 -*- | |
| import streamlit as st | |
| import requests | |
| import pandas as pd | |
| from data import headers | |
# Page header and sidebar header.
st.title("Production scores")
st.sidebar.title("Parameters")

# Authority whose data sources are evaluated; the selected value is sent to
# the API as the `source_type` query parameter.
# NOTE(review): 'cfbp' may be a typo for 'cfpb' -- confirm against the API
# before changing it.
source_type_value = st.sidebar.selectbox(
    'Authority country',
    ['cftc', 'doj', 'cfbp', 'sec'],
)

# Cutoff date: only data sources created on or after this date are kept.
created_at = '2021-01-01'
# load data
def load_data(source_type):
    """Fetch the decision histories behind one authority's data sources.

    Queries the Theolex API for the data sources of ``source_type``, keeps
    those whose ``created_at`` is greater than or equal to the module-level
    ``created_at`` cutoff, then downloads the history of each related
    decision.

    Args:
        source_type: Value sent as the ``source_type`` query parameter
            (one of the sidebar choices, e.g. ``'sec'``).

    Returns:
        A list of ``(decision_id, history)`` tuples.  ``history`` is a
        DataFrame built from the ``fields`` entry of every history record:
        one row per record, one column per key found in ``fields``.
        Decisions whose history is empty are left out, and the list is
        empty when the API returns no data source at all.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
    """
    # Without a timeout a stalled connection would block the app forever.
    request_timeout = 30  # seconds

    def get_decision_hist(d_id):
        """Return the decoded JSON history of decision ``d_id``."""
        url = f"https://www.theolex.io/data/decisions/{d_id}/return_hist/"
        res = requests.get(url, headers=headers, timeout=request_timeout)
        # Fail with the HTTP error itself rather than a confusing
        # JSON/pandas error further down.
        res.raise_for_status()
        return res.json()

    response = requests.get(
        "https://www.theolex.io/data/data_source/",
        params={'source_type': source_type, 'per_page': 4000},
        headers=headers,
        timeout=request_timeout,
    )
    response.raise_for_status()
    data_sources = pd.DataFrame(response.json()['data_sources'])

    # An empty payload gives a frame without columns, so the date filter
    # below would raise AttributeError; there is nothing to load anyway.
    if data_sources.empty:
        return []

    # filter per date
    data_sources = data_sources[data_sources.created_at >= created_at]

    # get decisions history
    histories = [
        (_id, get_decision_hist(_id)) for _id in data_sources['decision_id']
    ]
    return [
        (_id, pd.DataFrame(pd.DataFrame(hist).fields.to_dict()).T)
        for _id, hist in histories
        if len(hist) > 0
    ]
df_list = load_data(source_type_value)

# Split every decision history into two views:
#   * rows with status 'P' written by user 45 (the airflow user id),
#   * rows with status 'V' (validated).
# A decision is only recorded in a dict when the matching view has rows.
processed_decisions = {}
validated_decisions = {}
for decision_id, history in df_list:
    processed_rows = history[
        (history.status == 'P') & (history.history_user == 45)
    ]
    if len(processed_rows) > 0:
        processed_decisions[decision_id] = processed_rows

    validated_rows = history[history.status == 'V']
    if len(validated_rows) > 0:
        validated_decisions[decision_id] = validated_rows

# Intersection of the processed and validated decisions
scope = list(set(processed_decisions) & set(validated_decisions))
st.metric(label="Number of elements", value=len(scope))
# Fields compared between the processed and the validated version; the
# second list is the default selection.
compare_list = st.sidebar.multiselect(
    'Fields to evaluate',
    [
        'monetary_sanction',
        'currency',
        'justice_type',
        'decision_date',
        'country_of_violation',
    ],
    ['monetary_sanction', 'decision_date'],
)

result = {}   # decision_id -> {field: True when both versions are equal}
details = {}  # decision_id -> {field: (processed value, validated value)}
for decision_id in scope:
    # Last row of each filtered history.
    processed_row = processed_decisions[decision_id].iloc[-1].to_dict()
    validated_row = validated_decisions[decision_id].iloc[-1].to_dict()

    value_pairs = {
        col: (processed_row[col], validated_row[col]) for col in compare_list
    }
    details[decision_id] = value_pairs
    result[decision_id] = {
        col: processed_value == validated_value
        for col, (processed_value, validated_value) in value_pairs.items()
    }

# Per field: mean of the match flags over all decisions in scope.
st.subheader("Accuracy scores:")
accuracy = pd.DataFrame(result).T.mean()
st.dataframe(accuracy)

st.subheader("fields results:")
st.json(details)