theolex_streamlit / exploration_app.py
Jawad's picture
add exploration
a61cdb6
# -*- coding: utf-8 -*-
import pickle
import pandas as pd
import streamlit as st
from scipy import stats
import plotly.express as px
import plotly.figure_factory as ff
import scipy
import numpy as np
from data_processing import load_data, process_data, get_monetary_dataframe, get_themes_per_year
from model import prepare_predictors, prepare_data, run_training, split, predict, features_importance, run_cv_training, \
automl_training
def _max_width_(max_width_px: int = 1500) -> None:
    """Inject a CSS rule that caps the width of the main Streamlit container.

    Args:
        max_width_px: Maximum width, in pixels, applied to the main block
            container. Defaults to 1500, the value that used to be hard-coded.

    The rule is sent through ``st.markdown`` with ``unsafe_allow_html=True``
    because the payload is a raw ``<style>`` element rather than Markdown.
    """
    # NOTE(review): the selector targets `.reportview-container`; confirm it
    # still matches the DOM of the Streamlit version this app is deployed on.
    css = (
        "\n"
        "<style>\n"
        ".reportview-container .main .block-container{\n"
        f"max-width: {max_width_px}px;\n"
        "}\n"
        "</style>\n"
    )
    st.markdown(css, unsafe_allow_html=True)
# Force the screen width before any other element is rendered.
_max_width_()

# The second emoji was stored as mis-decoded bytes ("πŸ“ƒ"); restored to the
# intended "page with curl" character so the title renders cleanly.
st.title("Data Analysis 🌎 📃")
st.write("by [Theolex](https://www.theolex.io/)")

# Load the raw data, then unpack the three objects the rest of the script
# works with.
data = load_data()
decisions, organizations, authorities = process_data(data)
# One selector per column; the chosen values drive the filters applied below.
col1, col2, col3, col4 = st.columns(4)

with col1:
    authorities_country = st.selectbox('Authority country', authorities.country.unique())

with col2:
    # index=4 pre-selects the fifth option of range(1, 11), i.e. 5 years.
    nb_years = st.selectbox('Number of years', range(1, 11), index=4)

with col3:
    list_continents = decisions.org_continent.unique().tolist()
    # list.index() raises ValueError when the preferred default is missing from
    # the loaded data; fall back to the first option instead of crashing.
    default_continent_index = (
        list_continents.index("europe") if "europe" in list_continents else 0
    )
    org_continent = st.selectbox("Company's continent", list_continents, default_continent_index)

with col4:
    list_company_types = decisions.org_company_type.unique().tolist()
    # Same fallback as for the continent selector.
    default_company_type_index = (
        list_company_types.index("Banking & Finance")
        if "Banking & Finance" in list_company_types
        else 0
    )
    org_company_type = st.selectbox("Company's activity", list_company_types,
                                    default_company_type_index)

# Restate the current selection as the question the chart below answers.
st.subheader(f"Which {authorities_country} regulators and prosecutors have been "
             f"the most active in enforcement actions against {org_continent} "
             f"{org_company_type} companies in the last {nb_years} years?")
# apply filters
select_auth = authorities[authorities.country == authorities_country].name.sort_values()

# Build the lookup set once instead of once per row inside the lambda; a row is
# kept when it names at least one authority of the selected country.
selected_authority_names = set(select_auth)
authority_filter = decisions.authorities_name.apply(
    lambda names: not selected_authority_names.isdisjoint(names)
)

# NOTE(review): the time window is anchored to a hard-coded 2021, so "the last
# N years" in the subheader drifts as time passes — confirm whether this should
# follow the current year or the latest year present in the data.
year_filter = (decisions.year >= (2021 - nb_years))
org_continent_filter = (decisions.org_continent == org_continent)
org_company_type_filter = (decisions.org_company_type == org_company_type)

decision_scope = decisions[authority_filter & year_filter & org_continent_filter & org_company_type_filter]
# One row per (decision, authority) pair so each authority is counted separately.
decision_scope = decision_scope.explode("authorities_name")

# Five authorities with the most rows in scope.
top_auths = (
    decision_scope.groupby(['authorities_name'])['authorities_name']
    .count()
    .sort_values(ascending=False)
    .head(5)
)

fig = px.bar(top_auths,
             template="simple_white",
             color_continuous_scale='RdBu',
             width=1200, height=600)
st.plotly_chart(fig)

# Let the user inspect the underlying cases behind the chart.
with st.expander("Explore cases"):
    st.dataframe(decision_scope[['authorities_name', 'org_name', 'decision_date', 'monetary_sanction', 'org_country',
                                 'org_company_type']])
# st.subheader("What are the top 10 negotiated settlements in France "
# "(involving French or foreign authorities) in the last 5 years?")
#
# st.subheader(
# "What are the top 3 areas (sanctions, anti-corruption, fraud, market manipulation, tax, etc.) "
# "of enforcement against banks in Germany in the last 3 years?")
#
# st.subheader("What are the largest enforcement actions involving French banks in the last 5 years?")
#
# st.subheader(
# "Which US regulators have imposed the largest penalties against financial institutions in the last 3 years?")