Spaces:
Runtime error
Runtime error
add exploration
Browse files
- exploration_app.py +94 -0
- stream_app.py +1 -0
exploration_app.py
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
import pickle
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import streamlit as st
|
| 5 |
+
from scipy import stats
|
| 6 |
+
|
| 7 |
+
import plotly.express as px
|
| 8 |
+
import plotly.figure_factory as ff
|
| 9 |
+
|
| 10 |
+
import scipy
|
| 11 |
+
import numpy as np
|
| 12 |
+
from data_processing import load_data, process_data, get_monetary_dataframe, get_themes_per_year
|
| 13 |
+
from model import prepare_predictors, prepare_data, run_training, split, predict, features_importance, run_cv_training, \
|
| 14 |
+
automl_training
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
def _max_width_(max_width_px=1500):
    """Inject a CSS rule that sets ``max-width`` on the main block container.

    Args:
        max_width_px: Number of pixels written into the ``max-width``
            declaration. Defaults to 1500, the value this app has always
            used, so existing zero-argument calls are unaffected.

    The rule is sent through ``st.markdown`` with ``unsafe_allow_html=True``
    so the raw ``<style>`` tag reaches the page.
    """
    # NOTE(review): the selector relies on Streamlit-generated class names;
    # confirm it still matches the DOM of the Streamlit version deployed.
    st.markdown(
        f"""
<style>
.reportview-container .main .block-container{{
max-width: {max_width_px}px;
}}
</style>
""",
        unsafe_allow_html=True,
    )
|
| 29 |
+
|
| 30 |
+
|
| 31 |
+
def _index_or_first(options, preferred):
    """Return the position of *preferred* in *options*, or 0 when it is absent.

    Used to pick a selectbox default without crashing the page when the
    preferred value does not occur in the loaded data.
    """
    try:
        return options.index(preferred)
    except ValueError:
        return 0


# force screen width
_max_width_()

st.title("Data Analysis 🌎 📃")
st.write("by [Theolex](https://www.theolex.io/)")

# load and process data
data = load_data()
decisions, organizations, authorities = process_data(data)

# One selectbox per filter, laid out side by side in four columns.
col1, col2, col3, col4 = st.columns(4)

with col1:
    authorities_country = st.selectbox('Authority country', authorities.country.unique())

with col2:
    # The third positional argument is the index of the default option,
    # so the box starts on range(1, 11)[4] == 5 years.
    nb_years = st.selectbox('Number of years', range(1, 11), 4)

with col3:
    list_continents = decisions.org_continent.unique().tolist()
    # Default to "europe" when present; otherwise fall back to the first entry
    # instead of raising ValueError from list.index().
    org_continent = st.selectbox("Company's continent", list_continents,
                                 _index_or_first(list_continents, "europe"))

with col4:
    list_company_types = decisions.org_company_type.unique().tolist()
    # Same fallback as above for the preferred default activity.
    org_company_type = st.selectbox("Company's activity", list_company_types,
                                    _index_or_first(list_company_types, "Banking & Finance"))
|
| 57 |
+
|
| 58 |
+
st.subheader(f"Which {authorities_country} regulators and prosecutors have been "
             f"the most active in enforcement actions against {org_continent} "
             f"{org_company_type} companies in the last {nb_years} years?")

# apply filters
select_auth = authorities[authorities.country == authorities_country].name.sort_values()
# Build the lookup set once instead of once per row inside the lambda.
select_auth_names = set(select_auth)
# Keep a decision when at least one of its authorities belongs to the selected country.
# NOTE(review): assumes every authorities_name cell is an iterable of names - confirm.
authority_filter = decisions.authorities_name.apply(lambda a: bool(select_auth_names & set(a)))
# NOTE(review): 2021 is a fixed reference year, so "last N years" is measured
# from 2021 rather than from today - confirm this matches the dataset's range.
year_filter = (decisions.year >= (2021 - nb_years))
org_continent_filter = (decisions.org_continent == org_continent)
org_company_type_filter = (decisions.org_company_type == org_company_type)
decision_scope = decisions[authority_filter & year_filter & org_continent_filter & org_company_type_filter]

# One row per (decision, authority) pair.
decision_scope = decision_scope.explode("authorities_name")

# A decision can involve several authorities, so after explode() the frame also
# holds rows for co-acting authorities from other countries. Rank only the
# authorities of the selected country, which is what the question above asks.
ranked_scope = decision_scope[decision_scope.authorities_name.isin(select_auth_names)]
top_auths = (ranked_scope.groupby(['authorities_name'])['authorities_name']
             .count()
             .sort_values(ascending=False)
             .head(5))

fig = px.bar(top_auths,
             template="simple_white",
             color_continuous_scale='RdBu',
             width=1200, height=600)
st.plotly_chart(fig)

# The detail table still lists every exploded row of the matching decisions.
with st.expander("Explore cases"):
    st.dataframe(decision_scope[['authorities_name', 'org_name', 'decision_date', 'monetary_sanction', 'org_country',
                                 'org_company_type']])
|
| 83 |
+
|
| 84 |
+
# st.subheader("What are the top 10 negotiated settlements in France "
|
| 85 |
+
# "(involving French or foreign authorities) in the last 5 years?")
|
| 86 |
+
#
|
| 87 |
+
# st.subheader(
|
| 88 |
+
# "What are the top 3 areas (sanctions, anti-corruption, fraud, market manipulation, tax, etc.) "
|
| 89 |
+
# "of enforcement against banks in Germany in the last 3 years?")
|
| 90 |
+
#
|
| 91 |
+
# st.subheader("What are the largest enforcement actions involving French banks in the last 5 years?")
|
| 92 |
+
#
|
| 93 |
+
# st.subheader(
|
| 94 |
+
# "Which US regulators have imposed the largest penalties against financial institutions in the last 3 years?")
|
stream_app.py
CHANGED
|
@@ -61,6 +61,7 @@ st.subheader("Dataset Description")
|
|
| 61 |
|
| 62 |
st.metric('Number of validated decisions linked to organisations (and not individuals)', decision_scope.shape[0])
|
| 63 |
|
|
|
|
| 64 |
st.metric('Decisions with monetary sanctions',
|
| 65 |
decision_scope[decision_scope.monetary_sanction > 0].shape[0])
|
| 66 |
|
|
|
|
| 61 |
|
| 62 |
st.metric('Number of validated decisions linked to organisations (and not individuals)', decision_scope.shape[0])
|
| 63 |
|
| 64 |
+
|
| 65 |
st.metric('Decisions with monetary sanctions',
|
| 66 |
decision_scope[decision_scope.monetary_sanction > 0].shape[0])
|
| 67 |
|