# -*- coding: utf-8 -*-
# Streamlit page answering: "which authorities of a given country have been
# the most active against companies of a given continent and activity over
# the last N years?".  The user picks the four filters, the page charts the
# five authorities with the most matching decisions and lists the cases.
import datetime
import pickle

import numpy as np
import pandas as pd
import plotly.express as px
import plotly.figure_factory as ff
import scipy
import streamlit as st
from scipy import stats

from data_processing import load_data, process_data, get_monetary_dataframe, get_themes_per_year
from model import (
    prepare_predictors,
    prepare_data,
    run_training,
    split,
    predict,
    features_importance,
    run_cv_training,
    automl_training,
)


def _max_width_():
    """Inject a CSS rule setting the page content's max width to 1500px."""
    max_width_str = "max-width: 1500px;"
    # NOTE(review): the <style> payload was missing from the template, which
    # made this function a no-op.  `.block-container` is assumed to be the
    # class of Streamlit's main content wrapper -- confirm against the
    # deployed Streamlit version.
    # Built on a single line so Markdown cannot mistake indented CSS for a
    # code block.
    css = f"<style>.block-container {{ {max_width_str} }}</style>"
    st.markdown(css, unsafe_allow_html=True)


def _default_index(options, preferred):
    """Return the position of ``preferred`` in ``options``, or 0 if absent.

    Keeps the page from crashing with ``ValueError`` when the preferred
    default value does not occur in the loaded data.
    """
    try:
        return options.index(preferred)
    except ValueError:
        return 0


# force screen width
_max_width_()

st.title("Data Analysis 🌎 📃")
st.write("by [Theolex](https://www.theolex.io/)")

# load and process data
data = load_data()
decisions, organizations, authorities = process_data(data)

# one selector per filter, laid out side by side
col1, col2, col3, col4 = st.columns(4)
with col1:
    authorities_country = st.selectbox('Authority country', authorities.country.unique())
with col2:
    # index=4 preselects the fifth option of range(1, 11), i.e. 5 years
    nb_years = st.selectbox('Number of years', range(1, 11), index=4)
with col3:
    list_continents = decisions.org_continent.unique().tolist()
    org_continent = st.selectbox("Company's continent",
                                 list_continents,
                                 index=_default_index(list_continents, "europe"))
with col4:
    list_company_types = decisions.org_company_type.unique().tolist()
    org_company_type = st.selectbox("Company's activity",
                                    list_company_types,
                                    index=_default_index(list_company_types, "Banking & Finance"))

st.subheader(f"Which {authorities_country} regulators and prosecutors have been "
             f"the most active in enforcement actions against {org_continent} "
             f"{org_company_type} companies in the last {nb_years} years?")

# apply filters
# Build the set of the selected country's authority names once rather than
# once per decision row.
select_auth = set(authorities[authorities.country == authorities_country].name)
# Keep a decision when at least one of its authorities is in that set.
authority_filter = decisions.authorities_name.apply(lambda names: not select_auth.isdisjoint(names))
# Count the window back from the current calendar year instead of a fixed
# year, so "last N years" stays accurate as time passes.
current_year = datetime.date.today().year
year_filter = decisions.year >= (current_year - nb_years)
org_continent_filter = decisions.org_continent == org_continent
org_company_type_filter = decisions.org_company_type == org_company_type

decision_scope = decisions[authority_filter & year_filter & org_continent_filter & org_company_type_filter]

# One row per (decision, authority) pair so each authority is counted
# separately for decisions that list several.
decision_scope = decision_scope.explode("authorities_name")
top_auths = (
    decision_scope.groupby(['authorities_name'])['authorities_name']
    .count()
    .sort_values(ascending=False)
    .head(5)
)

fig = px.bar(top_auths,
             template="simple_white",
             color_continuous_scale='RdBu',
             width=1200, height=600)
st.plotly_chart(fig)

with st.expander("Explore cases"):
    st.dataframe(decision_scope[['authorities_name', 'org_name', 'decision_date',
                                 'monetary_sanction', 'org_country', 'org_company_type']])

# Further questions, not yet implemented:
#
# st.subheader("What are the top 10 negotiated settlements in France "
#              "(involving French or foreign authorities) in the last 5 years?")
#
# st.subheader(
#     "What are the top 3 areas (sanctions, anti-corruption, fraud, market manipulation, tax, etc.) "
#     "of enforcement against banks in Germany in the last 3 years?")
#
# st.subheader("What are the largest enforcement actions involving French banks in the last 5 years?")
#
# st.subheader(
#     "Which US regulators have imposed the largest penalties against financial institutions in the last 3 years?")