Jawad committed on
Commit
a61cdb6
·
1 Parent(s): 8fc52b2

add exploration

Browse files
Files changed (2) hide show
  1. exploration_app.py +94 -0
  2. stream_app.py +1 -0
exploration_app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- coding: utf-8 -*-
2
+ import pickle
3
+ import pandas as pd
4
+ import streamlit as st
5
+ from scipy import stats
6
+
7
+ import plotly.express as px
8
+ import plotly.figure_factory as ff
9
+
10
+ import scipy
11
+ import numpy as np
12
+ from data_processing import load_data, process_data, get_monetary_dataframe, get_themes_per_year
13
+ from model import prepare_predictors, prepare_data, run_training, split, predict, features_importance, run_cv_training, \
14
+ automl_training
15
+
16
+
17
def _max_width_(max_width_px=1500):
    """Cap the width of the Streamlit main block container.

    Injects a ``<style>`` element through ``st.markdown`` that sets
    ``max-width`` on ``.reportview-container .main .block-container``.

    Args:
        max_width_px: Maximum width in pixels. Defaults to 1500, the value
            that used to be hard-coded, so existing no-argument calls are
            unaffected.
    """
    max_width_str = f"max-width: {max_width_px}px;"
    # Doubled braces are literal CSS braces inside the f-string.
    st.markdown(
        f"""
    <style>
    .reportview-container .main .block-container{{
        {max_width_str}
    }}
    </style>
    """,
        unsafe_allow_html=True,
    )
29
+
30
+
31
# force screen width
_max_width_()

st.title("Data Analysis 🌎 📃")
st.write("by [Theolex](https://www.theolex.io/)")

# load and process data
data = load_data()
decisions, organizations, authorities = process_data(data)

# one filter widget per column
col1, col2, col3, col4 = st.columns(4)

with col1:
    authorities_country = st.selectbox('Authority country', authorities.country.unique())

with col2:
    # third positional argument is the default index: position 4 of range(1, 11) is 5 years
    nb_years = st.selectbox('Number of years', range(1, 11), 4)

with col3:
    list_continents = decisions.org_continent.unique().tolist()
    # Pre-select "europe" when present; fall back to the first option instead of
    # raising ValueError when the loaded data has no such continent.
    default_continent = "europe"
    org_continent = st.selectbox(
        "Company's continent",
        list_continents,
        list_continents.index(default_continent) if default_continent in list_continents else 0,
    )

with col4:
    list_company_types = decisions.org_company_type.unique().tolist()
    # Same fallback as above for the default company activity.
    default_company_type = "Banking & Finance"
    org_company_type = st.selectbox(
        "Company's activity",
        list_company_types,
        list_company_types.index(default_company_type) if default_company_type in list_company_types else 0,
    )

# question answered by the chart below, phrased from the current selections
st.subheader(f"Which {authorities_country} regulators and prosecutors have been "
             f"the most active in enforcement actions against {org_continent} "
             f"{org_company_type} companies in the last {nb_years} years?")
61
+
62
# apply filters
select_auth = authorities[authorities.country == authorities_country].name.sort_values()
# Build the lookup set once instead of once per decision row.
select_auth_set = set(select_auth)
# Keep decisions in which at least one listed authority belongs to the selected country.
authority_filter = decisions.authorities_name.apply(lambda a: not select_auth_set.isdisjoint(a))
# NOTE(review): the reference year is fixed at 2021; confirm whether it should
# follow the current date or the most recent year present in the data.
reference_year = 2021
year_filter = (decisions.year >= (reference_year - nb_years))
org_continent_filter = (decisions.org_continent == org_continent)
org_company_type_filter = (decisions.org_company_type == org_company_type)
decision_scope = decisions[authority_filter & year_filter & org_continent_filter & org_company_type_filter]

# One row per (decision, authority) pair.
decision_scope = decision_scope.explode("authorities_name")
# A decision can also list authorities from other countries; drop those rows so
# the ranking and the case table only show authorities of the selected country.
decision_scope = decision_scope[decision_scope.authorities_name.isin(select_auth_set)]
top_auths = (
    decision_scope.groupby(['authorities_name'])['authorities_name']
    .count()
    .sort_values(ascending=False)
    .head(5)
)

# bar chart of the five authorities with the most decisions in scope
fig = px.bar(top_auths,
             template="simple_white",
             color_continuous_scale='RdBu',
             width=1200, height=600)
st.plotly_chart(fig)

with st.expander("Explore cases"):
    st.dataframe(decision_scope[['authorities_name', 'org_name', 'decision_date', 'monetary_sanction', 'org_country',
                                 'org_company_type']])
83
+
84
+ # st.subheader("What are the top 10 negotiated settlements in France "
85
+ # "(involving French or foreign authorities) in the last 5 years?")
86
+ #
87
+ # st.subheader(
88
+ # "What are the top 3 areas (sanctions, anti-corruption, fraud, market manipulation, tax, etc.) "
89
+ # "of enforcement against banks in Germany in the last 3 years?")
90
+ #
91
+ # st.subheader("What are the largest enforcement actions involving French banks in the last 5 years?")
92
+ #
93
+ # st.subheader(
94
+ # "Which US regulators have imposed the largest penalties against financial institutions in the last 3 years?")
stream_app.py CHANGED
@@ -61,6 +61,7 @@ st.subheader("Dataset Description")
61
 
62
  st.metric('Number of validated decisions linked to organisations (and not individuals)', decision_scope.shape[0])
63
 
 
64
  st.metric('Decisions with monetary sanctions',
65
  decision_scope[decision_scope.monetary_sanction > 0].shape[0])
66
 
 
61
 
62
  st.metric('Number of validated decisions linked to organisations (and not individuals)', decision_scope.shape[0])
63
 
64
+
65
  st.metric('Decisions with monetary sanctions',
66
  decision_scope[decision_scope.monetary_sanction > 0].shape[0])
67