File size: 4,097 Bytes
cfd4139
 
 
2d5e6ee
 
 
cfd4139
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d5e6ee
cfd4139
2d5e6ee
cfd4139
 
 
 
 
 
 
 
 
2d5e6ee
cfd4139
 
 
 
 
2d5e6ee
 
 
 
 
 
 
 
 
fe0f3db
 
2d5e6ee
b816a05
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
import os
from collections import Counter

import numpy as np
import pandas as pd
import requests
import streamlit as st

from data import countries, country_ref


# util functions
def get_id(x):
    """Return the integer held in the second-to-last "/"-separated part of *x*.

    For a resource URL that ends with a trailing slash, such as
    ``".../decisions/42/"``, that part is the numeric identifier (``42``).
    """
    segments = x.split("/")
    identifier = segments[-2]
    return int(identifier)


def get_dict(df, col):
    """Return column *col* of *df* as a ``{index label: value}`` dictionary."""
    column = df[col]
    return column.to_dict()


def replace_lis_val(df, col):
    """Build a function that translates index labels of *df* into ``df[col]`` values.

    The returned callable takes an iterable of index labels and returns a
    list holding, in the same order, the value of column *col* for each
    label. It raises ``KeyError`` for a label that is not in the index of
    *df*.

    The label -> value lookup is built once, when this factory is called,
    instead of being rebuilt for every single element as before. The
    returned callable therefore reflects *df* as it was at creation time.
    """
    lookup = df[col].to_dict()

    def _replace(keys):
        return [lookup[key] for key in keys]

    return _replace


def mode(lst):
    """Return the most frequent element of *lst*, or ``None`` when it is empty.

    Elements must be hashable. When several elements share the highest
    count, the one that appears first in *lst* is returned, so the result
    is the same on every run.
    """
    if not lst:
        return None
    # Counter counts in a single pass; most_common keeps first-seen order
    # among elements with equal counts.
    return Counter(lst).most_common(1)[0][0]


# NOTE(review): st.cache is deprecated in recent Streamlit releases in favour
# of st.cache_data; migrate once the deployed Streamlit version allows it.
@st.cache
def load_data(timeout=60):
    """Download the validated decisions and their linked records from Theolex.

    Args:
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
            Raising here keeps an error payload from being returned and
            cached as if it were data.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    url = "https://www.theolex.io/data"
    validated_filter = "status=V"
    linked_dataset = "include[]=violations.*&include[]=organizations.*&include[]=authorities.*"
    url_d = f"{url}/decisions/?per_page=4000&{validated_filter}&{linked_dataset}"

    # SECURITY: the API token should come from the environment, not from the
    # source tree. The literal fallback only keeps existing deployments
    # working; rotate that token and remove the fallback.
    token = os.environ.get("THEOLEX_API_TOKEN", "8d55a74628aee8122b7a5a1a51f7caad6d613ec1")
    response = requests.get(url_d, headers={'authorization': f'Token {token}',
                                            'accept': 'application/json'},
                            timeout=timeout)
    response.raise_for_status()

    return response.json()


def _normalize_country(series):
    """Lower-case and strip each string of *series*, then translate it via ``countries``.

    A value with no entry in ``countries`` is kept in its lower-cased,
    stripped form.
    """
    return series.str.lower().str.strip().apply(lambda v: countries.get(v, v))


def process_data(data):
    """Turn the raw API payload into decision, organization and authority tables.

    Args:
        data: Mapping with the keys ``'decisions'``, ``'organizations'`` and
            ``'authorities'``, each holding records accepted by
            ``pd.DataFrame``.

    Returns:
        A tuple ``(decisions, organizations, authorities)``:

        * ``decisions`` - one row per (decision, organization) pair with
          status ``'V'``, joined with the ``org_``-prefixed organization
          columns, without organizations of company type ``"Individual"``,
          plus ``authorities_name`` and ``authorities_country``.
        * ``organizations`` - the organization table with every column
          prefixed by ``org_``.
        * ``authorities`` - ``country``, ``type`` and ``name`` indexed by the
          id extracted from each authority URL.
    """
    # work on decisions
    decisions = pd.DataFrame(data['decisions'])
    decisions['year'] = pd.to_datetime(decisions['decision_date']).dt.year
    decisions['monetary_sanction'] = decisions['monetary_sanction'].astype(float)
    # Explicit copies after each filter/selection keep the later column
    # assignments from targeting a slice of another frame.
    decisions = decisions[decisions['status'] == 'V'].copy()
    decisions['decision_date'] = pd.to_datetime(decisions['decision_date']).dt.date
    decision_col = ['violations', 'authorities', 'organizations', 'country_of_violation', 'type', 'justice_type',
                    'defendant', 'decision_date', 'monetary_sanction', 'nature_de_sanction', 'violation_theme', 'year']
    decisions = decisions[decision_col]
    # One row per organization listed on a decision, so each can be joined below.
    decisions = decisions.explode('organizations')

    # work on organizations
    organizations = pd.DataFrame(data['organizations'])
    organizations['id'] = organizations['url'].apply(get_id)
    organizations['country'] = _normalize_country(organizations['country'])
    organizations = organizations[["id", "name", "company_type", "revenues", "currency", "country", "lei"]].copy()
    # Countries missing from country_ref keep the country value as "continent".
    organizations['continent'] = organizations['country'].apply(lambda v: country_ref.get(v, v))
    organizations.columns = ['org_' + col for col in organizations.columns]
    # Inner join: decisions whose organization is absent from the payload are dropped.
    decisions = decisions.merge(organizations, left_on='organizations', right_on='org_id')
    # remove Individual
    decisions = decisions[decisions['org_company_type'] != "Individual"].copy()

    # work on authorities
    authorities = pd.DataFrame(data['authorities'])
    authorities.index = authorities['url'].apply(get_id)
    authorities = authorities[["country", "type", "name"]].copy()
    authorities['country'] = _normalize_country(authorities['country'])

    decisions['authorities_name'] = decisions['authorities'].apply(replace_lis_val(authorities, 'name'))
    # A decision can involve several authorities; keep their most frequent country.
    decisions['authorities_country'] = decisions['authorities'].apply(replace_lis_val(authorities, 'country')).apply(mode)

    return decisions, organizations, authorities


def get_monetary_dataframe(decision_scope):
    """Return the decisions that carry a positive sanction and known revenues.

    Rows are kept when ``monetary_sanction`` is greater than zero and
    ``org_revenues`` is not the empty string. The result is an independent
    copy (``decision_scope`` is left untouched) with ``org_revenues`` cast to
    float and these columns added:

    * ``log10_org_revenues``, ``log10_monetary_sanction``
    * ``same_country`` - whether ``org_country`` equals ``authorities_country``
    * ``monetary_sanction_rate`` - sanction divided by revenues
    * ``log10_monetary_sanction_rate``
    """
    has_sanction = decision_scope['monetary_sanction'] > 0
    has_revenues = decision_scope['org_revenues'] != ""
    # Copy so the assignments below act on an independent frame, not a slice.
    monetary_decision = decision_scope[has_sanction & has_revenues].copy()

    revenues = monetary_decision['org_revenues'].astype(float)
    sanction = monetary_decision['monetary_sanction']
    rate = sanction / revenues

    monetary_decision['org_revenues'] = revenues
    # The float casts let the vectorised log10 accept object-dtype columns.
    monetary_decision['log10_org_revenues'] = np.log10(revenues)
    monetary_decision['log10_monetary_sanction'] = np.log10(sanction.astype(float))
    monetary_decision['same_country'] = (monetary_decision['org_country'] == monetary_decision['authorities_country'])
    monetary_decision['monetary_sanction_rate'] = rate
    monetary_decision['log10_monetary_sanction_rate'] = np.log10(rate.astype(float))
    return monetary_decision


def get_themes_per_year(monetary_decision):
    """Sum ``monetary_sanction`` for every (year, violation_theme) pair.

    The result is in long format: one row per pair, with the columns
    ``year``, ``violation_theme`` and ``monetary_sanction``.
    """
    grouped = monetary_decision.groupby(['year', 'violation_theme'])
    totals = grouped['monetary_sanction'].sum()
    return totals.reset_index()