"""Fetch decisions from the Theolex API and shape them into pandas dataframes."""
import os
from collections import Counter

import numpy as np
import pandas as pd
import requests
import streamlit as st

from data import countries, country_ref

# SECURITY NOTE(review): the fallback token below is committed in source and
# should be rotated and removed; set THEOLEX_API_TOKEN in the environment to
# override it without touching the code.
_API_TOKEN = os.environ.get(
    "THEOLEX_API_TOKEN", "8d55a74628aee8122b7a5a1a51f7caad6d613ec1"
)


# util functions
def get_id(x: str) -> int:
    """Return the integer found in the second-to-last "/"-separated part of *x*."""
    return int(x.split("/")[-2])


def get_dict(df: pd.DataFrame, col: str) -> dict:
    """Return column *col* of *df* as an ``{index label: value}`` dict."""
    return df[col].to_dict()


def replace_lis_val(df: pd.DataFrame, col: str):
    """Build a function mapping a list of index labels of *df* to their *col* values.

    The lookup table is built once, when this factory is called, instead of
    once per looked-up element.

    Returns:
        A callable taking an iterable of index labels and returning the list
        of matching values; it raises ``KeyError`` for an unknown label.
    """
    lookup = get_dict(df, col)

    def _replace(l):
        return [lookup[i] for i in l]

    return _replace


def mode(lst):
    """Return the most frequent element of *lst*, or ``None`` when it is empty.

    Ties are resolved in favour of the value that appears first in *lst*, so
    the result is deterministic from one run to the next.
    """
    if lst:
        return Counter(lst).most_common(1)[0][0]
    return None


def _normalize_country(series: pd.Series) -> pd.Series:
    """Lower-case and strip country labels, then translate them via ``countries``.

    Labels missing from ``countries`` are kept in their lower-cased, stripped form.
    """
    return series.str.lower().str.strip().apply(lambda v: countries.get(v, v))


# NOTE(review): `st.cache` is deprecated in recent Streamlit releases in favour
# of `st.cache_data` -- confirm the pinned Streamlit version before upgrading.
@st.cache
def load_data():
    """Download the validated decisions together with their linked records.

    Returns:
        The decoded JSON payload of the API response.

    Raises:
        requests.HTTPError: if the API answers with an error status, so that
            an error payload is never cached as if it were data.
    """
    url = "https://www.theolex.io/data"
    validated_filter = "status=V"
    linked_dataset = "include[]=violations.*&include[]=organizations.*&include[]=authorities.*"
    url_d = f"{url}/decisions/?per_page=4000&{validated_filter}&{linked_dataset}"
    response = requests.get(
        url_d,
        headers={
            'authorization': f'Token {_API_TOKEN}',
            'accept': 'application/json',
        },
    )
    response.raise_for_status()
    return response.json()


def process_data(data):
    """Turn the raw API payload into decision, organization and authority frames.

    Args:
        data: mapping with the keys ``'decisions'``, ``'organizations'`` and
            ``'authorities'``, each convertible to a ``DataFrame``.

    Returns:
        A ``(decisions, organizations, authorities)`` tuple where

        * ``decisions`` has one row per (decision, organization) pair, joined
          with the organization columns, without "Individual" organizations,
          and with ``authorities_name`` / ``authorities_country`` added;
        * ``organizations`` has every column prefixed with ``org_``;
        * ``authorities`` is indexed by the id parsed from its ``url``.
    """
    # work on decisions
    decisions = pd.DataFrame(data['decisions'])
    decision_dates = pd.to_datetime(decisions['decision_date'])
    decisions['year'] = decision_dates.dt.year
    decisions['decision_date'] = decision_dates.dt.date
    decisions['monetary_sanction'] = decisions['monetary_sanction'].astype(float)

    decision_col = ['violations', 'authorities', 'organizations',
                    'country_of_violation', 'type', 'justice_type', 'defendant',
                    'decision_date', 'monetary_sanction', 'nature_de_sanction',
                    'violation_theme', 'year']
    # keep validated decisions, restricted to the columns used downstream
    decisions = decisions.loc[decisions['status'] == 'V', decision_col]
    # one row per organization referenced by a decision
    decisions = decisions.explode('organizations')

    # work on organisations
    organizations = pd.DataFrame(data['organizations'])
    organizations['id'] = organizations['url'].apply(get_id)
    organizations['country'] = _normalize_country(organizations['country'])
    organizations = organizations[
        ["id", "name", "company_type", "revenues", "currency", "country", "lei"]
    ].copy()
    organizations['continent'] = organizations['country'].apply(
        lambda v: country_ref.get(v, v)
    )
    organizations.columns = ['org_' + col for col in organizations.columns]

    decisions = decisions.merge(
        organizations, left_on='organizations', right_on='org_id'
    )

    # remove Individual; copy so the columns added below land on an
    # independent frame rather than on a filtered view
    decisions = decisions[decisions['org_company_type'] != "Individual"].copy()

    # work on authorities
    authorities = pd.DataFrame(data['authorities'])
    authorities.index = authorities['url'].apply(get_id)
    authorities = authorities[["country", "type", "name"]].copy()
    authorities['country'] = _normalize_country(authorities['country'])

    # assumes each `authorities` cell is a list of authority ids -- TODO confirm
    decisions['authorities_name'] = decisions['authorities'].apply(
        replace_lis_val(authorities, 'name')
    )
    decisions['authorities_country'] = decisions['authorities'].apply(
        replace_lis_val(authorities, 'country')
    ).apply(mode)

    return decisions, organizations, authorities


def get_monetary_dataframe(decision_scope: pd.DataFrame) -> pd.DataFrame:
    """Return the decisions with a positive monetary sanction plus derived metrics.

    The input frame is left untouched. Added or rewritten columns:
    ``has_revenues``, ``org_revenues`` (as float, ``0`` when it was empty),
    ``log10_org_revenues``, ``log10_monetary_sanction``, ``same_country``,
    ``monetary_sanction_rate`` and ``log10_monetary_sanction_rate``.
    """
    monetary_decision = decision_scope[decision_scope['monetary_sanction'] > 0].copy()

    has_revenues = monetary_decision['org_revenues'] != ""
    monetary_decision['has_revenues'] = has_revenues
    # Only cells that are exactly "" become "0". `.str.replace('', '0')` would
    # insert a "0" between every character and corrupt every non-empty value.
    monetary_decision['org_revenues'] = (
        monetary_decision['org_revenues'].where(has_revenues, "0").astype(float)
    )

    monetary_decision['log10_org_revenues'] = np.log10(
        monetary_decision['org_revenues'] + 1
    )
    monetary_decision['log10_monetary_sanction'] = np.log10(
        monetary_decision['monetary_sanction'] + 1
    )
    monetary_decision['same_country'] = (
        monetary_decision['org_country'] == monetary_decision['authorities_country']
    )
    # rows whose revenue is 0 end up with an infinite rate (sanction is > 0)
    monetary_decision['monetary_sanction_rate'] = (
        monetary_decision['monetary_sanction'] / monetary_decision['org_revenues']
    )
    monetary_decision['log10_monetary_sanction_rate'] = np.log10(
        monetary_decision['monetary_sanction_rate']
    )
    return monetary_decision


def get_themes_per_year(monetary_decision: pd.DataFrame) -> pd.DataFrame:
    """Sum ``monetary_sanction`` per (``year``, ``violation_theme``) pair.

    Returns:
        A long-format frame with the columns ``year``, ``violation_theme``
        and ``monetary_sanction``.
    """
    grouped = monetary_decision.groupby(['year', 'violation_theme'])
    return grouped['monetary_sanction'].sum().reset_index()