Spaces:
Runtime error
Runtime error
| import streamlit as st | |
| import requests | |
| import pandas as pd | |
| import numpy as np | |
| from data import countries, country_ref | |
| # util functions | |
def get_id(x):
    """Return the integer stored in the second-to-last "/"-separated part of *x*.

    For a string ending in "/", such as ".../decisions/42/", this is the
    number just before the trailing slash.
    """
    # Only the last two separators matter, so split from the right.
    tail_parts = x.rsplit("/", 2)
    return int(tail_parts[-2])
def get_dict(df, col):
    """Return column *col* of *df* as a ``{index label: value}`` dictionary."""
    column = df[col]
    return column.to_dict()
def replace_lis_val(df, col):
    """Build a function that maps a list of index labels to values of ``df[col]``.

    Args:
        df: DataFrame whose index holds the labels to look up.
        col: Name of the column providing the replacement values.

    Returns:
        A callable taking an iterable of index labels and returning the list
        of corresponding ``df[col]`` values, in the same order.  The callable
        raises ``KeyError`` for a label that is not in ``df``'s index.
    """
    # Build the label -> value mapping once, instead of once per looked-up
    # element, which made every lookup cost a full pass over the column.
    lookup = df[col].to_dict()

    def _replace(l):
        return [lookup[i] for i in l]

    return _replace
def mode(lst):
    """Return the most frequent element of *lst*, or ``None`` if it is empty.

    When several elements share the highest count, the one that appears
    first in *lst* is returned, so the result is reproducible between runs.
    """
    # Local import keeps this block self-contained.
    from collections import Counter

    if not lst:
        return None
    # Counter counts in a single pass; most_common keeps first-seen order
    # among elements with equal counts.
    return Counter(lst).most_common(1)[0][0]
def load_data():
    """Fetch the validated decisions and their linked records from theolex.io.

    Sends a single GET request for up to 4000 decisions filtered on
    ``status=V`` and asks the API to include the related violations,
    organizations and authorities.

    The API token is read from the ``THEOLEX_API_TOKEN`` environment variable
    when it is set; otherwise the token embedded below is used.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    # Local import keeps this block self-contained.
    import os

    url = "https://www.theolex.io/data"
    validated_filter = "status=V"
    linked_dataset = "include[]=violations.*&include[]=organizations.*&include[]=authorities.*"
    url_d = f"{url}/decisions/?per_page=4000&{validated_filter}&{linked_dataset}"

    # SECURITY: this credential is committed in source. It should be rotated
    # and supplied only through the environment; the literal is kept as a
    # fallback so existing deployments keep working.
    token = os.environ.get("THEOLEX_API_TOKEN", "8d55a74628aee8122b7a5a1a51f7caad6d613ec1")

    response = requests.get(
        url_d,
        headers={'authorization': f'Token {token}',
                 'accept': 'application/json'},
        # Without a timeout, requests can block forever on a stalled server.
        timeout=60,
    )
    # Fail here on an HTTP error rather than later on an unexpected payload.
    response.raise_for_status()
    return response.json()
def process_data(data):
    """Turn the raw API payload into decisions, organizations and authorities tables.

    Args:
        data: Mapping with ``'decisions'``, ``'organizations'`` and
            ``'authorities'`` entries, each convertible by ``pd.DataFrame``.

    Returns:
        A ``(decisions, organizations, authorities)`` tuple of DataFrames.

        * ``decisions`` has one row per (decision, organization) pair, limited
          to decisions with status ``'V'`` and organizations whose company
          type is not ``"Individual"``.  It carries the organization columns
          (prefixed ``org_``) plus ``authorities_name`` and
          ``authorities_country``.
        * ``organizations`` has its columns prefixed with ``org_`` and an
          ``org_continent`` column looked up in ``country_ref``.
        * ``authorities`` is indexed by the id parsed from each authority URL.
    """
    # --- decisions ---
    decisions = pd.DataFrame(data['decisions'])
    # Parse the dates once and derive both the year and the plain date.
    decision_dates = pd.to_datetime(decisions['decision_date'])
    decisions['year'] = decision_dates.dt.year
    decisions['monetary_sanction'] = decisions['monetary_sanction'].astype(float)
    decisions['decision_date'] = decision_dates.dt.date
    decisions = decisions[decisions['status'] == 'V']
    decision_col = ['violations', 'authorities', 'organizations', 'country_of_violation', 'type', 'justice_type',
                    'defendant', 'decision_date', 'monetary_sanction', 'nature_de_sanction', 'violation_theme', 'year']
    decisions = decisions[decision_col]
    # One row per organization listed on a decision.
    decisions = decisions.explode('organizations')

    # --- organizations ---
    organizations = pd.DataFrame(data['organizations'])
    organizations['id'] = organizations['url'].apply(get_id)
    # Normalize country names; names missing from `countries` are kept as-is.
    organizations['country'] = (
        organizations['country'].str.lower().str.strip().apply(lambda v: countries.get(v, v))
    )
    # .copy() so the column added below is written to an independent frame
    # rather than to a slice (avoids pandas' SettingWithCopyWarning).
    organizations = organizations[["id", "name", "company_type", "revenues", "currency", "country", "lei"]].copy()
    organizations['continent'] = organizations['country'].apply(lambda v: country_ref.get(v, v))
    organizations.columns = ['org_' + col for col in organizations.columns]
    # Inner merge: decisions whose organization id has no match are dropped.
    decisions = decisions.merge(organizations, left_on='organizations', right_on='org_id')

    # Remove individuals; copy because columns are added to the result below.
    decisions = decisions[decisions['org_company_type'] != "Individual"].copy()

    # --- authorities ---
    authorities = pd.DataFrame(data['authorities'])
    authorities.index = authorities['url'].apply(get_id)
    authorities = authorities[["country", "type", "name"]].copy()
    authorities['country'] = (
        authorities['country'].str.lower().str.strip().apply(lambda v: countries.get(v, v))
    )

    # Replace each decision's list of authority ids by names, and reduce the
    # list of authority countries to its most frequent value.
    decisions['authorities_name'] = decisions['authorities'].apply(replace_lis_val(authorities, 'name'))
    decisions['authorities_country'] = (
        decisions['authorities'].apply(replace_lis_val(authorities, 'country')).apply(mode)
    )
    return decisions, organizations, authorities
def get_monetary_dataframe(decision_scope):
    """Return the decisions that have a positive sanction and a known revenue.

    Rows are kept when ``monetary_sanction > 0`` and ``org_revenues`` is not
    the empty string.  The input frame is left untouched.

    Args:
        decision_scope: DataFrame with at least the columns
            ``monetary_sanction``, ``org_revenues``, ``org_country`` and
            ``authorities_country``.

    Returns:
        A new DataFrame with ``org_revenues`` converted to float and these
        columns added: ``log10_org_revenues``, ``log10_monetary_sanction``,
        ``same_country``, ``monetary_sanction_rate`` (sanction divided by
        revenue) and ``log10_monetary_sanction_rate``.
    """
    keep = (decision_scope['monetary_sanction'] > 0) & (decision_scope['org_revenues'] != "")
    # Explicit copy: the columns below are written to an independent frame
    # instead of a filtered slice (avoids pandas' SettingWithCopyWarning).
    monetary_decision = decision_scope[keep].copy()

    monetary_decision['org_revenues'] = monetary_decision['org_revenues'].astype(float)
    monetary_decision['log10_org_revenues'] = np.log10(monetary_decision['org_revenues'])
    monetary_decision['log10_monetary_sanction'] = np.log10(monetary_decision['monetary_sanction'])
    monetary_decision['same_country'] = (
        monetary_decision['org_country'] == monetary_decision['authorities_country']
    )
    monetary_decision['monetary_sanction_rate'] = (
        monetary_decision['monetary_sanction'] / monetary_decision['org_revenues']
    )
    monetary_decision['log10_monetary_sanction_rate'] = np.log10(monetary_decision['monetary_sanction_rate'])
    return monetary_decision
def get_themes_per_year(monetary_decision):
    """Sum ``monetary_sanction`` for every (year, violation_theme) pair.

    The result is in long format: one row per pair, with the columns
    ``year``, ``violation_theme`` and ``monetary_sanction``.
    """
    grouped = monetary_decision.groupby(['year', 'violation_theme'])
    totals = grouped['monetary_sanction'].sum()
    return totals.reset_index()