# Hugging Face Spaces file view (page-scrape residue, kept for provenance).
# Space status: Runtime error
# File size: 2,284 Bytes
# Blame gutter alternates between commits fe0f3db and a67c43f.
# Line-number gutter: 1-59
import numpy as np
import pandas as pd
import xgboost as xgb
from xgboost import cv
from sklearn.model_selection import train_test_split
def prepare_data(monetary_decision):
    """Build the predictor frame and the target series used for training.

    A ``time`` feature is derived as the rounded number of months elapsed
    between each row's ``decision_date`` and the earliest ``decision_date``
    in the frame. The categorical predictors are cast to the pandas
    ``category`` dtype.

    Args:
        monetary_decision: DataFrame holding ``decision_date``,
            ``log10_monetary_sanction`` and every column listed in
            ``col_num`` / ``col_cat`` below (except ``time``, which is
            computed here). The caller's frame is not modified.

    Returns:
        A ``(predictors, target)`` tuple: the predictor DataFrame and the
        ``log10_monetary_sanction`` Series, both on a fresh 0..n-1 index.
    """
    # reset_index returns a new frame, so adding ``time`` below never touches
    # the caller's object.
    monetary_decision = monetary_decision.reset_index(drop=True)

    # The original divided by np.timedelta64(1, "M"). pandas 2.x rejects the
    # "M" unit because a month is not a fixed duration, so spell out the
    # average Gregorian month (365.2425 / 12 days = 2,629,746 s) instead.
    average_month = pd.Timedelta(seconds=2629746)
    elapsed = monetary_decision.decision_date - monetary_decision.decision_date.min()
    monetary_decision["time"] = (elapsed / average_month).round()

    col_num = ['log10_org_revenues',
               'time']
    col_cat = ['authorities_country',
               'type',
               'violation_theme',
               'justice_type',
               'org_country',
               'org_currency',
               'org_continent',
               'same_country',
               'org_company_type']

    # astype returns a new frame, so the category casts are not written onto
    # a slice of monetary_decision (which triggered SettingWithCopyWarning).
    predictors = monetary_decision[col_num + col_cat].astype(
        {col: "category" for col in col_cat}
    )
    target = monetary_decision.log10_monetary_sanction
    return predictors, target
def split(predictors, target):
    """Split predictors and target into train and test partitions.

    20% of the rows are held out for testing; the shuffle is seeded with
    ``random_state=42`` so the partition is reproducible.

    Returns:
        ``(predictors_train, predictors_test, target_train, target_test)``.
    """
    partitions = train_test_split(predictors, target, test_size=0.2, random_state=42)
    return tuple(partitions)
def run_training(predictors_train, predictors_test):
    """Cross-validate an XGBoost regressor, then fit it on the training data.

    A 3-fold cross-validation with early stopping is run first and its
    evaluation history is printed. The final model is then trained on the
    full training matrix for the number of rounds the cross-validation kept.

    Args:
        predictors_train: Training features; categorical columns are handed
            to XGBoost with ``enable_categorical=True``.
        predictors_test: Despite its name, this is used as the *label*
            vector for ``predictors_train`` (i.e. the training target). The
            name is kept so existing keyword callers keep working.

    Returns:
        The booster returned by ``xgb.train``.
    """
    data_train = xgb.DMatrix(predictors_train, label=predictors_test, enable_categorical=True)
    params = {'max_depth': 4,
              'learning_rate': 0.05,
              'colsample_bytree': 0.3,
              'subsample': 0.8,
              'gamma': 0.5,
              'objective': 'reg:squarederror'}
    num_round = 1000  # upper bound on boosting rounds
    xgb_cv = cv(dtrain=data_train, params=params, nfold=3,
                num_boost_round=num_round, early_stopping_rounds=10,
                metrics="rmse", as_pandas=True, seed=123)
    print(xgb_cv)
    # With early stopping the returned history ends at the best iteration, so
    # its length is the round count to train for. The original ignored this
    # and always trained the full 1000 rounds.
    best_num_round = len(xgb_cv)
    return xgb.train(params, data_train, best_num_round)
def predict(model, predictors):
    """Return ``model``'s predictions for the rows of ``predictors``.

    The predictors are wrapped in an ``xgb.DMatrix`` with categorical
    support enabled before being passed to ``model.predict``.
    """
    return model.predict(xgb.DMatrix(predictors, enable_categorical=True))
def features_importance(model):
    """Return the model's gain-based feature importances, sorted ascending.

    Args:
        model: Object exposing ``get_score(importance_type=...)`` that
            returns a feature-name -> score mapping (e.g. the booster
            returned by ``run_training``).

    Returns:
        ``pd.Series`` indexed by feature name, lowest score first.
    """
    gains = model.get_score(importance_type='gain')
    # Explicit float dtype so an empty mapping yields a float Series rather
    # than an object-dtype one.
    return pd.Series(gains, dtype="float64").sort_values()