-
728x90
xgboost
# xgb.DMatrix -> xgb.train -> xgb.train.predict -> xgb.plot_importance
xgb.DMatrix(train_df, train_y)
xgb.train(매개변수리스트, DMatrix로 변환한 train, DMatrix로 변환한 train/test리스트)
xgb.train.predict(DMatrix로 변환한 test)
plst = list(param.items()) # (키, 값) 튜플의 리스트 — 값만 나열하면 ['multi:softprob', 0.1, 3, 1, 3, 'mlogloss', 1, 0.8, 0.3, 0]
xgtrain = xgb.DMatrix(train_X, label=train_y) # 여기서 train_X는 dev_X (15663rows×8columns인 df), train_y는 dev_y (길이 15663인 series)
xgtest = xgb.DMatrix(test_X, label=test_y) # 여기서 test_X는 val_X (3916rows×8columns인 df), test_y는 val_y (길이 3916인 series)
watchlist = [ (xgtrain,'train'), (xgtest, 'test') ]
model = xgb.train(plst, xgtrain, num_rounds, watchlist, early_stopping_rounds=50, verbose_eval=20)
pred_test_y = model.predict(xgtest, ntree_limit = model.best_ntree_limit)
cv_scores = []
cv_scores.append(metrics.log_loss(val_y, pred_val_y)) # (실제값, 예측값) 3916*5
xgb.plot_importance(model, max_num_features=50, height=0.8, ax=ax)
[:, np.newaxis] # array 차원 1개 늘리기
2022-05-20 simple-feature-engg-notebook-spooky-author.ipynb (0.43MB)
728x90