5#include <MedProcessTools/MedProcessTools/MedProcessUtils.h>
9#include "MedProcessTools/MedProcessTools/MedSamples.h"
19 vector<string> eval_metric;
23 float colsample_bytree;
24 float colsample_bylevel;
26 float scale_pos_weight;
33 string split_penalties;
34 string monotone_constraints;
38 booster =
"gbtree"; objective =
"binary:logistic"; eta = 1.0; gamma = 1.0;
39 min_child_weight = 1; max_depth = 3; num_round = 500; silent = 1; eval_metric.push_back(
"auc"); missing_value = MED_MAT_MISSING_VALUE;
41 colsample_bytree = 1.0; colsample_bylevel = 1.0; subsample = 1.0; scale_pos_weight = 1.0; tree_method =
"auto"; lambda = 1; alpha = 0;
48 ADD_SERIALIZATION_FUNCS(booster, objective, eta, gamma, min_child_weight, max_depth, num_round, eval_metric, silent, missing_value, num_class,
49 colsample_bytree, colsample_bylevel, subsample, scale_pos_weight, tree_method, lambda, alpha, seed, verbose_eval, validate_frac, split_penalties, monotone_constraints)
54 explicit XGBBooster(
const std::vector<std::shared_ptr<xgboost::DMatrix> >& cache_mats)
60 return learner_.get();
63 inline void SetParam(
const std::string& name,
const std::string& val) {
64 auto it = std::find_if(cfg_.begin(), cfg_.end(),
65 [&name, &val](
decltype(*cfg_.begin()) &x) {
66 if (name ==
"eval_metric") {
67 return x.first == name && x.second == val;
69 return x.first == name;
71 if (it == cfg_.end()) {
72 cfg_.push_back(std::make_pair(name, val));
78 learner_->SetParams(cfg_);
79 learner_->Configure();
83 inline void LazyInit() {
85 learner_->SetParams(cfg_);
86 learner_->Configure();
103 std::unique_ptr<xgboost::Learner> learner_;
104 std::vector<std::pair<std::string, std::string> > cfg_;
109 BoosterHandle my_learner = NULL;
112 void init_defaults();
113 int feat_contrib_flags = 0;
114 virtual int init(
void *classifier_params) { this->params = *((
MedXGBParams*)classifier_params);
return 0; };
117 virtual int set_params(map<string, string>& initialization_map);
120 MedXGB() { init_defaults(); };
123 int Learn(
float *x,
float *y,
const float *w,
int nsamples,
int nftrs);
124 int Learn(
float *x,
float *y,
int nsamples,
int nftrs);
125 int Predict(
float *x,
float *&preds,
int nsamples,
int nftrs)
const;
126 void prepare_mat_handle(
float *x,
float *y,
const float *w,
int nsamples,
int nftrs,
DMatrixHandle &matrix_handle);
128 virtual void print(FILE *fp,
const string& prefix,
int level = 0)
const;
130 void calc_feature_importance(vector<float> &features_importance_scores,
131 const string &general_params,
const MedFeatures *features);
138 void export_predictor(
const string &output_fname);
142 void pre_serialization() {
143 const char* out_dptr;
145 string cfg_js =
"{ \"format\":\"json\" }";
146 if (my_learner != NULL) {
148 throw runtime_error(
"failed XGBoosterGetModelRaw\n");
149 serial_xgb.resize(len);
150 memcpy(&serial_xgb[0], out_dptr, len);
156 void post_deserialization() {
157 if (this->my_learner != NULL)
159 if (!serial_xgb.empty()) {
162 throw runtime_error(
"failed XGBoosterCreate\n");
164 throw runtime_error(
"failed XGBoosterLoadModelFromBuffer\n");
169 void prepare_predict_single();
170 void predict_single(
const vector<float> &x, vector<float> &preds)
const;
172 void get_json(
const char ***json,
int& len,
string type) {
173 if (my_learner != NULL) {
178 HMTHROW_AND_ERR(
"Error MedXGB::get_json - can't get model\n");
189 bool _mark_learn_done;
190 bool prepared_single;
191 vector<BoosterHandle> learner_per_thread;
193 void translate_split_penalties(
string& split_penalties_s);
194 void translate_monotone_constraints(
string& monotone_constraints_s);
195 void calc_feature_importance_local(vector<float> &features_importance_scores,
string &importance_type);
196 vector<char> serial_xgb;
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Base Interface for predictor.
Definition MedAlgo.h:78
int features_count
The model features count used in Learn, to validate when calling predict.
Definition MedAlgo.h:96
MedPredictorTypes classifier_type
The Predictor enum type.
Definition MedAlgo.h:80
vector< string > model_features
The model features used in Learn, to validate when calling predict.
Definition MedAlgo.h:93
virtual int set_params(map< string, string > &initialization_map)
The parsed fields from init command.
Definition MedXGB.cpp:438
void calc_feature_contribs(MedMat< float > &x, MedMat< float > &contribs)
Feature contributions explains the prediction on each sample (aka BUT_WHY)
Definition MedXGB.cpp:75
int Predict(float *x, float *&preds, int nsamples, int nftrs) const
Predict should be implemented for each model.
Definition MedXGB.cpp:57
int n_preds_per_sample() const
Number of predictions per sample. typically 1 - but some models return several per sample (for exampl...
Definition MedXGB.cpp:34
int Learn(float *x, float *y, const float *w, int nsamples, int nftrs)
Learn should be implemented for each model.
Definition MedXGB.cpp:152
Definition SerializableObject.h:32
interface of stream I/O for serialization
Definition io.h:30
Internal data structured used by XGBoost during training.
Definition data.h:509
Learner class that does training and prediction. This is the user facing module of xgboost training....
Definition learner.h:65
static Learner * Create(const std::vector< std::shared_ptr< DMatrix > > &cache_data)
Create a new instance of learner.
Definition learner.cc:1485
XGB_DLL int XGBoosterFree(BoosterHandle handle)
free obj in handle
Definition c_api.cc:896
XGB_DLL int XGBoosterDumpModelEx(BoosterHandle handle, const char *fmap, int with_stats, const char *format, bst_ulong *out_len, const char ***out_dump_array)
dump model, return array of strings representing model dump
Definition c_api.cc:1464
XGB_DLL int XGBoosterCreate(const DMatrixHandle dmats[], bst_ulong len, BoosterHandle *out)
create xgboost learner
Definition c_api.cc:882
void * DMatrixHandle
handle to DMatrix
Definition c_api.h:49
XGB_DLL int XGBoosterLoadModelFromBuffer(BoosterHandle handle, const void *buf, bst_ulong len)
load model from in memory buffer
Definition c_api.cc:1277
XGB_DLL int XGBoosterSaveModelToBuffer(BoosterHandle handle, char const *config, bst_ulong *out_len, char const **out_dptr)
Save model into raw bytes, return header of the array. User must copy the result out,...
Definition c_api.cc:1288
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2015-2023 by XGBoost Contributors.
uint64_t bst_ulong
unsigned long integers
Definition base.h:95
Copyright 2015~2023 by XGBoost Contributors.