8#if __GNUC__ >= 5 || (defined(_MSC_VER) && !defined(_DEBUG))
9#define NEW_COMPLIER false
11#define NEW_COMPLIER false
15#include <MedUtils/MedUtils/MedUtils.h>
16#include <MedStat/MedStat/MedStat.h>
17#include <MedFeat/MedFeat/MedFeat.h>
18#include <QRF/QRF/QRF.h>
19#include <micNet/micNet/micNet.h>
22#include <MedProcessTools/MedProcessTools/MedProcessUtils.h>
24#include <TQRF/TQRF/TQRF.h>
26#include <unordered_map>
34#pragma warning(disable: 4297)
/// Base-class initializer taking an opaque parameter block.
/// This default implementation does not read `classifier_params` and returns 0.
101 virtual int init(void *classifier_params)
{
    return 0;
}
/// Initialization from a single string (declaration only; the definition is not in this listing).
/// NOTE(review): presumably `initialization_text` is parsed into key/value parameters — confirm.
102 int init_from_string(
string initialization_text);
/// Initialization from a string-to-string map (declaration only; the definition is not in this listing).
103 virtual int init(map<string, string>& mapper);
/// Base-class fallback for set_params(). It does not consume `mapper`: it only
/// writes a warning line to stderr, flushes stderr, and returns 0.
104 virtual int set_params(map<string, string>& mapper)
{
    fputs("????? Using the base class set_params() ?????\n", stderr);
    fflush(stderr);
    return 0;
}
/// Overridable hook with an empty base implementation (does nothing here).
105 virtual void init_defaults()
{
}
/// Learn should be implemented for each model.
/// This base version is a placeholder: it ignores all of its arguments and returns 0.
111 virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs)
{
    return 0;
}
/// Predict should be implemented for each model.
/// This base version is a placeholder: it leaves `preds` untouched, ignores the
/// other arguments, and returns 0.
116 virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const
{
    return 0;
}
/// Prints to the stream `fp` (declaration only; the definition is not in this listing).
/// `level` defaults to 0.
/// NOTE(review): presumably `prefix` is prepended to output and `level` controls verbosity — confirm.
119 virtual void print(FILE *fp,
const string& prefix,
int level = 0)
const;
/// Overridable hook receiving feature averages/stds and the label average/std.
/// This base version does nothing with them and returns 0.
124 virtual int denormalize_model(float *f_avg, float *f_std, float label_avg, float label_std)
{
    return 0;
}
/// Simple no-weights call: forwards `x`, `y` and the dimensions to Learn()
/// with a null weights pointer, and returns whatever Learn() returns.
130 int learn(float *x, float *y, int nsamples, int nftrs)
{
    // nullptr rather than the NULL macro: a typed null pointer constant that
    // can only convert to a pointer type (here `const float *w`).
    return Learn(x, y, nullptr, nsamples, nftrs);
}
/// Weighted learn on a MedMat `x` and label vector `y` (declaration only; definition not in this listing).
/// NOTE(review): presumably transposes/normalizes `x` as needed, like the unweighted MedMat overload — confirm.
142 int learn(
MedMat<float> &x, vector<float> &y,
const vector<float> &wgts);
/// Weighted learn on flat vectors with explicit dimensions (declaration only; definition not in this listing).
/// The unweighted vector overload forwards here with an empty `wgts` vector.
147 int learn(vector<float> &x, vector<float> &y,
const vector<float> &wgts,
int n_samples,
int n_ftrs);
/// vector x,y: transpose/normalizations not done.
/// Unweighted convenience overload: builds an empty weights vector and
/// delegates to the weighted vector overload, returning its result.
149 int learn(vector<float> &x, vector<float> &y, int n_samples, int n_ftrs)
{
    vector<float> no_weights;
    return learn(x, y, no_weights, n_samples, n_ftrs);
}
/// Prediction on a MedMat `x`, writing into `preds` (declaration only; definition not in this listing).
152 virtual int predict(
MedMat<float> &x, vector<float> &preds)
const;
/// Prediction on a flat vector `x` with explicit dimensions, writing into `preds`
/// (declaration only; definition not in this listing).
153 int predict(vector<float> &x, vector<float> &preds,
int n_samples,
int n_ftrs)
const;
/// Prediction on a MedMat `x` with a caller-supplied thread count (declaration only;
/// definition not in this listing).
/// NOTE(review): presumably splits the samples across `nthreads` workers — confirm.
154 int threaded_predict(
MedMat<float> &x, vector<float> &preds,
int nthreads)
const;
/// Learn from a MedFeatures object (declaration only; definition not in this listing).
/// NOTE(review): `names` is a non-const reference — presumably filled with or restricted to
/// feature names; confirm against the definition.
157 int learn(
const MedFeatures& features, vector<string>& names);
162 const string &general_params)
166 general_params, features);
169 const string &general_params,
const MedFeatures *features) {
173 throw logic_error(
"ERROR:: operation calc_feature_importance "
174 "isn't supported for " + model_name +
" yet.");
182 throw logic_error(
"ERROR:: operation calc_feature_contribs "
183 "isn't supported for " + model_name +
" yet.");
191 throw logic_error(
"ERROR:: operation calc_feature_contribs_conditional "
192 "isn't supported for " + model_name +
" yet.");
195 virtual void export_predictor(
const string &output_fname) {
199 throw logic_error(
"ERROR:: operation export_predictor "
200 "isn't supported for " + model_name +
" yet.");
218 vector<float> &min_range, vector<float> &max_range, vector<float> &map_prob,
int min_bucket_size = 10000,
219 float min_score_jump = 0.001,
float min_prob_jump = 0.005,
bool fix_prob_order =
false);
225 const vector<float> &max_range,
const vector<float> &map_prob, vector<float> &probs)
const;
/// Templated score-to-probability conversion: reads `preds` and `params`, writes `converted`
/// (declaration only; definition not in this listing).
/// NOTE(review): the meaning and expected length of `params` are not visible here — confirm.
241 template<class T, class L>
int convert_scores_to_prob(const vector<T> &preds,
                           const vector<double> &params,
                           vector<L> &converted) const;
/// Static factory returning a MedPredictor pointer for a model type name and a parameter string
/// (declaration only; definition not in this listing).
/// NOTE(review): ownership of the returned pointer is not visible here — presumably the caller's; confirm.
246 static MedPredictor *make_predictor(
string model_type,
string params);
/// Declaration only; definition not in this listing.
/// NOTE(review): presumably must be called once before predict_single() — confirm.
251 virtual void prepare_predict_single();
/// Single-input prediction, float variant: reads `x`, writes `preds`
/// (declaration only; definition not in this listing).
252 virtual void predict_single(
const vector<float> &x, vector<float> &preds)
const;
/// Single-input prediction, double variant: reads `x`, writes `preds`
/// (declaration only; definition not in this listing).
253 virtual void predict_single(
const vector<double> &x, vector<double> &preds)
const;
/// Feature-importance computation writing into `features_importance_scores`
/// (declaration only; definition not in this listing).
/// NOTE(review): `importance_type` is a non-const reference, so it may be modified — confirm.
254 virtual void calc_feature_importance_shap(vector<float> &features_importance_scores,
string &importance_type,
const MedFeatures *features);
/// Declaration only; definition not in this listing.
/// NOTE(review): presumably the byte count that predictor_serialize() writes — confirm.
260 size_t get_predictor_size();
/// Serialization into a caller-supplied byte buffer (declaration only; definition not in this listing).
/// NOTE(review): presumably returns the number of bytes written and expects `blob` to hold at
/// least get_predictor_size() bytes — confirm.
261 size_t predictor_serialize(
unsigned char *blob);
/// Matrix preparation helper (declaration only; definition not in this listing).
/// `nsamples` and `nftrs` are non-const references, so they act as outputs or in/outs.
/// NOTE(review): presumably transposes `x` when `transpose_needed` is true — confirm.
266 void prepare_x_mat(
MedMat<float> &x,
const vector<float> &wgts,
int &nsamples,
int &nftrs,
bool transpose_needed)
const;
/// Worker entry taking an opaque pointer (declaration only; definition not in this listing).
/// NOTE(review): presumably the per-thread body used by threaded_predict(); the layout behind `p`
/// is not visible here — confirm.
267 void predict_thread(
void *p)
const;
/// K-Means on a raw float buffer (declaration only; definition not in this listing).
/// NOTE(review): presumably `x` is nrows x ncols with one sample per row, and `centers`,
/// `clusters`, `dists` are caller-allocated outputs — confirm sizes against the definition.
288int KMeans(
float *x,
int nrows,
int ncols,
int K,
float *centers,
int *clusters,
float *dists);
/// K-Means on a raw float buffer with an explicit `max_iter` (declaration only; definition not
/// in this listing). `verbose_print` defaults to true.
/// NOTE(review): presumably `x` is nrows x ncols with one sample per row, and `centers`,
/// `clusters`, `dists` are caller-allocated outputs — confirm sizes against the definition.
293int KMeans(
float *x,
int nrows,
int ncols,
int K,
int max_iter,
float *centers,
int *clusters,
float *dists,
bool verbose_print =
true);
/// Trains through an opaque model pointer (declaration only; definition not in this listing).
/// NOTE(review): the concrete type behind `model` and the row/column orientation of `xTrain`
/// are not visible here — confirm.
322 void learnInfraModel(
void *model,
const vector<vector<float>> &xTrain, vector<float> &y, vector<float> &weights);
/// Predicts through an opaque model pointer and returns the predictions by value
/// (declaration only; definition not in this listing).
/// NOTE(review): the concrete type behind `model` and the orientation of `xTest` are not
/// visible here — confirm.
324 vector<float>
predictInfraModel(
void *model,
const vector<vector<float>> &xTest);
327 mt19937 &generator, vector<float> &preds);
330 mt19937 &generator, vector<float> &preds);
339 const string &name1,
const string &name2,
const string &output_file,
340 const string &predictor_type =
"",
const string &predictor_init =
"",
int nfolds = 5,
int max_learn = 0);
Logger.h - allowing logs with more control.
int KMeans(MedMat< float > &x, int K, MedMat< float > &centers, vector< int > &clusters, MedMat< float > &dists)
K-Means: x is input matrix(each row is sample N*M).
Definition MedCluster.cpp:167
MedPredictorTypes predictor_name_to_type(const string &model_name)
Mapping from a model name string to the enum MedPredictorTypes.
Definition MedAlgo.cpp:69
unordered_map< int, string > predictor_type_to_name
Mapping from the predictor enum type MedPredictorTypes to the model name string.
Definition MedAlgo.cpp:40
int MedPCA_project(MedMat< float > &x, MedMat< float > &pca_base, int dim, MedMat< float > &projected)
returns the projection of the pca base on the first dim dimensions.
Definition MedCluster.cpp:227
MedPredictorTypes
Definition MedAlgo.h:44
@ MODEL_LINEAR_SGD
to_use:"linear_sgd" linear model using our customized SGD - creates MedLinearModel
Definition MedAlgo.h:60
@ MODEL_EXTERNAL_NN
to_use: "external_nn" , initialize a neural net using a layers file. creates MedExternalNN
Definition MedAlgo.h:64
@ MODEL_LASSO
to_use:"lasso" Lasso model - creates MedLasso
Definition MedAlgo.h:53
@ MODEL_BART
to_use:"bart" MedBART model using BART
Definition MedAlgo.h:63
@ MODEL_LINEAR_MODEL
to_use:"linear_model" Linear Model - creates MedLM
Definition MedAlgo.h:45
@ MODEL_QRF
to_use:"qrf" Q-Random-Forest - creates MedQRF
Definition MedAlgo.h:46
@ MODEL_GD_LINEAR
to_use:"gdlm" Gradient Descent/Full solution ridge - creates MedGDLM
Definition MedAlgo.h:50
@ MODEL_MULTI_CLASS
to_use:"multi_class" general one vs. all multi class extension - creates MedMultiClass
Definition MedAlgo.h:51
@ MODEL_TQRF
to_use:"tqrf" TQRF model - creates MedTQRF
Definition MedAlgo.h:62
@ MODEL_SIMPLE_ENSEMBLE
to_use: "simple_ensemble" , give 1 or more models to train, and ensemble them with given weights from...
Definition MedAlgo.h:65
@ MODEL_MIC_NET
to_use:"micNet" Home brew Neural Net implementation (Allows deep learning) - creates MedMicNet
Definition MedAlgo.h:54
@ MODEL_BOOSTER
to_use:"booster" general booster (meta algorithm) - creates MedBooster
Definition MedAlgo.h:55
@ MODEL_VW
to_use:"vw" VowpalWabbit yahoo research library - creates MedVW
Definition MedAlgo.h:61
@ MODEL_LIGHTGBM
to_use:"lightgbm" the celebrated LightGBM algorithm - creates MedLightGBM
Definition MedAlgo.h:57
@ MODEL_KNN
to_use:"knn" K Nearest Neighbour - creates MedKNN
Definition MedAlgo.h:47
@ MODEL_BP
to_use:"BP" Neural Network Back Propagation - creates MedBP
Definition MedAlgo.h:48
@ MODEL_XGB
to_use:"xgb" XGBoost - creates MedXGB
Definition MedAlgo.h:52
@ MODEL_SPECIFIC_GROUPS_MODELS
to_use:"multi_models" splitting model by specific value (for example age-range) and train different mod...
Definition MedAlgo.h:58
@ MODEL_MARS
to_use:"mars" Multivariate Adaptive Regression Splines - creates MedMars
Definition MedAlgo.h:49
@ MODEL_BY_MISSING_VALUES_SUBSET
to_use: "by_missing_value_subset", chooses a MedPredictor on a subset of the features based on missing va...
Definition MedAlgo.h:66
@ MODEL_DEEP_BIT
to_use:"deep_bit" Nir's DeepBit method - creates MedDeepBit
Definition MedAlgo.h:56
@ MODEL_SVM
to_use:"svm" Svm model - creates MedSvm
Definition MedAlgo.h:59
int MedPCA(MedMat< float > &x, MedMat< float > &pca_base, vector< float > &varsum)
Given a matrix, returns the base PCA matrix and the cumulative relative variance explained by its components.
Definition MedCluster.cpp:184
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Base Interface for predictor.
Definition MedAlgo.h:78
int learn(MedMat< float > &x, vector< float > &y)
MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized)
Definition MedAlgo.h:144
int learn(float *x, float *y, int nsamples, int nftrs)
simple no weights call
Definition MedAlgo.h:130
int convert_scores_to_prob(const vector< float > &preds, const vector< float > &min_range, const vector< float > &max_range, const vector< float > &map_prob, vector< float > &probs) const
If you have run learn_prob_calibration before, you have min_range,max_range,map_prob from this functi...
Definition MedAlgo.cpp:659
int learn(MedMat< float > &x, MedMat< float > &y)
MedMat x,y : will transpose/normalize x,y if needed by algorithm The convention is that untransposed ...
Definition MedAlgo.h:139
virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs)
Learn should be implemented for each model.
Definition MedAlgo.h:111
virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const
Predict should be implemented for each model.
Definition MedAlgo.h:116
bool normalize_for_learn
True if need to normalize before learn.
Definition MedAlgo.h:87
int learn(vector< float > &x, vector< float > &y, int n_samples, int n_ftrs)
vector x,y: transpose/normalizations not done.
Definition MedAlgo.h:149
bool transpose_for_predict
True if need to transpose before predict.
Definition MedAlgo.h:90
bool normalize_for_predict
True if need to normalize before predict.
Definition MedAlgo.h:91
int features_count
The model features count used in Learn, to validate when calling predict.
Definition MedAlgo.h:96
bool normalize_y_for_learn
True if need to normalize labels before learn.
Definition MedAlgo.h:88
MedPredictorTypes classifier_type
The predictor enum type.
Definition MedAlgo.h:80
vector< string > model_features
The model features used in Learn, to validate when calling predict.
Definition MedAlgo.h:93
virtual int n_preds_per_sample() const
Number of predictions per sample. typically 1 - but some models return several per sample (for exampl...
Definition MedAlgo.h:122
bool transpose_for_learn
True if need to transpose before learn.
Definition MedAlgo.h:86
virtual bool predict_single_not_implemented()
Preparation function for fast prediction on a single item each time.
Definition MedAlgo.h:250
int learn_prob_calibration(MedMat< float > &x, vector< float > &y, vector< float > &min_range, vector< float > &max_range, vector< float > &map_prob, int min_bucket_size=10000, float min_score_jump=0.001, float min_prob_jump=0.005, bool fix_prob_order=false)
calibration for probability using training data
Definition MedAlgo.cpp:567
virtual void calc_feature_importance(vector< float > &features_importance_scores, const string &general_params)
Feature Importance - assume called after learn.
Definition MedAlgo.h:161
void * new_polymorphic(string derived_class_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition MedAlgo.cpp:92
virtual void calc_feature_contribs(MedMat< float > &x, MedMat< float > &contribs)
Feature contributions explains the prediction on each sample (aka BUT_WHY)
Definition MedAlgo.h:178
Definition SerializableObject.h:32