9#include <MedUtils/MedUtils/MedUtils.h>
10#include <MedStat/MedStat/MedStat.h>
11#include <MedFeat/MedFeat/MedFeat.h>
12#include <QRF/QRF/QRF.h>
13#include <micNet/micNet/micNet.h>
16#include <MedProcessTools/MedProcessTools/MedProcessUtils.h>
18#include <TQRF/TQRF/TQRF.h>
20#include <unordered_map>
28#pragma warning(disable: 4297)
/// Base-class initializer that receives an opaque parameter block.
/// This default implementation does not look at @p classifier_params
/// and simply returns 0.
95 virtual int init(void *classifier_params)
{
    return 0;
}
/// Initialize from a textual parameter description (declaration only; the
/// definition is not visible here).
/// @param initialization_text  initialization string, taken by value.
/// @return int status code.
/// NOTE(review): presumably parses the text into a key/value map and forwards
/// to init(map<string, string>&) - confirm against the definition.
96 int init_from_string(
string initialization_text);
/// Virtual initializer taking a string-to-string parameter map (declaration
/// only; the definition is not visible here).
/// @param mapper  parameter map - presumably name -> value; TODO confirm.
/// @return int status code.
97 virtual int init(map<string, string>& mapper);
/// Base-class fallback for parameter setting.
/// Writes a warning line to stderr saying the base-class version was reached,
/// flushes stderr, and returns 0. @p mapper is not inspected here.
98 virtual int set_params(map<string, string>& mapper)
{
    fputs("????? Using the base class set_params() ?????\n", stderr);
    fflush(stderr);
    return 0;
}
/// Hook for setting default parameter values; the base version does nothing.
99 virtual void init_defaults()
{
}
/// Low-level training entry point; Learn should be implemented for each model.
/// The base version touches none of its arguments and returns 0.
/// @param x          feature values (layout not established by this stub)
/// @param y          labels
/// @param w          optional sample weights (callers in this file may pass a null pointer)
/// @param n_samples  number of samples
/// @param n_ftrs     number of features
105 virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs)
{
    return 0;
}
/// Low-level prediction entry point; Predict should be implemented for each model.
/// The base version leaves @p preds untouched and returns 0.
/// @param x          feature values (layout not established by this stub)
/// @param preds      reference to the output pointer for predictions
/// @param n_samples  number of samples
/// @param n_ftrs     number of features
110 virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const
{
    return 0;
}
/// Const virtual printing hook (declaration only; the definition is not visible here).
/// @param fp      output stream to write to
/// @param prefix  string passed by const reference - presumably prepended to
///                the printed lines; TODO confirm
/// @param level   defaults to 0; its meaning is not shown in this view
113 virtual void print(FILE *fp,
const string& prefix,
int level = 0)
const;
/// Hook for undoing normalization on a learned model.
/// The base version ignores every argument and returns 0.
/// @param f_avg      per-feature averages (presumably those used for normalization - confirm)
/// @param f_std      per-feature standard deviations (same caveat)
/// @param label_avg  label average
/// @param label_std  label standard deviation
118 virtual int denormalize_model(float *f_avg, float *f_std, float label_avg, float label_std)
{
    return 0;
}
/// Simple no-weights call: forwards to Learn() with a null weights pointer.
/// @param x         feature values, passed through unchanged
/// @param y         labels, passed through unchanged
/// @param nsamples  number of samples
/// @param nftrs     number of features
/// @return whatever Learn() returns.
124 int learn(float *x, float *y, int nsamples, int nftrs)
{
    // nullptr rather than the NULL macro: type-safe null for the const float* weights slot.
    return Learn(x, y, nullptr, nsamples, nftrs);
}
/// Weighted learn on a MedMat feature matrix (declaration only; the
/// definition is not visible here).
/// @param x     feature matrix (non-const reference)
/// @param y     labels
/// @param wgts  sample weights
/// NOTE(review): the sibling overload without weights is described as
/// transposing/normalizing x if needed; presumably this one does too - confirm.
136 int learn(
MedMat<float> &x, vector<float> &y,
const vector<float> &wgts);
/// Weighted learn on flat vectors (declaration only; the definition is not
/// visible here). The no-weights overload in this file forwards to this
/// one with an empty weights vector.
/// @param x          feature values as a flat vector
/// @param y          labels
/// @param wgts       sample weights (may be empty, see above)
/// @param n_samples  number of samples
/// @param n_ftrs     number of features
141 int learn(vector<float> &x, vector<float> &y,
const vector<float> &wgts,
int n_samples,
int n_ftrs);
/// Learn on flat vectors without sample weights (transpose/normalizations not done).
/// Builds an empty weights vector and forwards to the weighted overload.
/// @return whatever the weighted learn() overload returns.
143 int learn(vector<float> &x, vector<float> &y, int n_samples, int n_ftrs)
{
    vector<float> no_weights;
    return learn(x, y, no_weights, n_samples, n_ftrs);
}
/// Const virtual prediction on a MedMat feature matrix (declaration only;
/// the definition is not visible here).
/// @param x      feature matrix
/// @param preds  output vector for predictions
/// @return int status code.
146 virtual int predict(
MedMat<float> &x, vector<float> &preds)
const;
/// Const prediction on a flat feature vector (declaration only; the
/// definition is not visible here).
/// @param x          feature values as a flat vector
/// @param preds      output vector for predictions
/// @param n_samples  number of samples
/// @param n_ftrs     number of features
147 int predict(vector<float> &x, vector<float> &preds,
int n_samples,
int n_ftrs)
const;
/// Const prediction on a MedMat with an explicit thread count (declaration
/// only; the definition is not visible here).
/// @param x         feature matrix
/// @param preds     output vector for predictions
/// @param nthreads  presumably the number of worker threads to use - confirm
148 int threaded_predict(
MedMat<float> &x, vector<float> &preds,
int nthreads)
const;
/// Learn from a MedFeatures object (declaration only; the definition is
/// not visible here).
/// @param features  feature container, read-only
/// @param names     non-const vector of names - presumably the feature
///                  names to use or that were used; TODO confirm direction
151 int learn(
const MedFeatures& features, vector<string>& names);
156 const string &general_params)
160 general_params, features);
163 const string &general_params,
const MedFeatures *features) {
167 throw logic_error(
"ERROR:: operation calc_feature_importance "
168 "isn't supported for " + model_name +
" yet.");
176 throw logic_error(
"ERROR:: operation calc_feature_contribs "
177 "isn't supported for " + model_name +
" yet.");
185 throw logic_error(
"ERROR:: operation calc_feature_contribs_conditional "
186 "isn't supported for " + model_name +
" yet.");
189 virtual void export_predictor(
const string &output_fname) {
193 throw logic_error(
"ERROR:: operation export_predictor "
194 "isn't supported for " + model_name +
" yet.");
212 vector<float> &min_range, vector<float> &max_range, vector<float> &map_prob,
int min_bucket_size = 10000,
213 float min_score_jump = 0.001,
float min_prob_jump = 0.005,
bool fix_prob_order =
false);
219 const vector<float> &max_range,
const vector<float> &map_prob, vector<float> &probs)
const;
/// Templated score-to-probability conversion (declaration only; the
/// definition is not visible here).
/// @tparam T        element type of the input scores
/// @tparam L        element type of the converted output
/// @param preds      input scores
/// @param params     conversion parameters (their meaning is not shown in this view)
/// @param converted  output vector receiving the converted values
/// @return int status code.
235 template<class T, class L>
int convert_scores_to_prob(const vector<T> &preds, const vector<double> &params, vector<L> &converted) const;
/// Static factory returning a MedPredictor pointer (declaration only; the
/// definition is not visible here).
/// @param model_type  model name string - presumably one of the "to_use"
///                    names listed for MedPredictorTypes; confirm
/// @param params      initialization string for the created predictor (presumed)
/// NOTE(review): ownership of the returned pointer is not shown here.
240 static MedPredictor *make_predictor(
string model_type,
string params);
/// Virtual hook (declaration only). Presumably called once before a series
/// of predict_single() calls to set up fast single-item prediction - confirm.
245 virtual void prepare_predict_single();
/// Const virtual single-item prediction, float flavour (declaration only).
/// @param x      feature values of one item (presumed)
/// @param preds  output vector for the prediction(s)
246 virtual void predict_single(
const vector<float> &x, vector<float> &preds)
const;
/// Const virtual single-item prediction, double flavour (declaration only).
/// @param x      feature values of one item (presumed)
/// @param preds  output vector for the prediction(s)
247 virtual void predict_single(
const vector<double> &x, vector<double> &preds)
const;
/// Virtual feature-importance routine (declaration only; the definition is
/// not visible here). The name suggests a SHAP-based computation - confirm.
/// @param features_importance_scores  output vector of importance scores
/// @param importance_type             non-const string selecting the importance type
/// @param features                    pointer to the feature data, read-only
248 virtual void calc_feature_importance_shap(vector<float> &features_importance_scores,
string &importance_type,
const MedFeatures *features);
/// Returns a size_t (declaration only). Presumably the number of bytes
/// needed by predictor_serialize() - confirm against the definition.
254 size_t get_predictor_size();
/// Serializes into a caller-supplied byte buffer (declaration only).
/// @param blob  destination buffer - presumably must hold at least
///              get_predictor_size() bytes; confirm
/// @return size_t, presumably the number of bytes written.
255 size_t predictor_serialize(
unsigned char *blob);
/// Const helper that prepares a feature matrix (declaration only; the
/// definition is not visible here).
/// @param x                 feature matrix (non-const, so may be modified)
/// @param wgts              sample weights
/// @param nsamples          by reference - presumably filled with the sample count
/// @param nftrs             by reference - presumably filled with the feature count
/// @param transpose_needed  whether a transpose is required (presumed from the name)
260 void prepare_x_mat(
MedMat<float> &x,
const vector<float> &wgts,
int &nsamples,
int &nftrs,
bool transpose_needed)
const;
/// Const worker routine taking an opaque pointer (declaration only).
/// NOTE(review): presumably the per-thread body used by threaded_predict(),
/// with @p p pointing at its work description - confirm.
261 void predict_thread(
void *p)
const;
/// K-Means on a raw float buffer (declaration only; the definition is not
/// visible here).
/// @param x         input data - presumably nrows*ncols values, one sample per row; confirm
/// @param nrows     number of rows
/// @param ncols     number of columns
/// @param K         number of clusters
/// @param centers   output buffer for cluster centers (size not shown here)
/// @param clusters  output buffer for per-row cluster assignment (presumed)
/// @param dists     output buffer for distances (exact layout not shown here)
282int KMeans(
float *x,
int nrows,
int ncols,
int K,
float *centers,
int *clusters,
float *dists);
/// K-Means on a raw float buffer with an iteration limit and a verbosity
/// switch (declaration only; the definition is not visible here).
/// @param x              input data - presumably nrows*ncols values, one sample per row; confirm
/// @param nrows          number of rows
/// @param ncols          number of columns
/// @param K              number of clusters
/// @param max_iter       presumably the maximum number of iterations
/// @param centers        output buffer for cluster centers
/// @param clusters       output buffer for per-row cluster assignment (presumed)
/// @param dists          output buffer for distances (exact layout not shown here)
/// @param verbose_print  defaults to true
287int KMeans(
float *x,
int nrows,
int ncols,
int K,
int max_iter,
float *centers,
int *clusters,
float *dists,
bool verbose_print =
true);
/// Trains a model passed as an opaque pointer (declaration only; the
/// definition is not visible here).
/// @param model    opaque model pointer (concrete type not shown here)
/// @param xTrain   training features as nested vectors (which axis is samples is not shown here)
/// @param y        labels
/// @param weights  sample weights
316 void learnInfraModel(
void *model,
const vector<vector<float>> &xTrain, vector<float> &y, vector<float> &weights);
/// Runs prediction with a model passed as an opaque pointer (declaration
/// only; the definition is not visible here).
/// @param model  opaque model pointer (concrete type not shown here)
/// @param xTest  test features as nested vectors (which axis is samples is not shown here)
/// @return vector of float predictions, returned by value.
318 vector<float>
predictInfraModel(
void *model,
const vector<vector<float>> &xTest);
321 mt19937 &generator, vector<float> &preds);
324 mt19937 &generator, vector<float> &preds);
333 const string &name1,
const string &name2,
const string &output_file,
334 const string &predictor_type =
"",
const string &predictor_init =
"",
int nfolds = 5,
int max_learn = 0);
Logger.h - allowing logs with more control.
int KMeans(MedMat< float > &x, int K, MedMat< float > &centers, vector< int > &clusters, MedMat< float > &dists)
K-Means: x is input matrix(each row is sample N*M).
Definition MedCluster.cpp:167
MedPredictorTypes predictor_name_to_type(const string &model_name)
Mapping from model name in string to enum MedPredictorTypes.
Definition MedAlgo.cpp:65
unordered_map< int, string > predictor_type_to_name
Mapping from predictor enum type MedPredictorTypes to model name in string.
Definition MedAlgo.cpp:36
int MedPCA_project(MedMat< float > &x, MedMat< float > &pca_base, int dim, MedMat< float > &projected)
returns the projection of the pca base on the first dim dimensions.
Definition MedCluster.cpp:227
MedPredictorTypes
Definition MedAlgo.h:38
@ MODEL_LINEAR_SGD
to_use:"linear_sgd" linear model using our customized SGD - creates MedLinearModel
Definition MedAlgo.h:54
@ MODEL_EXTERNAL_NN
to_use: "external_nn" , initialize a neural net using a layers file. creates MedExternalNN
Definition MedAlgo.h:58
@ MODEL_LASSO
to_use:"lasso" Lasso model - creates MedLasso
Definition MedAlgo.h:47
@ MODEL_BART
to_use:"bart" MedBART model using BART
Definition MedAlgo.h:57
@ MODEL_LINEAR_MODEL
to_use:"linear_model" Linear Model - creates MedLM
Definition MedAlgo.h:39
@ MODEL_QRF
to_use:"qrf" Q-Random-Forest - creates MedQRF
Definition MedAlgo.h:40
@ MODEL_GD_LINEAR
to_use:"gdlm" Gradient Descent/Full solution ridge - creates MedGDLM
Definition MedAlgo.h:44
@ MODEL_MULTI_CLASS
to_use:"multi_class" general one vs. all multi class extension - creates MedMultiClass
Definition MedAlgo.h:45
@ MODEL_TQRF
to_use:"tqrf" TQRF model - creates MedTQRF
Definition MedAlgo.h:56
@ MODEL_SIMPLE_ENSEMBLE
to_use: "simple_ensemble" , give 1 or more models to train, and ensemble them with given weights from...
Definition MedAlgo.h:59
@ MODEL_MIC_NET
to_use:"micNet" Home brew Neural Net implementation (Allows deep learning) - creates MedMicNet
Definition MedAlgo.h:48
@ MODEL_BOOSTER
to_use:"booster" general booster (meta algorithm) - creates MedBooster
Definition MedAlgo.h:49
@ MODEL_VW
to_use:"vw" VowpalWabbit yahoo research library - creates MedVW
Definition MedAlgo.h:55
@ MODEL_LIGHTGBM
to_use:"lightgbm" the celebrated LightGBM algorithm - creates MedLightGBM
Definition MedAlgo.h:51
@ MODEL_KNN
to_use:"knn" K Nearest Neighbour - creates MedKNN
Definition MedAlgo.h:41
@ MODEL_BP
to_use:"BP" Neural Network Back Propagation - creates MedBP
Definition MedAlgo.h:42
@ MODEL_XGB
to_use:"xgb" XGBoost - creates MedXGB
Definition MedAlgo.h:46
@ MODEL_SPECIFIC_GROUPS_MODELS
to_use:"multi_models" splitting model by specific value (for example age-range) and train different mod...
Definition MedAlgo.h:52
@ MODEL_MARS
to_use:"mars" Multivariate Adaptive Regression Splines - creates MedMars
Definition MedAlgo.h:43
@ MODEL_BY_MISSING_VALUES_SUBSET
to_use: "by_missing_value_subset", chooses MedPredictor on subset of the features based on missing va...
Definition MedAlgo.h:60
@ MODEL_DEEP_BIT
to_use:"deep_bit" Nir's DeepBit method - creates MedDeepBit
Definition MedAlgo.h:50
@ MODEL_SVM
to_use:"svm" Svm model - creates MedSvm
Definition MedAlgo.h:53
int MedPCA(MedMat< float > &x, MedMat< float > &pca_base, vector< float > &varsum)
given a matrix, returns the base PCA matrix and the cumulative relative variance explained by them.
Definition MedCluster.cpp:184
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:121
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:107
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Base Interface for predictor.
Definition MedAlgo.h:72
int learn(MedMat< float > &x, vector< float > &y)
MedMat x, vector y: will transpose normalize x if needed (y assumed to be normalized)
Definition MedAlgo.h:138
int learn(float *x, float *y, int nsamples, int nftrs)
simple no weights call
Definition MedAlgo.h:124
int convert_scores_to_prob(const vector< float > &preds, const vector< float > &min_range, const vector< float > &max_range, const vector< float > &map_prob, vector< float > &probs) const
If you have run learn_prob_calibration before, you have min_range,max_range,map_prob from This functi...
Definition MedAlgo.cpp:641
int learn(MedMat< float > &x, MedMat< float > &y)
MedMat x,y : will transpose/normalize x,y if needed by algorithm The convention is that untransposed ...
Definition MedAlgo.h:133
virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs)
Learn should be implemented for each model.
Definition MedAlgo.h:105
virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const
Predict should be implemented for each model.
Definition MedAlgo.h:110
bool normalize_for_learn
True if need to normalize before learn.
Definition MedAlgo.h:81
int learn(vector< float > &x, vector< float > &y, int n_samples, int n_ftrs)
vector x,y: transpose/normalizations not done.
Definition MedAlgo.h:143
bool transpose_for_predict
True if need to transpose before predict.
Definition MedAlgo.h:84
bool normalize_for_predict
True if need to normalize before predict.
Definition MedAlgo.h:85
int features_count
The model features count used in Learn, to validate when calling predict.
Definition MedAlgo.h:90
bool normalize_y_for_learn
True if need to normalize labels before learn.
Definition MedAlgo.h:82
MedPredictorTypes classifier_type
The Predictor enum type.
Definition MedAlgo.h:74
vector< string > model_features
The model features used in Learn, to validate when calling predict.
Definition MedAlgo.h:87
virtual int n_preds_per_sample() const
Number of predictions per sample. typically 1 - but some models return several per sample (for exampl...
Definition MedAlgo.h:116
bool transpose_for_learn
True if need to transpose before learn.
Definition MedAlgo.h:80
virtual bool predict_single_not_implemented()
Preparation function for fast prediction on single item each time.
Definition MedAlgo.h:244
int learn_prob_calibration(MedMat< float > &x, vector< float > &y, vector< float > &min_range, vector< float > &max_range, vector< float > &map_prob, int min_bucket_size=10000, float min_score_jump=0.001, float min_prob_jump=0.005, bool fix_prob_order=false)
calibration for probability using training data
Definition MedAlgo.cpp:549
virtual void calc_feature_importance(vector< float > &features_importance_scores, const string &general_params)
Feature Importance - assume called after learn.
Definition MedAlgo.h:155
void * new_polymorphic(string derived_class_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition MedAlgo.cpp:88
virtual void calc_feature_contribs(MedMat< float > &x, MedMat< float > &contribs)
Feature contributions explains the prediction on each sample (aka BUT_WHY)
Definition MedAlgo.h:172
Definition SerializableObject.h:31