Medial Code Documentation
Loading...
Searching...
No Matches
MedAlgo.h
Go to the documentation of this file.
1
4
5#ifndef __MED_ALGO_H__
6#define __MED_ALGO_H__
7
9#include <MedUtils/MedUtils/MedUtils.h>
10#include <MedStat/MedStat/MedStat.h>
11#include <MedFeat/MedFeat/MedFeat.h>
12#include <QRF/QRF/QRF.h>
13#include <micNet/micNet/micNet.h>
14#include <string.h>
15#include <limits.h>
16#include <MedProcessTools/MedProcessTools/MedProcessUtils.h>
18#include <TQRF/TQRF/TQRF.h>
19#include "svm.h"
20#include <unordered_map>
21#include <random>
22#include <map>
23#include <string>
24
25// Forward Declaration
26class MedFeatures;
27
28#pragma warning(disable: 4297) //disable annoying " function assumed not to throw an exception but does "
29
30using namespace std;
31
32//================================================================================
33// MedPredictor - wrapper for classical learn/predict algorithms
34//================================================================================
35
63
/// Mapping from a predictor enum type (MedPredictorTypes, keyed as int) to the model name string.
65extern unordered_map<int, string> predictor_type_to_name;
/// Mapping from a model name string to the MedPredictorTypes enum value.
67MedPredictorTypes predictor_name_to_type(const string& model_name);
68
73public:
75
76 // General constructor
77 MedPredictor() {}
78 virtual ~MedPredictor() {};
79
83
86
87 vector<string> model_features;
91
92 // Each wrapped algorithm needs to implement the following:
93 //.........................................................
94 // Init
/// Init from an opaque parameter block. The base implementation ignores classifier_params and returns 0.
95 virtual int init(void *classifier_params) { return 0; };
/// Init from a text string (declared only; no definition in this listing).
96 int init_from_string(string initialization_text);
/// Init from a string-to-string map (declared only; no definition in this listing).
97 virtual int init(map<string, string>& mapper);
/// Base-class fallback: writes a "Using the base class set_params()" warning to stderr, flushes it, and returns 0.
/// NOTE(review): presumably every derived predictor is expected to override this - confirm.
98 virtual int set_params(map<string, string>& mapper) { fprintf(stderr, "????? Using the base class set_params() ?????\n"); fflush(stderr); return 0; };
/// Empty in the base class. NOTE(review): presumably overridden to set default parameter values - confirm.
99 virtual void init_defaults() {};
100
101
/// Learn should be implemented for each model.
/// The base implementation does nothing and returns 0. w may be NULL (the no-weights learn() overload passes NULL).
105 virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs) { return 0; };
106
/// Predict should be implemented for each model.
/// The base implementation does nothing (preds is left untouched) and returns 0.
110 virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const { return 0; }
111
112 // Print
/// Declared only; no definition in this listing.
113 virtual void print(FILE *fp, const string& prefix, int level = 0) const;
114
/// Number of predictions per sample. The base implementation returns 1.
116 virtual int n_preds_per_sample() const { return 1; };
117
/// The base implementation ignores all arguments and returns 0.
118 virtual int denormalize_model(float *f_avg, float *f_std, float label_avg, float label_std) { return 0; };
119
120 // methods relying on virtual methods, and applicable to all predictors: (one can still reimplement in derived class if needed)
121 //..............................................................................................................................
122
/// Simple no-weights call: forwards to Learn() with a NULL weights pointer.
124 int learn(float *x, float *y, int nsamples, int nftrs) { return Learn(x, y, NULL, nsamples, nftrs); }
125
126 // simple c++ style learn
127
/// MedMat x,y with weights (declared only; no definition in this listing).
130 virtual int learn(MedMat<float> &x, MedMat<float> &y, const vector<float> &wgts);
/// MedMat x,y: will transpose/normalize x,y if needed by algorithm.
/// Forwards to the weighted overload with an empty weights vector.
133 int learn(MedMat<float> &x, MedMat<float> &y) { vector<float> w; return(learn(x, y, w)); }
134
/// MedMat x, vector y, with weights (declared only; no definition in this listing).
136 int learn(MedMat<float> &x, vector<float> &y, const vector<float> &wgts);
/// MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized).
/// Forwards to the weighted overload with an empty weights vector.
138 int learn(MedMat<float> &x, vector<float> &y) { vector<float> w; return(learn(x, y, w)); }
139
/// vector x,y with weights and explicit dimensions (declared only; no definition in this listing).
141 int learn(vector<float> &x, vector<float> &y, const vector<float> &wgts, int n_samples, int n_ftrs);
/// vector x,y: transpose/normalizations not done.
/// Forwards to the weighted overload with an empty weights vector.
143 int learn(vector<float> &x, vector<float> &y, int n_samples, int n_ftrs) { vector<float> w; return learn(x, y, w, n_samples, n_ftrs); }
144
145 // simple c++ style predict
146 virtual int predict(MedMat<float> &x, vector<float> &preds) const;
147 int predict(vector<float> &x, vector<float> &preds, int n_samples, int n_ftrs) const;
148 int threaded_predict(MedMat<float> &x, vector<float> &preds, int nthreads) const;
149
150 int learn(const MedFeatures& features);
151 int learn(const MedFeatures& features, vector<string>& names);
152 virtual int predict(MedFeatures& features) const;
153
/// Feature Importance - assume called after learn.
/// Convenience overload: forwards to the three-argument overload with a NULL features pointer.
155 virtual void calc_feature_importance(vector<float> &features_importance_scores,
156 const string &general_params)
157 {
158 const MedFeatures *features = NULL;
159 calc_feature_importance(features_importance_scores,
160 general_params, features);
161 }
162 virtual void calc_feature_importance(vector<float> &features_importance_scores,
163 const string &general_params, const MedFeatures *features) {
164 string model_name = "model_id=" + to_string(classifier_type);
167 throw logic_error("ERROR:: operation calc_feature_importance "
168 "isn't supported for " + model_name + " yet.");
169 };
170
173 string model_name = "model_id=" + to_string(classifier_type);
176 throw logic_error("ERROR:: operation calc_feature_contribs "
177 "isn't supported for " + model_name + " yet.");
178 };
179
180 virtual void calc_feature_contribs_conditional(MedMat<float> &mat_x_in, unordered_map<string, float> &contiditional_variables, MedMat<float> &mat_x_out, MedMat<float> &mat_contribs)
181 {
182 string model_name = "model_id=" + to_string(classifier_type);
185 throw logic_error("ERROR:: operation calc_feature_contribs_conditional "
186 "isn't supported for " + model_name + " yet.");
187 }
188
189 virtual void export_predictor(const string &output_fname) {
190 string model_name = "model_id=" + to_string(classifier_type);
193 throw logic_error("ERROR:: operation export_predictor "
194 "isn't supported for " + model_name + " yet.");
195 }
196
/// Calibration for probability using training data.
/// NOTE(review): min_range, max_range and map_prob appear to be the outputs later consumed by convert_scores_to_prob - confirm.
211 int learn_prob_calibration(MedMat<float> &x, vector<float> &y,
212 vector<float> &min_range, vector<float> &max_range, vector<float> &map_prob, int min_bucket_size = 10000,
213 float min_score_jump = 0.001, float min_prob_jump = 0.005, bool fix_prob_order = false);
/// For use after learn_prob_calibration has been run: takes min_range, max_range, map_prob together with preds and fills probs.
218 int convert_scores_to_prob(const vector<float> &preds, const vector<float> &min_range,
219 const vector<float> &max_range, const vector<float> &map_prob, vector<float> &probs) const;
/// Overload taking poly_rank and a params vector instead of range tables.
/// NOTE(review): presumably a polynomial calibration whose coefficients go to params - confirm against the implementation.
231 int learn_prob_calibration(MedMat<float> &x, vector<float> &y, int poly_rank, vector<double> &params, int min_bucket_size = 10000, float min_score_jump = 0.001);
/// Overload taking the params vector instead of range tables; result is written to converted.
235 template<class T, class L> int convert_scores_to_prob(const vector<T> &preds, const vector<double> &params, vector<L> &converted) const;
236
237 // init
238 static MedPredictor *make_predictor(string model_type);
239 static MedPredictor *make_predictor(MedPredictorTypes model_type);
240 static MedPredictor *make_predictor(string model_type, string params);
241 static MedPredictor *make_predictor(MedPredictorTypes model_type, string params);
242
244 virtual bool predict_single_not_implemented() { return false; }
245 virtual void prepare_predict_single();
246 virtual void predict_single(const vector<float> &x, vector<float> &preds) const;
247 virtual void predict_single(const vector<double> &x, vector<double> &preds) const;
248 virtual void calc_feature_importance_shap(vector<float> &features_importance_scores, string &importance_type, const MedFeatures *features);
249
250 // (De)Serialize
251 ADD_CLASS_NAME(MedPredictor)
253 void *new_polymorphic(string derived_class_name);
254 size_t get_predictor_size();
255 size_t predictor_serialize(unsigned char *blob);
256
257
258protected:
259 // some needed helpers
260 void prepare_x_mat(MedMat<float> &x, const vector<float> &wgts, int &nsamples, int &nftrs, bool transpose_needed) const;
261 void predict_thread(void *p) const;
262
263};
264
265
266
267//================================================================
268// Unsupervised
269//================================================================
270
/// K-Means: x is input matrix (each row is a sample, N*M).
274int KMeans(MedMat<float> &x, int K, MedMat<float> &centers, vector<int> &clusters, MedMat<float> &dists);
/// Same as above with an explicit max_iter argument.
278int KMeans(MedMat<float> &x, int K, int max_iter, MedMat<float> &centers, vector<int> &clusters, MedMat<float> &dists);
/// Raw-pointer variant taking explicit nrows/ncols.
282int KMeans(float *x, int nrows, int ncols, int K, float *centers, int *clusters, float *dists);
283
287int KMeans(float *x, int nrows, int ncols, int K, int max_iter, float *centers, int *clusters, float *dists, bool verbose_print = true); // actual implementation routine
288
289// PCA
290
/// Given a matrix, returns the base PCA matrix and the cumulative relative variance explained by them.
293int MedPCA(MedMat<float> &x, MedMat<float> &pca_base, vector<float> &varsum);
294
/// Returns the projection of the pca base on the first dim dimensions.
296int MedPCA_project(MedMat<float> &x, MedMat<float> &pca_base, int dim, MedMat<float> &projected);
297
298
299//=========================================================================================
300
/// medial namespace for functions
304namespace medial {
/// Wrapper API around MedPredictor (the void * model arguments are MedPredictor pointers).
308 namespace models {
/// Returns a string to create the model with init_string. void * is MedPredictor.
310 string getParamsInfraModel(void *model);
/// Returns MedPredictor *, a clone copy of the given model (params without learned data).
312 void *copyInfraModel(void *model, bool delete_old = true);
/// Initializes the model (a MedPredictor) by copying its parameters to a new address and freeing the old one.
314 void initInfraModel(void *&model);
/// Runs Learn on the MedPredictor - wrapper api.
316 void learnInfraModel(void *model, const vector<vector<float>> &xTrain, vector<float> &y, vector<float> &weights);
/// Runs predict on the MedPredictor - wrapper api.
318 vector<float> predictInfraModel(void *model, const vector<vector<float>> &xTest);
/// Runs cross validation where each pid is in different fold and saves the preds.
320 void get_pids_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds,
321 mt19937 &generator, vector<float> &preds);
/// Runs cross validation where each sample can be in a different fold and saves the preds.
323 void get_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds,
324 mt19937 &generator, vector<float> &preds);
325 }
329 namespace process {
/// Compares two population matrices.
332 void compare_populations(const MedFeatures &population1, const MedFeatures &population2,
333 const string &name1, const string &name2, const string &output_file,
334 const string &predictor_type = "", const string &predictor_init = "", int nfolds = 5, int max_learn = 0);
335 }
336}
337
338
339//=================================================================
340// Joining the MedSerialize Wagon
341//=================================================================
343
344#endif
Logger.h - allowing logs with more control.
int KMeans(MedMat< float > &x, int K, MedMat< float > &centers, vector< int > &clusters, MedMat< float > &dists)
K-Means: x is input matrix(each row is sample N*M).
Definition MedCluster.cpp:167
MedPredictorTypes predictor_name_to_type(const string &model_name)
Mapping from model name in string to enum MedPredictorTypes.
Definition MedAlgo.cpp:65
unordered_map< int, string > predictor_type_to_name
Mapping from predictor enum type MedPredictorTypes to model name in string.
Definition MedAlgo.cpp:36
int MedPCA_project(MedMat< float > &x, MedMat< float > &pca_base, int dim, MedMat< float > &projected)
returns the projection of the pca base on the first dim dimensions.
Definition MedCluster.cpp:227
MedPredictorTypes
Definition MedAlgo.h:38
@ MODEL_LINEAR_SGD
to_use:"linear_sgd" linear model using our customized SGD - creates MedLinearModel
Definition MedAlgo.h:54
@ MODEL_EXTERNAL_NN
to_use: "external_nn" , initialize a neural net using a layers file. creates MedExternalNN
Definition MedAlgo.h:58
@ MODEL_LASSO
to_use:"lasso" Lasso model - creates MedLasso
Definition MedAlgo.h:47
@ MODEL_BART
to_use:"bart" MedBART model using BART
Definition MedAlgo.h:57
@ MODEL_LINEAR_MODEL
to_use:"linear_model" Linear Model - creates MedLM
Definition MedAlgo.h:39
@ MODEL_QRF
to_use:"qrf" Q-Random-Forest - creates MedQRF
Definition MedAlgo.h:40
@ MODEL_GD_LINEAR
to_use:"gdlm" Gradient Descent/Full solution ridge - creates MedGDLM
Definition MedAlgo.h:44
@ MODEL_MULTI_CLASS
to_use:"multi_class" general one vs. all multi class extension - creates MedMultiClass
Definition MedAlgo.h:45
@ MODEL_TQRF
to_use:"tqrf" TQRF model - creates MedTQRF
Definition MedAlgo.h:56
@ MODEL_SIMPLE_ENSEMBLE
to_use: "simple_ensemble" , give 1 or more models to train, and ensemble them with given weights from...
Definition MedAlgo.h:59
@ MODEL_MIC_NET
to_use:"micNet" Home brew Neural Net implementation (Allows deep learning) - creates MedMicNet
Definition MedAlgo.h:48
@ MODEL_BOOSTER
to_use:"booster" general booster (meta algorithm) - creates MedBooster
Definition MedAlgo.h:49
@ MODEL_VW
to_use:"vw" VowpalWabbit Yahoo Research library - creates MedVW
Definition MedAlgo.h:55
@ MODEL_LIGHTGBM
to_use:"lightgbm" the celebrated LightGBM algorithm - creates MedLightGBM
Definition MedAlgo.h:51
@ MODEL_KNN
to_use:"knn" K Nearest Neighbour - creates MedKNN
Definition MedAlgo.h:41
@ MODEL_BP
to_use:"BP" Neural Network Back Propagation - creates MedBP
Definition MedAlgo.h:42
@ MODEL_XGB
to_use:"xgb" XGBoost - creates MedXGB
Definition MedAlgo.h:46
@ MODEL_SPECIFIC_GROUPS_MODELS
to_use:"multi_models" splitting model by specific value (for example age-range) and train different mod...
Definition MedAlgo.h:52
@ MODEL_MARS
to_use:"mars" Multivariate Adaptive Regression Splines - creates MedMars
Definition MedAlgo.h:43
@ MODEL_BY_MISSING_VALUES_SUBSET
to_use: "by_missing_value_subset", chooses a MedPredictor on a subset of the features based on missing va...
Definition MedAlgo.h:60
@ MODEL_DEEP_BIT
to_use:"deep_bit" Nir's DeepBit method - creates MedDeepBit
Definition MedAlgo.h:50
@ MODEL_SVM
to_use:"svm" Svm model - creates MedSvm
Definition MedAlgo.h:53
int MedPCA(MedMat< float > &x, MedMat< float > &pca_base, vector< float > &varsum)
given a matrix, returns the base PCA matrix and the cumulative relative variance explained by them.
Definition MedCluster.cpp:184
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:121
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:107
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Definition MedMat.h:63
Base Interface for predictor.
Definition MedAlgo.h:72
int learn(MedMat< float > &x, vector< float > &y)
MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized)
Definition MedAlgo.h:138
int learn(float *x, float *y, int nsamples, int nftrs)
simple no weights call
Definition MedAlgo.h:124
int convert_scores_to_prob(const vector< float > &preds, const vector< float > &min_range, const vector< float > &max_range, const vector< float > &map_prob, vector< float > &probs) const
If you have run learn_prob_calibration before, you have min_range, max_range, map_prob from this functi...
Definition MedAlgo.cpp:641
int learn(MedMat< float > &x, MedMat< float > &y)
MedMat x,y : will transpose/normalize x,y if needed by algorithm The convention is that untransposed ...
Definition MedAlgo.h:133
virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs)
Learn should be implemented for each model.
Definition MedAlgo.h:105
virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const
Predict should be implemented for each model.
Definition MedAlgo.h:110
bool normalize_for_learn
True if need to normalize before learn.
Definition MedAlgo.h:81
int learn(vector< float > &x, vector< float > &y, int n_samples, int n_ftrs)
vector x,y: transpose/normalizations not done.
Definition MedAlgo.h:143
bool transpose_for_predict
True if need to transpose before predict.
Definition MedAlgo.h:84
bool normalize_for_predict
True if need to normalize before predict.
Definition MedAlgo.h:85
int features_count
The model features count used in Learn, to validate when calling predict.
Definition MedAlgo.h:90
bool normalize_y_for_learn
True if need to normalize labels before learn.
Definition MedAlgo.h:82
MedPredictorTypes classifier_type
The Predictor enum type.
Definition MedAlgo.h:74
vector< string > model_features
The model features used in Learn, to validate when calling predict.
Definition MedAlgo.h:87
virtual int n_preds_per_sample() const
Number of predictions per sample. typically 1 - but some models return several per sample (for exampl...
Definition MedAlgo.h:116
bool transpose_for_learn
True if need to transpose before learn.
Definition MedAlgo.h:80
virtual bool predict_single_not_implemented()
Preparation function for fast prediction on single item each time.
Definition MedAlgo.h:244
int learn_prob_calibration(MedMat< float > &x, vector< float > &y, vector< float > &min_range, vector< float > &max_range, vector< float > &map_prob, int min_bucket_size=10000, float min_score_jump=0.001, float min_prob_jump=0.005, bool fix_prob_order=false)
calibration for probability using training data
Definition MedAlgo.cpp:549
virtual void calc_feature_importance(vector< float > &features_importance_scores, const string &general_params)
Feature Importance - assume called after learn.
Definition MedAlgo.h:155
void * new_polymorphic(string derived_class_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition MedAlgo.cpp:88
virtual void calc_feature_contribs(MedMat< float > &x, MedMat< float > &contribs)
Feature contributions explains the prediction on each sample (aka BUT_WHY)
Definition MedAlgo.h:172
Definition SerializableObject.h:31
vector< float > predictInfraModel(void *model, const vector< vector< float > > &xTest)
run predict on the MedPredictor - wrapper api
Definition MedAlgo.cpp:1092
string getParamsInfraModel(void *model)
returns string to create model with init_string. void * is MedPredictor
Definition MedAlgo.cpp:918
void get_pids_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds, mt19937 &generator, vector< float > &preds)
run cross validation where each pid is in different fold and saves the preds.
Definition MedAlgo.cpp:1103
void learnInfraModel(void *model, const vector< vector< float > > &xTrain, vector< float > &y, vector< float > &weights)
run Learn on the MedPredictor - wrapper api
Definition MedAlgo.cpp:1083
void initInfraModel(void *&model)
initialize model which is MedPredictor by copying its parameters to new address and freeing old one
Definition MedAlgo.cpp:1077
void * copyInfraModel(void *model, bool delete_old=true)
returns MedPredictor *, a clone copy of given model (params without learned data)....
Definition MedAlgo.cpp:1005
void get_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds, mt19937 &generator, vector< float > &preds)
run cross validation where each sample can be in a different fold and saves the preds.
Definition MedAlgo.cpp:1163
void compare_populations(const MedFeatures &population1, const MedFeatures &population2, const string &name1, const string &name2, const string &output_file, const string &predictor_type="", const string &predictor_init="", int nfolds=5, int max_learn=0)
compares two population matrices.
Definition MedAlgo.cpp:1243
medial namespace for function
Definition InfraMed.h:667
Definition BFloat16.h:88