Medial Code Documentation
Loading...
Searching...
No Matches
MedAlgo.h
Go to the documentation of this file.
1
4
5#ifndef __MED_ALGO_H__
6#define __MED_ALGO_H__
7
// NEW_COMPLIER: compile-time flag selected by toolchain.
//   true  - GCC major version >= 5, or MSVC in a non-debug build
//   false - any other compiler / configuration
// Both branches used to expand to `false`, which made the toolchain check a
// no-op; the first branch now yields `true`.
// The (misspelled) macro name is kept as-is because other files may test it.
// NOTE(review): presumably gates code that needs a newer toolchain - confirm at the use sites.
#if __GNUC__ >= 5 || (defined(_MSC_VER) && !defined(_DEBUG))
#define NEW_COMPLIER true
#else
#define NEW_COMPLIER false
#endif
13
15#include <MedUtils/MedUtils/MedUtils.h>
16#include <MedStat/MedStat/MedStat.h>
17#include <MedFeat/MedFeat/MedFeat.h>
18#include <QRF/QRF/QRF.h>
19#include <micNet/micNet/micNet.h>
20#include <string.h>
21#include <limits.h>
22#include <MedProcessTools/MedProcessTools/MedProcessUtils.h>
24#include <TQRF/TQRF/TQRF.h>
25#include "svm.h"
26#include <unordered_map>
27#include <random>
28#include <map>
29#include <string>
30
31// Forward Declaration
32class MedFeatures;
33
34#pragma warning(disable: 4297) //disable annoying " function assumed not to throw an exception but does "
35
36using namespace std;
37
38//================================================================================
39// MedPredictor - wrapper for classical learn/predict algorithms
40//================================================================================
41
69
/// Mapping from a predictor type (MedPredictorTypes value, stored as int) to its model name string.
/// Declared here only; the definition lives in a source file.
71extern unordered_map<int, string> predictor_type_to_name;
/// Maps a model name string to the corresponding MedPredictorTypes enum value.
/// NOTE(review): behavior for an unknown name is not visible from this header.
73MedPredictorTypes predictor_name_to_type(const string& model_name);
74
79public:
81
82 // General constructor
/// Default constructor; the body is empty, so no member is set here.
83 MedPredictor() {}
/// Virtual destructor with an empty body.
84 virtual ~MedPredictor() {};
85
89
92
/// The model features used in Learn, kept so the input can be validated when calling predict.
93 vector<string> model_features;
97
98 // Each wrapped algorithm needs to implement the following:
99 //.........................................................
100 // Init
/// Initialize from an opaque parameter block.
/// The base implementation ignores classifier_params and returns 0.
101 virtual int init(void *classifier_params) { return 0; };
/// Initialize the predictor from a text string (declaration only).
/// NOTE(review): the expected text format is not visible in this header - confirm in the implementation.
102 int init_from_string(string initialization_text);
/// Initialize the predictor from a string-to-string parameter map (declaration only).
/// NOTE(review): presumably name -> value pairs; confirm against the implementation.
103 virtual int init(map<string, string>& mapper);
/// Set parameters from a string-to-string map.
/// The base implementation does not read mapper: it prints a warning to stderr,
/// flushes stderr and returns 0.
104 virtual int set_params(map<string, string>& mapper) { fprintf(stderr, "????? Using the base class set_params() ?????\n"); fflush(stderr); return 0; };
/// Hook for setting default parameter values; the base implementation is empty.
105 virtual void init_defaults() {};
106
107
/// Learn should be implemented for each model.
/// The base implementation does nothing and returns 0.
/// NOTE(review): layout of x (sample-major vs. feature-major) is not stated here - confirm per model.
111 virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs) { return 0; };
112
/// Predict should be implemented for each model.
/// The base implementation does nothing (preds is left untouched) and returns 0.
116 virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const { return 0; }
117
118 // Print
/// Print the predictor to the given FILE stream (declaration only).
/// level defaults to 0. NOTE(review): meaning of prefix/level is not visible here.
119 virtual void print(FILE *fp, const string& prefix, int level = 0) const;
120
/// Number of predictions per sample. Typically 1 (the base implementation returns 1),
/// but some models return several per sample.
122 virtual int n_preds_per_sample() const { return 1; };
123
/// Hook taking feature averages/stds and label average/std.
/// The base implementation ignores all arguments and returns 0.
124 virtual int denormalize_model(float *f_avg, float *f_std, float label_avg, float label_std) { return 0; };
125
126 // methods relying on virtual methods, and applicable to all predictors: (one can still reimplement in derived class if needed)
127 //..............................................................................................................................
128
/// Simple no-weights call: forwards to Learn() with a NULL weights pointer.
130 int learn(float *x, float *y, int nsamples, int nftrs) { return Learn(x, y, NULL, nsamples, nftrs); }
131
132 // simple c++ style learn
133
/// Weighted learn on MedMat x and MedMat y (declaration only).
/// The unweighted overloads pass an empty wgts vector to this function.
136 virtual int learn(MedMat<float> &x, MedMat<float> &y, const vector<float> &wgts);
/// MedMat x,y: will transpose/normalize x,y if needed by the algorithm.
/// Forwards to the weighted overload with an empty weights vector.
139 int learn(MedMat<float> &x, MedMat<float> &y) { vector<float> w; return(learn(x, y, w)); }
140
/// Weighted learn on MedMat x and vector y (declaration only).
142 int learn(MedMat<float> &x, vector<float> &y, const vector<float> &wgts);
/// MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized).
/// Forwards to the weighted overload with an empty weights vector.
144 int learn(MedMat<float> &x, vector<float> &y) { vector<float> w; return(learn(x, y, w)); }
145
/// Weighted learn on flat vectors x,y with explicit sample/feature counts (declaration only).
147 int learn(vector<float> &x, vector<float> &y, const vector<float> &wgts, int n_samples, int n_ftrs);
/// vector x,y: transpose/normalizations not done.
/// Forwards to the weighted vector overload with an empty weights vector.
149 int learn(vector<float> &x, vector<float> &y, int n_samples, int n_ftrs) { vector<float> w; return learn(x, y, w, n_samples, n_ftrs); }
150
151 // simple c++ style predict
/// Predict on a MedMat, writing results into preds (declaration only).
152 virtual int predict(MedMat<float> &x, vector<float> &preds) const;
/// Predict on a flat vector x with explicit sample/feature counts (declaration only).
153 int predict(vector<float> &x, vector<float> &preds, int n_samples, int n_ftrs) const;
/// Predict on a MedMat taking a thread count (declaration only).
/// NOTE(review): how work is split across nthreads is not visible here.
154 int threaded_predict(MedMat<float> &x, vector<float> &preds, int nthreads) const;
155
/// Learn directly from a MedFeatures object (declaration only).
156 int learn(const MedFeatures& features);
/// Learn from a MedFeatures object with a vector of names (declaration only).
/// NOTE(review): presumably the feature names to use - confirm in the implementation.
157 int learn(const MedFeatures& features, vector<string>& names);
/// Predict on a MedFeatures object, taken by non-const reference (declaration only).
/// NOTE(review): presumably the predictions are stored into features - confirm.
158 virtual int predict(MedFeatures& features) const;
159
/// Feature importance - assumed to be called after learn.
/// Convenience overload: forwards to the three-argument overload with a NULL features pointer.
161 virtual void calc_feature_importance(vector<float> &features_importance_scores,
162 const string &general_params)
163 {
164 const MedFeatures *features = NULL;
165 calc_feature_importance(features_importance_scores,
166 general_params, features);
167 }
/// Feature importance with an optional features object.
/// The base implementation is a "not supported" stub: it builds a model name from
/// classifier_type and throws logic_error naming the operation and the model.
/// NOTE(review): this listing skips lines between the name construction and the throw,
/// so the name may be refined there - check the real header.
168 virtual void calc_feature_importance(vector<float> &features_importance_scores,
169 const string &general_params, const MedFeatures *features) {
170 string model_name = "model_id=" + to_string(classifier_type);
173 throw logic_error("ERROR:: operation calc_feature_importance "
174 "isn't supported for " + model_name + " yet.");
175 };
176
179 string model_name = "model_id=" + to_string(classifier_type);
182 throw logic_error("ERROR:: operation calc_feature_contribs "
183 "isn't supported for " + model_name + " yet.");
184 };
185
/// Conditional feature contributions.
/// The base implementation is a "not supported" stub: it builds a model name from
/// classifier_type and throws logic_error; none of the arguments is read.
/// NOTE(review): this listing skips lines between the name construction and the throw.
186 virtual void calc_feature_contribs_conditional(MedMat<float> &mat_x_in, unordered_map<string, float> &contiditional_variables, MedMat<float> &mat_x_out, MedMat<float> &mat_contribs)
187 {
188 string model_name = "model_id=" + to_string(classifier_type);
191 throw logic_error("ERROR:: operation calc_feature_contribs_conditional "
192 "isn't supported for " + model_name + " yet.");
193 }
194
/// Export the predictor to output_fname.
/// The base implementation is a "not supported" stub: it builds a model name from
/// classifier_type and throws logic_error; output_fname is not used.
/// NOTE(review): this listing skips lines between the name construction and the throw.
195 virtual void export_predictor(const string &output_fname) {
196 string model_name = "model_id=" + to_string(classifier_type);
199 throw logic_error("ERROR:: operation export_predictor "
200 "isn't supported for " + model_name + " yet.");
201 }
202
/// Calibration for probability using training data (declaration only).
/// Takes min_range, max_range and map_prob by non-const reference; the same three
/// vectors are the const inputs of convert_scores_to_prob().
/// Defaults: min_bucket_size = 10000, min_score_jump = 0.001, min_prob_jump = 0.005,
/// fix_prob_order = false.
217 int learn_prob_calibration(MedMat<float> &x, vector<float> &y,
218 vector<float> &min_range, vector<float> &max_range, vector<float> &map_prob, int min_bucket_size = 10000,
219 float min_score_jump = 0.001, float min_prob_jump = 0.005, bool fix_prob_order = false);
/// Converts prediction scores to probabilities using min_range, max_range and map_prob,
/// which are obtained by running learn_prob_calibration() first (declaration only).
224 int convert_scores_to_prob(const vector<float> &preds, const vector<float> &min_range,
225 const vector<float> &max_range, const vector<float> &map_prob, vector<float> &probs) const;
/// Probability calibration overload taking poly_rank and a params vector (declaration only).
/// NOTE(review): presumably fits a polynomial of rank poly_rank and stores its
/// coefficients in params - confirm in the implementation.
/// Defaults: min_bucket_size = 10000, min_score_jump = 0.001.
237 int learn_prob_calibration(MedMat<float> &x, vector<float> &y, int poly_rank, vector<double> &params, int min_bucket_size = 10000, float min_score_jump = 0.001);
/// Templated conversion of preds into converted using a params vector (declaration only).
/// NOTE(review): presumably params comes from the poly_rank learn_prob_calibration overload - confirm.
241 template<class T, class L> int convert_scores_to_prob(const vector<T> &preds, const vector<double> &params, vector<L> &converted) const;
242
243 // init
/// Static factories returning a MedPredictor pointer, selected either by model name
/// string or by MedPredictorTypes value; the two-argument forms also take a params string.
/// NOTE(review): ownership of the returned raw pointer and the result for an unknown
/// model type are not visible here - confirm in the implementation.
244 static MedPredictor *make_predictor(string model_type);
245 static MedPredictor *make_predictor(MedPredictorTypes model_type);
246 static MedPredictor *make_predictor(string model_type, string params);
247 static MedPredictor *make_predictor(MedPredictorTypes model_type, string params);
248
/// Reports whether predict_single is not implemented; the base implementation returns false.
250 virtual bool predict_single_not_implemented() { return false; }
/// Preparation function for fast prediction on a single item each time (declaration only).
251 virtual void prepare_predict_single();
/// Predict for one sample given as a float feature vector (declaration only).
252 virtual void predict_single(const vector<float> &x, vector<float> &preds) const;
/// Same as above for double precision input/output (declaration only).
253 virtual void predict_single(const vector<double> &x, vector<double> &preds) const;
/// Feature importance variant taking an importance_type string and an optional features
/// object (declaration only).
/// NOTE(review): presumably SHAP-based, judging by the name - confirm in the implementation.
254 virtual void calc_feature_importance_shap(vector<float> &features_importance_scores, string &importance_type, const MedFeatures *features);
255
256 // (De)Serialize
/// Project macro invoked with this class's name; its expansion is defined elsewhere.
257 ADD_CLASS_NAME(MedPredictor)
/// For polymorphic classes that want to be able to serialize/deserialize a pointer
/// to the derived class: takes the derived class name (declaration only).
259 void *new_polymorphic(string derived_class_name);
/// Declaration only. NOTE(review): presumably the serialized size in bytes - confirm.
260 size_t get_predictor_size();
/// Declaration only. NOTE(review): presumably writes the predictor into blob and
/// returns the number of bytes written - confirm.
261 size_t predictor_serialize(unsigned char *blob);
262
263
264protected:
265 // some needed helpers
/// Helper that takes the x matrix and weights plus a transpose flag, with nsamples/nftrs
/// passed by non-const reference (declaration only).
/// NOTE(review): presumably fills nsamples/nftrs as outputs - confirm.
266 void prepare_x_mat(MedMat<float> &x, const vector<float> &wgts, int &nsamples, int &nftrs, bool transpose_needed) const;
/// Helper taking an opaque pointer (declaration only).
/// NOTE(review): presumably the per-thread worker for threaded_predict() - confirm.
267 void predict_thread(void *p) const;
268
269};
270
271
272
273//================================================================
274// Unsupervised
275//================================================================
276
/// K-Means: x is the input matrix (each row is a sample, N*M).
/// Also takes centers, clusters and dists by non-const reference.
/// NOTE(review): presumably these three are outputs - confirm.
280int KMeans(MedMat<float> &x, int K, MedMat<float> &centers, vector<int> &clusters, MedMat<float> &dists);
/// Same as above with an explicit max_iter argument.
284int KMeans(MedMat<float> &x, int K, int max_iter, MedMat<float> &centers, vector<int> &clusters, MedMat<float> &dists);
/// Raw-pointer overload: x is passed as a float pointer together with nrows and ncols.
288int KMeans(float *x, int nrows, int ncols, int K, float *centers, int *clusters, float *dists);
289
/// Raw-pointer overload with max_iter and a verbose_print flag (default true).
293int KMeans(float *x, int nrows, int ncols, int K, int max_iter, float *centers, int *clusters, float *dists, bool verbose_print = true); // actual implementation routine
294
295// PCA
296
/// Given a matrix x, returns the base PCA matrix (pca_base) and the cumulative relative
/// variance explained by its components (varsum).
299int MedPCA(MedMat<float> &x, MedMat<float> &pca_base, vector<float> &varsum);
300
/// Returns (in projected) the projection onto the first dim dimensions of the PCA base.
302int MedPCA_project(MedMat<float> &x, MedMat<float> &pca_base, int dim, MedMat<float> &projected);
303
304
305//=========================================================================================
306
/// medial namespace for free functions.
310namespace medial {
/// Wrapper API around MedPredictor, where the model is passed as an opaque void pointer.
314 namespace models {
/// Returns a string from which the model can be created via init-from-string; model is a MedPredictor.
316 string getParamsInfraModel(void *model);
/// Returns a MedPredictor *, a clone of the given model (params without learned data).
/// delete_old defaults to true.
318 void *copyInfraModel(void *model, bool delete_old = true);
/// Initializes the model (a MedPredictor) by copying its parameters to a new address and freeing the old one.
320 void initInfraModel(void *&model);
/// Runs Learn on the MedPredictor - wrapper API.
322 void learnInfraModel(void *model, const vector<vector<float>> &xTrain, vector<float> &y, vector<float> &weights);
/// Runs predict on the MedPredictor - wrapper API.
324 vector<float> predictInfraModel(void *model, const vector<vector<float>> &xTest);
/// Runs cross validation where each pid is in a different fold and saves the preds.
326 void get_pids_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds,
327 mt19937 &generator, vector<float> &preds);
/// Runs cross validation where each sample can be in a different fold and saves the preds.
329 void get_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds,
330 mt19937 &generator, vector<float> &preds);
331 }
/// Processing utilities built on top of the predictors.
335 namespace process {
/// Compares two populations given as feature matrices.
/// Defaults: predictor_type = "", predictor_init = "", nfolds = 5, max_learn = 0.
/// NOTE(review): where the comparison is written (output_file) and how the predictor
/// is used are not visible here - confirm in the implementation.
338 void compare_populations(const MedFeatures &population1, const MedFeatures &population2,
339 const string &name1, const string &name2, const string &output_file,
340 const string &predictor_type = "", const string &predictor_init = "", int nfolds = 5, int max_learn = 0);
341 }
342}
343
344
345//=================================================================
346// Joining the MedSerialize Wagon
347//=================================================================
349
350#endif
Logger.h - allowing logs with more control.
int KMeans(MedMat< float > &x, int K, MedMat< float > &centers, vector< int > &clusters, MedMat< float > &dists)
K-Means: x is input matrix(each row is sample N*M).
Definition MedCluster.cpp:167
MedPredictorTypes predictor_name_to_type(const string &model_name)
Mapping from a model name string to the MedPredictorTypes enum.
Definition MedAlgo.cpp:69
unordered_map< int, string > predictor_type_to_name
Mapping from the predictor enum type MedPredictorTypes to the model name string.
Definition MedAlgo.cpp:40
int MedPCA_project(MedMat< float > &x, MedMat< float > &pca_base, int dim, MedMat< float > &projected)
returns the projection of the pca base on the first dim dimensions.
Definition MedCluster.cpp:227
MedPredictorTypes
Definition MedAlgo.h:44
@ MODEL_LINEAR_SGD
to_use:"linear_sgd" linear model using our customized SGD - creates MedLinearModel
Definition MedAlgo.h:60
@ MODEL_EXTERNAL_NN
to_use: "external_nn" , initialize a neural net using a layers file. creates MedExternalNN
Definition MedAlgo.h:64
@ MODEL_LASSO
to_use:"lasso" Lasso model - creates MedLasso
Definition MedAlgo.h:53
@ MODEL_BART
to_use:"bart" MedBART model using BART
Definition MedAlgo.h:63
@ MODEL_LINEAR_MODEL
to_use:"linear_model" Linear Model - creates MedLM
Definition MedAlgo.h:45
@ MODEL_QRF
to_use:"qrf" Q-Random-Forest - creates MedQRF
Definition MedAlgo.h:46
@ MODEL_GD_LINEAR
to_use:"gdlm" Gradient Descent/Full solution ridge - creates MedGDLM
Definition MedAlgo.h:50
@ MODEL_MULTI_CLASS
to_use:"multi_class" general one vs. all multi class extension - creates MedMultiClass
Definition MedAlgo.h:51
@ MODEL_TQRF
to_use:"tqrf" TQRF model - creates MedTQRF
Definition MedAlgo.h:62
@ MODEL_SIMPLE_ENSEMBLE
to_use: "simple_ensemble" , give 1 or more models to train, and ensemble them with given weights from...
Definition MedAlgo.h:65
@ MODEL_MIC_NET
to_use:"micNet" Home brew Neural Net implementation (Allows deep learning) - creates MedMicNet
Definition MedAlgo.h:54
@ MODEL_BOOSTER
to_use:"booster" general booster (meta algorithm) - creates MedBooster
Definition MedAlgo.h:55
@ MODEL_VW
to_use:"vw" Vowpal Wabbit (Yahoo Research library) - creates MedVW
Definition MedAlgo.h:61
@ MODEL_LIGHTGBM
to_use:"lightgbm" the celebrated LightGBM algorithm - creates MedLightGBM
Definition MedAlgo.h:57
@ MODEL_KNN
to_use:"knn" K Nearest Neighbour - creates MedKNN
Definition MedAlgo.h:47
@ MODEL_BP
to_use:"BP" Neural Network Back Propagation - creates MedBP
Definition MedAlgo.h:48
@ MODEL_XGB
to_use:"xgb" XGBoost - creates MedXGB
Definition MedAlgo.h:52
@ MODEL_SPECIFIC_GROUPS_MODELS
to_use:"multi_models" splitting the model by a specific value (for example age-range) and training different mod...
Definition MedAlgo.h:58
@ MODEL_MARS
to_use:"mars" Multivariate Adaptive Regression Splines - creates MedMars
Definition MedAlgo.h:49
@ MODEL_BY_MISSING_VALUES_SUBSET
to_use: "by_missing_value_subset", chooses a MedPredictor on a subset of the features based on missing va...
Definition MedAlgo.h:66
@ MODEL_DEEP_BIT
to_use:"deep_bit" Nir's DeepBit method - creates MedDeepBit
Definition MedAlgo.h:56
@ MODEL_SVM
to_use:"svm" Svm model - creates MedSvm
Definition MedAlgo.h:59
int MedPCA(MedMat< float > &x, MedMat< float > &pca_base, vector< float > &varsum)
Given a matrix, returns the base PCA matrix and the cumulative relative variance explained by its components.
Definition MedCluster.cpp:184
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Definition MedMat.h:63
Base Interface for predictor.
Definition MedAlgo.h:78
int learn(MedMat< float > &x, vector< float > &y)
MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized)
Definition MedAlgo.h:144
int learn(float *x, float *y, int nsamples, int nftrs)
simple no weights call
Definition MedAlgo.h:130
int convert_scores_to_prob(const vector< float > &preds, const vector< float > &min_range, const vector< float > &max_range, const vector< float > &map_prob, vector< float > &probs) const
If you have run learn_prob_calibration before, you have min_range, max_range, map_prob from this functi...
Definition MedAlgo.cpp:659
int learn(MedMat< float > &x, MedMat< float > &y)
MedMat x,y : will transpose/normalize x,y if needed by algorithm The convention is that untransposed ...
Definition MedAlgo.h:139
virtual int Learn(float *x, float *y, const float *w, int n_samples, int n_ftrs)
Learn should be implemented for each model.
Definition MedAlgo.h:111
virtual int Predict(float *x, float *&preds, int n_samples, int n_ftrs) const
Predict should be implemented for each model.
Definition MedAlgo.h:116
bool normalize_for_learn
True if need to normalize before learn.
Definition MedAlgo.h:87
int learn(vector< float > &x, vector< float > &y, int n_samples, int n_ftrs)
vector x,y: transpose/normalizations not done.
Definition MedAlgo.h:149
bool transpose_for_predict
True if need to transpose before predict.
Definition MedAlgo.h:90
bool normalize_for_predict
True if need to normalize before predict.
Definition MedAlgo.h:91
int features_count
The model features count used in Learn, to validate when calling predict.
Definition MedAlgo.h:96
bool normalize_y_for_learn
True if need to normalize labels before learn.
Definition MedAlgo.h:88
MedPredictorTypes classifier_type
The Predictor enum type.
Definition MedAlgo.h:80
vector< string > model_features
The model features used in Learn, to validate when calling predict.
Definition MedAlgo.h:93
virtual int n_preds_per_sample() const
Number of predictions per sample. typically 1 - but some models return several per sample (for exampl...
Definition MedAlgo.h:122
bool transpose_for_learn
True if need to transpose before learn.
Definition MedAlgo.h:86
virtual bool predict_single_not_implemented()
Preparation function for fast prediction on a single item each time.
Definition MedAlgo.h:250
int learn_prob_calibration(MedMat< float > &x, vector< float > &y, vector< float > &min_range, vector< float > &max_range, vector< float > &map_prob, int min_bucket_size=10000, float min_score_jump=0.001, float min_prob_jump=0.005, bool fix_prob_order=false)
calibration for probability using training data
Definition MedAlgo.cpp:567
virtual void calc_feature_importance(vector< float > &features_importance_scores, const string &general_params)
Feature Importance - assume called after learn.
Definition MedAlgo.h:161
void * new_polymorphic(string derived_class_name)
for polymorphic classes that want to be able to serialize/deserialize a pointer * to the derived clas...
Definition MedAlgo.cpp:92
virtual void calc_feature_contribs(MedMat< float > &x, MedMat< float > &contribs)
Feature contributions explains the prediction on each sample (aka BUT_WHY)
Definition MedAlgo.h:178
Definition SerializableObject.h:32
vector< float > predictInfraModel(void *model, const vector< vector< float > > &xTest)
run predict on the MedPredictor - wrapper api
Definition MedAlgo.cpp:1110
string getParamsInfraModel(void *model)
returns string to create model with init_string. void * is MedPredictor
Definition MedAlgo.cpp:936
void get_pids_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds, mt19937 &generator, vector< float > &preds)
Runs cross validation where each pid is in a different fold and saves the preds.
Definition MedAlgo.cpp:1121
void learnInfraModel(void *model, const vector< vector< float > > &xTrain, vector< float > &y, vector< float > &weights)
run Learn on the MedPredictor - wrapper api
Definition MedAlgo.cpp:1101
void initInfraModel(void *&model)
Initializes the model (a MedPredictor) by copying its parameters to a new address and freeing the old one.
Definition MedAlgo.cpp:1095
void * copyInfraModel(void *model, bool delete_old=true)
returns MedPredictor *, a clone copy of given model (params without learned data)....
Definition MedAlgo.cpp:1023
void get_cv(MedPredictor *pred, MedFeatures &matrix, int nFolds, mt19937 &generator, vector< float > &preds)
Runs cross validation where each sample can be in a different fold and saves the preds.
Definition MedAlgo.cpp:1181
void compare_populations(const MedFeatures &population1, const MedFeatures &population2, const string &name1, const string &name2, const string &output_file, const string &predictor_type="", const string &predictor_init="", int nfolds=5, int max_learn=0)
Compares two matrix populations.
Definition MedAlgo.cpp:1261
medial namespace for function
Definition InfraMed.h:667
Definition StdDeque.h:58