Medial Code Documentation
Loading...
Searching...
No Matches
MedQRF.h
1#pragma once
3
4
5//======================================================================================
6// QRF: Quantized Regression/Classification random forest
7//======================================================================================
8#define MED_QRF_DEF_NTREES 100
9#define MED_QRF_DEF_MAXQ 200
10#define MED_QRF_DEF_MIN_NODE 50
11#define MED_QRF_DEF_LEARN_NTHREADS 8
12#define MED_QRF_DEF_PREDICT_NTHREADS 8
13#define MED_QRF_DEF_SPREAD 0.1
14
16
17 // Required
18 int ntrees;
19 int maxq;
20 int learn_nthreads, predict_nthreads;
21 QRF_TreeType type;
22
23 // Optional
26 vector<int> samp_vec;
27 int *sampsize;
28 int ntry;
29 int get_only_this_categ;
32
33 // Regression
34 float spread;
37
38 // categorical
39 int min_node;
40 int n_categ;
41
42 int collect_oob;
43
44 // For Prediction
45 int get_count;
46 vector<float> quantiles;
47
48 ADD_CLASS_NAME(MedQRFParams)
49 ADD_SERIALIZATION_FUNCS(ntrees, maxq, learn_nthreads, predict_nthreads, type, max_samp, samp_factor, samp_vec,
50 ntry, get_only_this_categ, max_depth, take_all_samples, spread, keep_all_values, sparse_values, min_node, n_categ, collect_oob, get_count, quantiles)
// Rebuilds the raw sampsize pointer from samp_vec (samp_vec is in the serialized
// field list above, sampsize is not): null when samp_vec is empty, otherwise a
// pointer to samp_vec's first element.
51 void post_deserialization() { sampsize = samp_vec.empty() ? nullptr : samp_vec.data(); }
52
53};
54
// MedQRF: predictor class for the QRF (Quantized Regression/Classification random
// forest) model, derived from MedPredictor.
// NOTE(review): this listing is incomplete - listing numbers jump 56 -> 59 -> 62 and
// 84 -> 86. The cross-reference index places the members `QRF_Forest qf` (model) and
// `MedQRFParams params` (parameters) at MedQRF.h:58 and MedQRF.h:61, i.e. on the
// omitted lines.
55class MedQRF : public MedPredictor {
56public:
59
62
63 // Construction and initialization
64 MedQRF();
65 ~MedQRF() {};
66 MedQRF(void *params); // construct from an opaque parameters pointer - presumably a MedQRFParams*; TODO confirm
68 int init(void *params); // initialize from an opaque parameters pointer - presumably a MedQRFParams*; TODO confirm
71 virtual int set_params(map<string, string>& mapper); // mapper: the parsed fields from the init command
72 // int init(const string &init_str); // allows init of parameters from a string. Format is: param=val,... , for sampsize: 0 is NULL, a list of values is separated by ; (and not ,)
73 void init_defaults();
74
76 QRF_TreeType get_tree_type(string name); // presumably maps a tree-type name to its QRF_TreeType value - verify in MedQRF.cpp
77
78 int Learn(float *x, float *y, const float *w, int nsamples, int nftrs); // model-specific Learn (to be implemented for each model)
79 int Predict(float *x, float *&preds, int nsamples, int nftrs) const; // model-specific Predict (to be implemented for each model)
80
81 //int denormalize_model(float *f_avg, float *f_std, float lavel_avg, float label_std) {return 0;};
82
83 // (De)Serialize - virtual class methods that do the actual (De)Serializing. Should be created for each predictor
84 ADD_CLASS_NAME(MedQRF)
86
87 // Print
88 void print(FILE *fp, const string& prefix, int level = 0) const;
89 void printTrees(const vector<string> &modelSignalNames, const string &outputPath) const;
90 void calc_feature_importance(vector<float> &features_importance_scores, const string &general_params, const MedFeatures *features);
91
92 // Predictions per sample: number of predictions returned for each sample
93 int n_preds_per_sample() const;
94
95 void prepare_predict_single(); // presumably must be called before predict_single (see prepared_single) - TODO confirm
96 void predict_single(const vector<float> &x, vector<float> &preds) const;
97
98private:
99 void set_sampsize(float *y, int nsamples); // checking if there's a need to prep sampsize based on max_samp and samp_factor
100 int Predict(float *x, float *&preds, int nsamples, int nftrs, int get_count) const; // private overload taking an explicit get_count argument
101
102 vector<pair<float, int>> _indexd_quantiles; // (float, int) pairs - presumably quantile value with its original index; TODO confirm
103 vector<float> _sorted_quantiles;
104 qrf_scoring_thread_params _single_pred_args; // presumably scoring arguments reused by predict_single - TODO confirm
105 bool prepared_single; // presumably set once prepare_predict_single() has run - TODO confirm
106};
107
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Base Interface for predictor.
Definition MedAlgo.h:78
int features_count
The model features count used in Learn, to validate when calling predict.
Definition MedAlgo.h:96
MedPredictorTypes classifier_type
The Predictor enum type.
Definition MedAlgo.h:80
vector< string > model_features
The model features used in Learn, to validate when calling predict.
Definition MedAlgo.h:93
Definition MedQRF.h:55
virtual int set_params(map< string, string > &mapper)
The parsed fields from init command.
Definition MedQRF.cpp:130
QRF_Forest qf
Model.
Definition MedQRF.h:58
int Predict(float *x, float *&preds, int nsamples, int nftrs) const
Predict should be implemented for each model.
Definition MedQRF.cpp:348
int n_preds_per_sample() const
Number of predictions per sample. typically 1 - but some models return several per sample (for exampl...
Definition MedQRF.cpp:478
QRF_TreeType get_tree_type(string name)
Definition MedQRF.cpp:180
int Learn(float *x, float *y, const float *w, int nsamples, int nftrs)
Learn should be implemented for each model.
Definition MedQRF.cpp:264
MedQRFParams params
Parameters.
Definition MedQRF.h:61
Definition QRF.h:255
Definition SerializableObject.h:32
Definition MedQRF.h:15
bool sparse_values
For keeping all values as a value-index(int):count(char) vector.
Definition MedQRF.h:36
vector< int > samp_vec
to be used when sampsize is NULL and max_samp,samp_vector > 0
Definition MedQRF.h:26
float samp_factor
if > 0 & sampsize is NULL : the maximal factor of samples between the 2 largest categories
Definition MedQRF.h:25
int max_depth
maximal depth of tree branches - if 0 no limit
Definition MedQRF.h:30
bool take_all_samples
use all samples - no sampling in building tree
Definition MedQRF.h:31
int max_samp
M if > 0 & sampsize is NULL : the maximal sampsize we will take from each category.
Definition MedQRF.h:24
bool keep_all_values
For quantile regression.
Definition MedQRF.h:35
int ntry
if ntry <= 0: ntry = (int)(sqrt((double)nfeat) + 1.0);
Definition MedQRF.h:28
vector< float > quantiles
For quantile regression.
Definition MedQRF.h:46
Definition QRF.h:232