Medial Code Documentation
Loading...
Searching...
No Matches
MedDeepBit.h
1#pragma once
3#include <algorithm>
4#define LOCAL_SECTION LOG_MEDALGO
5#define LOCAL_LEVEL LOG_DEF_LEVEL
6extern MedLogger global_logger;
7
8//Deep Bit
9
11 int max_depth;
12 int num_iterations;
13 int num_ftrs_per_round;
14 int num_vals_to_be_categorial;
15 int nparts_auc;
16 int niter_auc_gitter;
17 int niter_coordinate_descent;
18 int internal_test_ratio;
19 double fraction_auc;
20 double grid_fraction;
21 double min_fraction_zeros_ones;
22 double frac_continuous_frequent;
23 double frac_categorial_frequent;
24 double lambda;
25 double min_cor_bin_ftr;
26
28 max_depth = 20; num_iterations = 100; num_ftrs_per_round = 1000; num_vals_to_be_categorial = 10; nparts_auc = 1; niter_coordinate_descent = 0;
29 niter_auc_gitter = 0; grid_fraction = 0.003; min_fraction_zeros_ones = 0.003;
30 frac_continuous_frequent = 0.1; frac_categorial_frequent = 0.1; lambda = 20; fraction_auc = 1; internal_test_ratio = 10, min_cor_bin_ftr = 0.05;
31 }
32 ADD_CLASS_NAME(MedDeepBitParams)
33 size_t get_size();
34 size_t serialize(unsigned char *blob);
35 size_t deserialize(unsigned char *blob);
36 string to_string();
37};
38
40
// MedDeepBit: predictor class deriving publicly from MedPredictor.
// It owns a MedDeepBitParams configuration and declares Learn/Predict entry points,
// printing helpers and blob (de)serialization hooks. Apart from the few inline bodies
// below, only declarations are visible in this header; their behavior is defined elsewhere.
41class MedDeepBit : public MedPredictor {
42
43public:
 // Configuration parameters used by this predictor.
44 MedDeepBitParams params;
45 void init_defaults();
 // Copies the object pointed to by classifier_params (cast to MedDeepBitParams*) into params
 // and always returns 0. The pointer is dereferenced without any check, so the caller must
 // pass a non-null pointer to a valid MedDeepBitParams.
46 int init(void *classifier_params) { this->params = *((MedDeepBitParams*)classifier_params); return 0; };
 // Presumably configures params from name/value string pairs - confirm against the .cpp.
49 int set_params(map<string, string>& initialization_map);
 // Default constructor: tags the object as MODEL_DEEP_BIT and then calls init_defaults().
50 MedDeepBit() { classifier_type = MODEL_DEEP_BIT; init_defaults(); }
51 ~MedDeepBit() {}
 // The two constructors below only copy the supplied parameters into params.
 // NOTE(review): unlike the default constructor, neither body assigns classifier_type
 // nor calls init_defaults() - confirm this is intended.
52 MedDeepBit(void *_params) { params = *(MedDeepBitParams *)_params; }
53 MedDeepBit(MedDeepBitParams& _params) { params = _params; }
 // Training entry points taking raw float buffers; the first overload additionally takes w.
 // NOTE(review): the memory layout of x (presumably nsamples * nftrs values) and the meaning
 // of w (presumably per-sample weights) are not visible here - confirm against the .cpp.
54 int Learn(float *x, float *y, const float *w, int nsamples, int nftrs);
55 int Learn(float *x, float *y, int nsamples, int nftrs);
 // preds is a reference to a pointer, so the callee is able to reassign it.
 // TODO confirm who allocates and owns the output buffer.
56 int Predict(float *x, float *&preds, int nsamples, int nftrs) const;
57 virtual void print(FILE *fp, const string& prefix, int level=0) const;
58 ADD_CLASS_NAME(MedDeepBit)
 // Blob (de)serialization hooks; each returns a size_t (presumably a byte count - confirm).
59 size_t get_size();
60 size_t serialize(unsigned char *blob);
61 size_t deserialize(unsigned char *blob);
62 void print_model(FILE *fp, const string& prefix) const;
63
64private:
 // Internal working data. NOTE(review): names suggest training matrices, labels, scores and
 // an internal held-out test set; exact semantics are defined in the .cpp - confirm there.
65 vector<vector<double>> x, internal_test_x, internal_test_transposed_x;
66 vector<double> y, scores, r, internal_test_scores, avx, sdx;
67 vector<int> label, internal_test_label;
68 int nftrs, nsamples, internal_test_nsamples, num_bin_ftrs;
69 vector<string> ftr_names;
70 double avy;
71
72 vector<vector<double>> ftr_grids;
73 vector<int> is_categorial;
74 vector<vector<double>> frequent_ftr_vals;
75 // For each binary feature we document the raw feature number, a specific value val of that feature and a boolean value bool_val.
76 // For categorical features, we ask whether a given sample equals val or not, for bool_val in {true, false}, correspondingly.
77 // For quantitative features, we ask whether a given sample is above or under val for the values of true and false for bool_val, correspondingly.
 // NOTE(review): the tuple below has four fields (int, int, bool, bool) while the comment above
 // describes only three; the meaning of the remaining field is not documented here - confirm.
78 vector<tuple<int, int, bool, bool>> bin_ftrs_map;
79 vector<vector<char>> bin_ftrs;
80 vector<vector<int>> bin_ftr_indexes;
81 vector<vector<double>> bin_ftr_avg_sd_beta;
82
 // Private helpers; all but predict_train are declared here and defined out of line.
83 void print_log();
84 void train_model();
 // No-op: the body is empty.
85 void predict_train() {}
86 double predict_sample(const vector<double>& x, int niter) const;
87 double predict_sample(const vector<double>& x) const;
88 void get_normalized_col(const vector<char>& col, vector<double>& normalized_col, double& av, double& std);
89 double perform_lasso_iteration(const vector<double>& xk_train, const vector<double>& r, double lambda, double alpha);
90 void get_col_without_na(const vector<double>& col, vector<double>& col_without_na);
91 void get_avgs(const vector<vector<double>>& x, vector<double>& avx);
92 void get_sds(const vector<vector<double>>& x, vector<double>& sdx);
 // avg and sd are each overloaded for char vectors and for double vectors.
93 double avg(const vector<char>& vec);
94 double avg(const vector<double>& vec);
95 double sd(const vector<char>& binary_vec);
96 double sd(const vector<double>& vec);
97 void transpose(const vector<vector<double>>& before, vector<vector<double>>& after);
 // Private overload of init taking raw buffers; distinct from the public init(void*) above.
98 void init(float *x1, float *y1, int nsamples1, int nftrs1);
99 void predict(const vector<vector<double>>& x, vector<double>& scores) const;
100 double get_normalized_val(double x_val, int j);
101 void impute_x(vector<vector<double>>& x, const vector<double>& avx);
102 bool is_viable_01_ratios(int count0, int count1, int count_pos0, int count_pos1);
103 bool is_bin_ftr_valid(const vector<char>& bin_ftr);
104 void make_bin_ftrs(int j, const vector<double>& vals, bool is_categorial);
105 int step_function(int step, int i);
106 void mark_grids_and_frequent_vals();
107 void get_categorial_bin_ftr(const vector<double>& col, double val, bool direction, vector<char>& bin_ftr);
108 void get_quant_bin_ftr(const vector<double>& col, double val, bool direction, vector<char>& bin_ftr);
109 int get_categorial_bit(double x_val, double val, bool direction) const;
110 int get_quant_bit(double x_val, double val, bool direction) const;
111 double get_ftr_score(const vector<char>& bin_ftr);
112 void mult_bin_ftrs(const vector<char>& ftr1, vector<char>& ftr2);
113 void get_bin_ftr(int bin_ftr_index, vector<char>& bin_ftr);
114 void print_ftr_characteristics(int index);
115 void calc_bin_ftr_scores(const vector<char>& bin_ftr, double& av, double& std, double& b, vector<double>& scores1, vector<double>& r1, bool is_full_step = false);
116 void gen_random_indexes(vector<int>& random_indexes);
117 void score_random_ftrs(vector<double>& ftr_scores, const vector<int>& random_indexes, const vector<char>& final_bin_ftr);
118 void get_bin_ftr_of_it(int it, vector<char>& bin_ftr);
119 void do_auc_gittering();
120 void do_coordinate_descent(int num_iterations_descent);
121 double special_auc(const vector<double>& all_predictions, const vector<int>& all_label, bool is_weighted = false, int nparts = 1);
122};
123
125
126
127
128
129
130
131
132
133
134
135
136
137
138
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
@ MODEL_DEEP_BIT
to_use:"deep_bit" Nir's DeepBit method - creates MedDeepBit
Definition MedAlgo.h:56
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition MedDeepBit.h:41
Definition Logger.h:99
Base Interface for predictor.
Definition MedAlgo.h:78
Definition SerializableObject.h:32
Definition StdDeque.h:58
Definition MedDeepBit.h:10
size_t get_size()
Gets bytes sizes for serializations.
Definition MedDeepBit.cpp:921
size_t serialize(unsigned char *blob)
Serializing object to blob memory. Returns the number of bytes written to memory.
Definition MedDeepBit.cpp:925
size_t deserialize(unsigned char *blob)
Deserializing blob to object. Returns the number of bytes read.
Definition MedDeepBit.cpp:945