Medial Code Documentation
Loading...
Searching...
No Matches
MedDeepBit.h
1#pragma once
3#include <algorithm>
4#undef LOCAL_SECTION
5#define LOCAL_SECTION LOG_MEDALGO
6#define LOCAL_LEVEL LOG_DEF_LEVEL
7extern MedLogger global_logger;
8
9//Deep Bit
10
12 int max_depth;
13 int num_iterations;
14 int num_ftrs_per_round;
15 int num_vals_to_be_categorial;
16 int nparts_auc;
17 int niter_auc_gitter;
18 int niter_coordinate_descent;
19 int internal_test_ratio;
20 double fraction_auc;
21 double grid_fraction;
22 double min_fraction_zeros_ones;
23 double frac_continuous_frequent;
24 double frac_categorial_frequent;
25 double lambda;
26 double min_cor_bin_ftr;
27
29 max_depth = 20; num_iterations = 100; num_ftrs_per_round = 1000; num_vals_to_be_categorial = 10; nparts_auc = 1; niter_coordinate_descent = 0;
30 niter_auc_gitter = 0; grid_fraction = 0.003; min_fraction_zeros_ones = 0.003;
31 frac_continuous_frequent = 0.1; frac_categorial_frequent = 0.1; lambda = 20; fraction_auc = 1; internal_test_ratio = 10, min_cor_bin_ftr = 0.05;
32 }
33 ADD_CLASS_NAME(MedDeepBitParams)
34 size_t get_size();
35 size_t serialize(unsigned char *blob);
36 size_t deserialize(unsigned char *blob);
37 string to_string();
38};
39
41
42class MedDeepBit : public MedPredictor {
43
44public:
45 MedDeepBitParams params;
46 void init_defaults();
47 int init(void *classifier_params) { this->params = *((MedDeepBitParams*)classifier_params); return 0; };
50 int set_params(map<string, string>& initialization_map);
51 MedDeepBit() { classifier_type = MODEL_DEEP_BIT; init_defaults(); }
52 ~MedDeepBit() {}
53 MedDeepBit(void *_params) { params = *(MedDeepBitParams *)_params; }
54 MedDeepBit(MedDeepBitParams& _params) { params = _params; }
55 int Learn(float *x, float *y, const float *w, int nsamples, int nftrs);
56 int Learn(float *x, float *y, int nsamples, int nftrs);
57 int Predict(float *x, float *&preds, int nsamples, int nftrs) const;
58 virtual void print(FILE *fp, const string& prefix, int level=0) const;
59 ADD_CLASS_NAME(MedDeepBit)
60 size_t get_size();
61 size_t serialize(unsigned char *blob);
62 size_t deserialize(unsigned char *blob);
63 void print_model(FILE *fp, const string& prefix) const;
64
65private:
66 vector<vector<double>> x, internal_test_x, internal_test_transposed_x;
67 vector<double> y, scores, r, internal_test_scores, avx, sdx;
68 vector<int> label, internal_test_label;
69 int nftrs, nsamples, internal_test_nsamples, num_bin_ftrs;
70 vector<string> ftr_names;
71 double avy;
72
73 vector<vector<double>> ftr_grids;
74 vector<int> is_categorial;
75 vector<vector<double>> frequent_ftr_vals;
76 // For each binary feature we document the raw feature number, a spceific value val of that feature and a boolean value bool_val.
77 // For categorial featues, we ask weather a given sample equals val or not, for bool_val in {true, false}, correpondingly.
78 // For quantitative features, we ask weather a given sample is above or under val for the values of true and false for bool_val, correspondingly.
79 vector<tuple<int, int, bool, bool>> bin_ftrs_map;
80 vector<vector<char>> bin_ftrs;
81 vector<vector<int>> bin_ftr_indexes;
82 vector<vector<double>> bin_ftr_avg_sd_beta;
83
84 void print_log();
85 void train_model();
86 void predict_train() {}
87 double predict_sample(const vector<double>& x, int niter) const;
88 double predict_sample(const vector<double>& x) const;
89 void get_normalized_col(const vector<char>& col, vector<double>& normalized_col, double& av, double& std);
90 double perform_lasso_iteration(const vector<double>& xk_train, const vector<double>& r, double lambda, double alpha);
91 void get_col_without_na(const vector<double>& col, vector<double>& col_without_na);
92 void get_avgs(const vector<vector<double>>& x, vector<double>& avx);
93 void get_sds(const vector<vector<double>>& x, vector<double>& sdx);
94 double avg(const vector<char>& vec);
95 double avg(const vector<double>& vec);
96 double sd(const vector<char>& binary_vec);
97 double sd(const vector<double>& vec);
98 void transpose(const vector<vector<double>>& before, vector<vector<double>>& after);
99 void init(float *x1, float *y1, int nsamples1, int nftrs1);
100 void predict(const vector<vector<double>>& x, vector<double>& scores) const;
101 double get_normalized_val(double x_val, int j);
102 void impute_x(vector<vector<double>>& x, const vector<double>& avx);
103 bool is_viable_01_ratios(int count0, int count1, int count_pos0, int count_pos1);
104 bool is_bin_ftr_valid(const vector<char>& bin_ftr);
105 void make_bin_ftrs(int j, const vector<double>& vals, bool is_categorial);
106 int step_function(int step, int i);
107 void mark_grids_and_frequent_vals();
108 void get_categorial_bin_ftr(const vector<double>& col, double val, bool direction, vector<char>& bin_ftr);
109 void get_quant_bin_ftr(const vector<double>& col, double val, bool direction, vector<char>& bin_ftr);
110 int get_categorial_bit(double x_val, double val, bool direction) const;
111 int get_quant_bit(double x_val, double val, bool direction) const;
112 double get_ftr_score(const vector<char>& bin_ftr);
113 void mult_bin_ftrs(const vector<char>& ftr1, vector<char>& ftr2);
114 void get_bin_ftr(int bin_ftr_index, vector<char>& bin_ftr);
115 void print_ftr_characteristics(int index);
116 void calc_bin_ftr_scores(const vector<char>& bin_ftr, double& av, double& std, double& b, vector<double>& scores1, vector<double>& r1, bool is_full_step = false);
117 void gen_random_indexes(vector<int>& random_indexes);
118 void score_random_ftrs(vector<double>& ftr_scores, const vector<int>& random_indexes, const vector<char>& final_bin_ftr);
119 void get_bin_ftr_of_it(int it, vector<char>& bin_ftr);
120 void do_auc_gittering();
121 void do_coordinate_descent(int num_iterations_descent);
122 double special_auc(const vector<double>& all_predictions, const vector<int>& all_label, bool is_weighted = false, int nparts = 1);
123};
124
126
127
128
129
130
131
132
133
134
135
136
137
138
139
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
@ MODEL_DEEP_BIT
to_use:"deep_bit" Nir's DeepBit method - creates MedDeepBit
Definition MedAlgo.h:50
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:142
Definition MedDeepBit.h:42
Definition Logger.h:99
Base Interface for predictor.
Definition MedAlgo.h:72
Definition SerializableObject.h:33
Definition MedDeepBit.h:11
size_t get_size()
Gets the size in bytes needed for serialization.
Definition MedDeepBit.cpp:921
size_t serialize(unsigned char *blob)
Serializes the object to blob memory. Returns the number of bytes written to memory.
Definition MedDeepBit.cpp:925
size_t deserialize(unsigned char *blob)
Deserializes the blob into the object. Returns the number of bytes read.
Definition MedDeepBit.cpp:945