// Tuning-parameter fields. Each of these names is given a default value by the
// assignment statements that follow this group of declarations.
// NOTE(review): "categorial" is presumably a misspelling of "categorical"; the
// identifiers are kept as-is because other code refers to them.
13 int num_ftrs_per_round;          // default assigned below: 1000
14 int num_vals_to_be_categorial;   // default assigned below: 10
17 int niter_coordinate_descent;    // default assigned below: 0
18 int internal_test_ratio;         // default assigned below: 10
21 double min_fraction_zeros_ones;  // default assigned below: 0.003
22 double frac_continuous_frequent; // default assigned below: 0.1
23 double frac_categorial_frequent; // default assigned below: 0.1
25 double min_cor_bin_ftr;          // default assigned below: 0.05
// Assigns default values to the tuning parameters.
// NOTE(review): the header of the enclosing function is not visible in this
// chunk — presumably a constructor; confirm against the full file.
// NOTE(review): on the last line, `internal_test_ratio = 10` is followed by a
// comma rather than a semicolon. The comma operator still evaluates both
// assignments, so behaviour is unaffected, but it is inconsistent with the
// other statements.
28 max_depth = 20; num_iterations = 100; num_ftrs_per_round = 1000; num_vals_to_be_categorial = 10; nparts_auc = 1; niter_coordinate_descent = 0;
29 niter_auc_gitter = 0; grid_fraction = 0.003; min_fraction_zeros_ones = 0.003;
30 frac_continuous_frequent = 0.1; frac_categorial_frequent = 0.1; lambda = 20; fraction_auc = 1; internal_test_ratio = 10, min_cor_bin_ftr = 0.05;
// Copies the object pointed to by classifier_params into this->params and
// returns 0.
// The void pointer is converted to MedDeepBitParams* with a C-style cast and
// dereferenced without a null check, so the caller must pass a valid pointer
// to a MedDeepBitParams object.
46 int init(
void *classifier_params) { this->params = *((
MedDeepBitParams*)classifier_params);
return 0; };
// Declaration only; the definition is not in this chunk.
// Takes a string-to-string map by non-const reference and returns int.
// Presumably reads parameter names/values from the map — confirm against the definition.
49 int set_params(map<string, string>& initialization_map);
// Two Learn overloads (declarations only). Both take raw float pointers x and y
// plus the sample and feature counts and return int; the first additionally
// takes a read-only float pointer w.
// NOTE(review): presumably x holds nsamples * nftrs values, y the labels and w
// per-sample weights — layout and meaning are not visible here; confirm.
54 int Learn(
float *x,
float *y,
const float *w,
int nsamples,
int nftrs);
55 int Learn(
float *x,
float *y,
int nsamples,
int nftrs);
// Declaration only. Const member function returning int.
// preds is a reference to a float pointer, so the callee is able to replace
// the caller's pointer; whether it allocates the buffer is not visible here.
56 int Predict(
float *x,
float *&preds,
int nsamples,
int nftrs)
const;
// Declaration only. Virtual const member taking an output FILE*, a prefix
// string and a level that defaults to 0.
// NOTE(review): default arguments on virtual functions are selected by the
// static type at the call site, not the dynamic type; any override should
// repeat the same default to avoid surprises.
57 virtual void print(FILE *fp,
const string& prefix,
int level=0)
const;
// Declarations only. Both take a raw unsigned char buffer and return size_t.
// NOTE(review): presumably the return value is the number of bytes written /
// consumed; no buffer length is passed, so the caller is responsible for the
// buffer being large enough — confirm against the definitions.
60 size_t serialize(
unsigned char *blob);
61 size_t deserialize(
unsigned char *blob);
// Declaration only. Const member taking an output FILE* and a prefix string.
62 void print_model(FILE *fp,
const string& prefix)
const;
// Data members holding the model's working data.
// NOTE(review): the names suggest training data (x, y, label), an internal
// held-out test set (internal_test_*), and per-feature averages / standard
// deviations (avx, sdx) — roles are inferred from names only; confirm.
65 vector<vector<double>> x, internal_test_x, internal_test_transposed_x;
66 vector<double> y, scores, r, internal_test_scores, avx, sdx;
67 vector<int> label, internal_test_label;
68 int nftrs, nsamples, internal_test_nsamples, num_bin_ftrs;
69 vector<string> ftr_names;
// Per-feature containers. is_categorial stores ints rather than bools.
// NOTE(review): presumably indexed by original feature index — confirm.
72 vector<vector<double>> ftr_grids;
73 vector<int> is_categorial;
74 vector<vector<double>> frequent_ftr_vals;
// Containers describing the binary features.
// NOTE(review): the meaning of the four tuple fields in bin_ftrs_map
// (int, int, bool, bool) is not visible in this chunk; document them at the
// place where the tuples are created.
78 vector<tuple<int, int, bool, bool>> bin_ftrs_map;
79 vector<vector<char>> bin_ftrs;
80 vector<vector<int>> bin_ftr_indexes;
81 vector<vector<double>> bin_ftr_avg_sd_beta;
// No-op: the body is empty.
85 void predict_train() {}
// Two const overloads (declarations only), each returning a double for one
// vector of doubles; the first also takes an int niter.
// NOTE(review): the parameter x has the same name as the data member x
// declared earlier in this chunk; if both belong to the same class, the
// parameter hides the member inside the definition.
86 double predict_sample(
const vector<double>& x,
int niter)
const;
87 double predict_sample(
const vector<double>& x)
const;
// Declaration only. Reads a column of chars and has three non-const reference
// parameters (normalized_col, av, std) available for output.
// NOTE(review): the last parameter is named `std`, which hides the `std`
// namespace name inside the function definition; a name such as `sd` would be
// less error-prone.
88 void get_normalized_col(
const vector<char>& col, vector<double>& normalized_col,
double& av,
double&
std);
// Declaration only. Takes two read-only vectors and two doubles, returns double.
// NOTE(review): the name suggests a single lasso (L1-regularised) update step;
// the roles of lambda and alpha are not visible here — confirm.
89 double perform_lasso_iteration(
const vector<double>& xk_train,
const vector<double>& r,
double lambda,
double alpha);
// Declaration only. Reads col and has col_without_na as a non-const reference
// (output) parameter.
// NOTE(review): how a missing ("NA") value is represented is not visible here.
90 void get_col_without_na(
const vector<double>& col, vector<double>& col_without_na);
// Declarations only. Each reads a vector of vectors and has a vector<double>
// non-const reference (output) parameter.
// NOTE(review): presumably one average / standard deviation per inner vector — confirm.
91 void get_avgs(
const vector<vector<double>>& x, vector<double>& avx);
92 void get_sds(
const vector<vector<double>>& x, vector<double>& sdx);
// Declarations only. avg and sd are each overloaded for vector<char> and
// vector<double> and return double.
// NOTE(review): presumably mean and standard deviation; whether sd is the
// population or sample form is not visible here.
93 double avg(
const vector<char>& vec);
94 double avg(
const vector<double>& vec);
95 double sd(
const vector<char>& binary_vec);
96 double sd(
const vector<double>& vec);
// Declaration only. Reads `before` and has `after` as a non-const reference
// (output) parameter.
97 void transpose(
const vector<vector<double>>& before, vector<vector<double>>& after);
// Declaration only. A second init overload taking raw float pointers and the
// sample / feature counts; unlike the init(void*) overload it returns void.
// NOTE(review): the "1" suffix on the parameter names presumably avoids
// clashing with the x, y, nsamples and nftrs data members — confirm.
98 void init(
float *x1,
float *y1,
int nsamples1,
int nftrs1);
// Declaration only. Const member that reads x and has scores as a non-const
// reference (output) parameter.
// NOTE(review): both parameter names match data members declared earlier in
// this chunk (x, scores); if they belong to the same class, the parameters
// hide those members inside the definition.
99 void predict(
const vector<vector<double>>& x, vector<double>& scores)
const;
// Declarations only.
// get_normalized_val takes a value and an int index j and returns double.
// impute_x takes x by non-const reference (so it can modify it) and avx read-only.
// NOTE(review): presumably j is a feature index and missing entries of x are
// replaced using avx — confirm against the definitions.
100 double get_normalized_val(
double x_val,
int j);
101 void impute_x(vector<vector<double>>& x,
const vector<double>& avx);
// Declarations only. Two boolean predicates: one over four integer counts,
// one over a binary-feature column.
// NOTE(review): the acceptance criteria are not visible here; presumably
// related to min_fraction_zeros_ones — confirm.
102 bool is_viable_01_ratios(
int count0,
int count1,
int count_pos0,
int count_pos1);
103 bool is_bin_ftr_valid(
const vector<char>& bin_ftr);
// Declarations only.
// NOTE(review): make_bin_ftrs has a bool parameter named is_categorial, the
// same name as the vector<int> data member declared earlier in this chunk; if
// both belong to the same class, the parameter hides the member inside the
// definition.
104 void make_bin_ftrs(
int j,
const vector<double>& vals,
bool is_categorial);
105 int step_function(
int step,
int i);
106 void mark_grids_and_frequent_vals();
// Declarations only. Both share one signature: read a column of doubles, a
// value and a direction flag, with bin_ftr as a non-const reference (output)
// parameter.
// NOTE(review): presumably the categorical variant tests equality with val and
// the quantitative variant tests a threshold, with direction selecting the
// sense — confirm against the definitions.
107 void get_categorial_bin_ftr(
const vector<double>& col,
double val,
bool direction, vector<char>& bin_ftr);
108 void get_quant_bin_ftr(
const vector<double>& col,
double val,
bool direction, vector<char>& bin_ftr);
// Declarations only. Const members taking a single value, a reference value
// and a direction flag, and returning int.
// NOTE(review): presumably the single-value counterparts of
// get_categorial_bin_ftr / get_quant_bin_ftr — confirm.
109 int get_categorial_bit(
double x_val,
double val,
bool direction)
const;
110 int get_quant_bit(
double x_val,
double val,
bool direction)
const;
// Declarations only.
// mult_bin_ftrs takes ftr1 read-only and ftr2 by non-const reference, so ftr2
// is the only argument it can modify.
// get_bin_ftr has bin_ftr as a non-const reference (output) parameter.
111 double get_ftr_score(
const vector<char>& bin_ftr);
112 void mult_bin_ftrs(
const vector<char>& ftr1, vector<char>& ftr2);
113 void get_bin_ftr(
int bin_ftr_index, vector<char>& bin_ftr);
114 void print_ftr_characteristics(
int index);
// Declaration only. Reads bin_ftr and has five non-const reference parameters
// (av, std, b, scores1, r1) available for output; is_full_step defaults to false.
// NOTE(review): the parameter named `std` hides the `std` namespace name
// inside the function definition; a name such as `sd` would be less error-prone.
115 void calc_bin_ftr_scores(
const vector<char>& bin_ftr,
double& av,
double&
std,
double& b, vector<double>& scores1, vector<double>& r1,
bool is_full_step =
false);
// Declarations only.
// gen_random_indexes and get_bin_ftr_of_it each have a non-const reference
// (output) parameter; score_random_ftrs has ftr_scores as its output and reads
// random_indexes and final_bin_ftr.
// NOTE(review): the random source and seeding are not visible in this chunk.
116 void gen_random_indexes(vector<int>& random_indexes);
117 void score_random_ftrs(vector<double>& ftr_scores,
const vector<int>& random_indexes,
const vector<char>& final_bin_ftr);
118 void get_bin_ftr_of_it(
int it, vector<char>& bin_ftr);
// Declarations only.
// NOTE(review): "gittering" is presumably a misspelling of "jittering"; the
// name is kept because other code refers to it.
119 void do_auc_gittering();
120 void do_coordinate_descent(
int num_iterations_descent);
// Declaration only. Takes predictions and integer labels read-only and returns
// double; is_weighted defaults to false and nparts defaults to 1.
// NOTE(review): how this differs from a standard AUC, and the effect of
// is_weighted / nparts, are not visible here — confirm against the definition.
121 double special_auc(
const vector<double>& all_predictions,
const vector<int>& all_label,
bool is_weighted =
false,
int nparts = 1);