// Data members of a tree-like structure (the enclosing class header is not visible here).
// NOTE(review): descriptions are read off names and types only — confirm against the class.
// Nodes of the tree; estimated_size() walks this vector and calls estimated_size() on each node.
81 vector<QRF_Node> nodes;
// NOTE(review): presumably the ids of the samples used by this tree — TODO confirm.
86 vector<int> sample_ids;
// Ordered short -> short map. NOTE(review): presumably tracks features picked for splits — confirm key/value meaning.
87 map<short, short> feat_chosen;
// Two vectors sharing the "histr_" prefix: double sums and int counts.
// NOTE(review): presumably regression histograms — TODO confirm.
92 vector<double> histr_sum;
93 vector<int> histr_num;
// Returns a rough size estimate for this tree: the element counts of its
// bookkeeping vectors (some weighted by a constant factor) plus the
// estimated_size() of every node in `nodes`.
// NOTE(review): the unit of the estimate (elements vs. bytes) is not stated in
// the visible code — confirm before comparing it with byte budgets.
96 size_t estimated_size() {
    // histr_sum and histr_num each contribute once. Previously histr_num was
    // added twice and histr_sum was never counted.
    size_t s = histr_sum.size() + histr_num.size() + inds.size() + qy.size() * 2
             + hist[0].size() + hist[1].size() + feat_chosen.size() * 4 + sample_ids.size();
    for (auto &n : nodes)
        s += n.estimated_size();
    return s;
}
// Declaration only; the definition is outside this view.
// NOTE(review): presumably prepares the random-number state — TODO confirm.
104 void init_rand_state();
// (int, unsigned int) pairs; each element is weighted by 2 in the node's estimated_size().
// NOTE(review): presumably a value paired with its occurrence count — TODO confirm.
129 vector<pair<int, unsigned int>> value_counts;
// Rough size estimate for one node: a fixed overhead of 10, one unit per
// element of `counts` and `values`, and two units per `value_counts` pair.
134 size_t estimated_size() {
    const size_t fixed_overhead = 10;
    const size_t pair_units = 2 * value_counts.size();
    const size_t scalar_units = counts.size() + values.size();
    return fixed_overhead + scalar_units + pair_units;
}
// Declaration only (definition not in view). Const member taking a mode, a
// counts flag and a category count, plus a non-const vector reference.
// NOTE(review): presumably fills `scores` with the node's output — confirm.
136 void get_scores(
int mode,
int get_counts_flag,
int n_categ, vector<float> &scores)
const;
// Project macro applied to the listed members.
// NOTE(review): presumably generates the serialization routines for them — see the macro definition.
139 ADD_SERIALIZATION_FUNCS(n_size, mode, ifeat, split_val, is_leaf, left, right, pred, counts, values, value_counts, tot_n_values, majority)
// NOTE(review): presumably sample count, feature count and maximum number of
// quantization levels — confirm.
159 int NSamples, NFeat, MaxQ;
// NOTE(review): presumably the quantization cut values and the quantized
// (short) data, one inner vector per feature — confirm indexing order.
163 vector<vector<float>> quant_values;
164 vector<vector<short>> q_data;
// NOTE(review): presumably label lists used by the multilabel mode — confirm.
166 vector<vector<int>> yr_multilabel;
// NOTE(review): presumably a precomputed table of logarithms — confirm what indexes it.
169 vector<double> log_table;
// NOTE(review): presumably filled by the out-of-bag / cross-validation methods — confirm.
171 vector<OOB_Result> cv;
// Declarations only; definitions are outside this view.
// init_all takes a feature buffer X, int labels Y, float targets Yr and
// read-only weights W, followed by the nfeat / nsamples / maxq sizes.
// NOTE(review): memory layout of X, whether W may be null, and the meaning of
// the int return value are not visible here — confirm against the definition.
180 int init_all(
float *X,
int *Y,
float *Yr,
const float *W,
int nfeat,
int nsamples,
int maxq);
// NOTE(review): presumably stores a per-sample group assignment — confirm.
182 void init_groups(vector<int> &groups_in);
// Variant of the initializer taking only X and int labels Y (no Yr, no W).
184 int init(
float *X,
int *Y,
int nfeat,
int nsamples,
int maxq);
// Takes `sampsize` as a pointer and writes into the caller-provided `tree`.
// NOTE(review): presumably `sampsize` is an array of per-class sample sizes and
// `ntry` the number of features tried per split — confirm.
185 int get_Tree(
int *sampsize,
int ntry,
QRF_Tree &tree);
// Declarations only; definitions are outside this view.
// NOTE(review): presumably scores the samples listed in `sample_ids` with
// `resTree` and accumulates out-of-bag results — confirm.
186 int collect_Tree_oob_scores(
float *x,
int nfeat,
QRF_ResTree &resTree, vector<int>& sample_ids);
// NOTE(review): presumably finalizes the accumulated OOB results — confirm.
187 void complete_oob_cv();
// Outputs are returned through the reference parameters `score`, `majority`
// and `counts`.
// NOTE(review): `id` presumably selects a sample row in `x` — confirm.
188 void score_tree_by_index(
float *x,
int nfeat,
QRF_ResTree &tree,
int id,
float& score,
int& majority, vector<int> &counts);
// NOTE(review): presumably computes AUC from the cross-validation results — confirm.
191 double get_cross_validation_auc();
// Declarations only; definitions are outside this view.
// Regression counterpart of the initializers: Y is float here (int in init)
// and read-only weights W are accepted.
194 int init_regression(
float *X,
float *Y,
const float *W,
int nfeat,
int nsamples,
int maxq);
// Same parameter list as get_Tree; writes into the caller-provided `tree`.
// NOTE(review): meaning of the int return value is not visible — confirm.
195 int get_regression_Tree(
int *sampsize,
int ntry,
QRF_Tree &tree);
// NOTE(review): split-control settings as suggested by their names; the code
// that reads them is outside this view — confirm exact semantics.
201 int min_split_node_size;
202 float min_split_spread;
// NOTE(review): presumably disables per-tree subsampling when true — confirm.
209 bool take_all_samples;
// Declarations only; definitions are outside this view.
// Results are returned through `quant_val` (floats) and `qd` (shorts).
// NOTE(review): presumably maps the values in `vals` to at most `maxq`
// quantization levels — confirm.
215 int quantize_no_loss(vector<ValInd> &vals,
int nsamples,
int maxq, vector<float> &quant_val, vector<short> &qd);
// Split search / split application pairs; all take the tree and a node index.
// NOTE(review): `ntry` is presumably the number of candidate features examined
// per split, and the int return presumably signals success — confirm.
218 int find_best_split(
QRF_Tree &tree,
int node,
int ntry);
219 int split_node(
QRF_Tree &tree,
int node);
// Regression variants of the two functions above.
222 int find_best_regression_split(
QRF_Tree &tree,
int node,
int ntry);
223 int split_regression_node(
QRF_Tree &tree,
int node);
// Categorical split finders, named after their criteria (chi2, entropy,
// entropy for multilabel); identical parameter lists.
226 int find_best_categories_chi2_split(
QRF_Tree &tree,
int node,
int ntry);
227 int find_best_categories_entropy_split(
QRF_Tree &tree,
int node,
int ntry);
228 int find_best_categories_entropy_split_multilabel(
QRF_Tree &tree,
int node,
int ntry);
// Set to -1 by the default-initialization statements in this file.
// NOTE(review): presumably -1 means "no single category selected" — confirm.
262 int get_only_this_categ;
// Set to false by the default-initialization statements in this file.
264 bool keep_all_values;
// NOTE(review): presumably requested quantile levels and a sorted list of
// observed target values — confirm.
266 vector<float> quantiles;
267 vector<float> sorted_values;
// The forest's trees (cleared by the default-initialization statements).
272 vector<QRF_ResTree> qtrees;
// NOTE(review): presumably out-of-bag scores, filled only when OOB collection
// is enabled — confirm indexing.
276 vector<vector<float> > oob_scores;
// Default state: no trees, mode 0, OOB collection off with empty oob_scores,
// keep_all_values false, sparse_values true, one thread,
// min_node_size = MIN_SPLIT_NODE_SIZE, get_only_this_categ = -1,
// min_spread 0, n_categ -1, get_counts_flag 0, take_all_samples false,
// max_depth 0.
// NOTE(review): the enclosing function header is not visible here; presumably
// this is the default constructor body — confirm.
280 qtrees.clear(); mode = 0; collect_oob = 0; oob_scores.clear(); keep_all_values =
false; sparse_values =
true; nthreads = 1; min_node_size = MIN_SPLIT_NODE_SIZE; get_only_this_categ = -1;
281 min_spread = 0; n_categ = -1; get_counts_flag = 0; take_all_samples =
false; max_depth = 0;
// Set to false by the default-initialization statements in this file.
287 bool take_all_samples;
// Declarations only; definitions are outside this view.
// Two get_forest overloads with the same parameter order: one takes double
// features and double targets, the other float features and int labels.
// NOTE(review): memory layout of x and the meaning of the int return value are
// not visible here — confirm.
298 int get_forest(
double *x,
double *y,
int nfeat,
int nsamples,
int *sampsize,
int ntry,
int ntrees,
int maxq);
299 int get_forest(
float *x,
int *y,
int nfeat,
int nsamples,
int *sampsize,
int ntry,
int ntrees,
int maxq);
// Regression entry point. Unlike the overloads above, `sampsize` is passed by
// value (int, not int*), and two extra settings follow: min_node and spread.
// NOTE(review): presumably these feed min_node_size / min_spread — confirm.
303 int get_forest_regression_trees(
float *x,
float *y,
int nfeat,
int nsamples,
int sampsize,
int ntry,
int ntrees,
int maxq,
int min_node,
float spread);
// Declaration only; the definition is outside this view.
// Categorical entry point: float targets `y`, read-only weights `w`, and the
// extra settings min_node, ncateg and splitting_method.
// NOTE(review): valid values of `splitting_method` and whether `w` may be null
// are not visible here — confirm against the definition.
308 int get_forest_categorical(
float *x,
float *y,
const float *w,
int nfeat,
int nsamples,
int *sampsize,
int ntry,
int ntrees,
int maxq,
int min_node,
int ncateg,
int splitting_method);
// Declarations only; definitions are outside this view.
// Const scoring overloads; `res` is a reference to a float pointer.
// NOTE(review): who allocates and owns the buffer behind `res`, and its
// length, are not visible here — confirm before freeing or reusing it.
313 int score_samples(
float *x,
int nfeat,
int nsamples,
float *&res)
const;
// Same as above with an explicit `get_counts` argument.
318 int score_samples(
float *x_in,
int nfeat,
int nsamples,
float *&res,
int get_counts)
const;
// Double-precision variant. Unlike the two overloads above, it is not
// declared const.
320 int score_samples_t(
double *x,
int nfeat,
int nsamples,
double *&res);
// Project macro applied to the listed members.
// NOTE(review): presumably generates the serialization routines for them.
// collect_oob and oob_scores, which the default-initialization statements also
// reset, are not in this list — presumably intentional; confirm.
331 ADD_SERIALIZATION_FUNCS(qtrees, mode, min_node_size, min_spread, n_categ, get_counts_flag, get_only_this_categ, keep_all_values, sparse_values, quantiles, sorted_values, nthreads, take_all_samples, max_depth)
// Declaration only. NOTE(review): presumably writes the forest to the open
// stream `fp`; output format is not visible here — confirm.
335 void write(FILE *fp);
// Declaration only. Result is returned through `rankedFeatures` as
// (short, double) pairs.
// NOTE(review): presumably (feature index, importance) — confirm order and sorting.
340 void variableImportance(vector<pair<short, double> >& rankedFeatures,
unsigned int nFeatures);
// Declarations only; definitions are outside this view.
// Takes the targets as an untyped `void *y` together with a `mode` selector.
// NOTE(review): presumably `mode` determines how `y` is interpreted (int vs.
// float) — confirm the mapping before calling.
344 int get_forest_trees_all_modes(
float *x,
void *y,
const float *w,
int nfeat,
int nsamples,
int *sampsize,
int ntry,
int ntrees,
int maxq,
int mode);
// Const member; `res` is a plain float pointer here (not a pointer reference),
// so the buffer must already exist when this is called.
// NOTE(review): presumably splits the samples across `nthreads` workers — confirm.
347 void score_with_threads(
float *x,
int nfeat,
int nsamples,
float *res)
const;
// NOTE(review): presumably converts the training-time trees into the forest's
// stored trees using data from `qrf` — confirm.
350 int transfer_to_forest(vector<QRF_Tree> &trees,
QuantizedRF &qrf,
int mode);
// `all_values` is an ordered float -> int map passed by non-const reference.
// NOTE(review): presumably filled with each distinct value and an index or
// count for keep_all_values mode — confirm.
352 int init_keep_all_values(
QuantizedRF &qrf,
int mode, map<float, int> &all_values);