2#ifndef _ITERATIVEIMPUTER_H_
3#define _ITERATIVEIMPUTER_H_
6#include <unordered_set>
// Tunable parameters with their in-class defaults.
// NOTE(review): the descriptions below are read off the identifiers and default
// values only; how each field is consumed lives in IterativeImputer.cpp — confirm there.

// Features to impute (strings; presumably feature names). Empty by default.
14 vector<string> features_to_impute = {};
// Selector string for the regressor; default "qrf".
15 string regressor =
"qrf";
// Parameter string for the regressor: ';'-separated key=value pairs.
16 string regressor_params =
"type=regression;ntrees=30;min_node=100;spread=0.1;learn_nthreads=40;predict_nthreads=40";
// Selector string for the multi-category classifier; default "qrf".
19 string multi_categ_classifier =
"qrf";
// Parameter string for the multi-category classifier, same key=value format.
20 string multi_categ_classifier_params =
"type=categorical_entropy;ntrees=30;min_node=10;learn_nthreads=40;predict_nthreads=40";
// Variable name, default "n_categ". NOTE(review): its use is not visible in this header.
21 string add_ncateg_var_name =
"n_categ";
// Strata specification for round 1: ':'-separated entries, each "name,number,number,number".
// NOTE(review): the three numbers are presumably min,max,step — TODO confirm.
22 string round1_strata =
"Age,40,80,5:Gender,1,2,1";
// Round-1 moment selector; default 0. NOTE(review): meaning of the values is not visible here.
24 int round1_moment = 0;
// Bound related to categorical detection; default 0. NOTE(review): exact rule not visible here.
25 int categorial_bound = 0;
// Maximum number of iterations; default 1.
26 int max_iterations = 1;
// Default 0.1. NOTE(review): presumably the fraction of samples held out for validation — confirm.
27 float p_validation = (float)0.1;
// Default 10000. NOTE(review): presumably a minimum sample count required for training — confirm.
28 int min_vals_for_training = 10000;
// Value that denotes a missing entry; defaults to MED_MAT_MISSING_VALUE.
29 float missing_value = MED_MAT_MISSING_VALUE;
// Integer flag, default 1. NOTE(review): presumably enables rounding of values to the feature resolution — confirm.
30 int round_to_resolution = 1;
// Default 0.5. NOTE(review): presumably a threshold on the share of missing values — confirm.
32 float missing_bound = 0.5;
// Default 1000. NOTE(review): presumably a minimum sample count required for imputing — confirm.
35 int min_vals_for_impute = 1000;
// Initializes this object from the parsed fields in `mapper`.
// Declared here only; the Doxygen tooltip places the definition in IterativeImputer.cpp.
// Returns an int; the meaning of the value is not visible in this header.
38 int init(map<string, string>& mapper);
// Lists the fields handled by the serialization functions generated by ADD_SERIALIZATION_FUNCS.
// NOTE(review): min_vals_for_impute is declared among the parameters but is not listed here,
// so it is not part of the serialized state — confirm this is intentional before adding it,
// since changing the list changes the serialized format.
41 ADD_SERIALIZATION_FUNCS(features_to_impute, regressor, regressor_params, multi_categ_classifier, multi_categ_classifier_params, add_ncateg_var_name,
42 round1_strata, do_round1, round1_moment, categorial_bound, max_iterations, p_validation, min_vals_for_training, missing_value,
43 round_to_resolution, verbose, missing_bound);
// Per-feature bookkeeping fields with their in-class defaults.
// NOTE(review): descriptions are read off the identifiers; confirm against IterativeImputer.cpp.

// Full feature name; empty by default.
51 string full_name =
"";
// Integer flag, default 0 (printed as "categorial" in the feature-info log line).
53 int is_categorial = 0;
// min starts very large and max very small (1e10 / -1e10);
// presumably so the first observed value replaces both — TODO confirm.
54 float min = (float)1e10;
55 float max = (float)-1e10;
// Predictor type code; default 0. NOTE(review): the meaning of the codes is not visible here.
57 int predictor_type = 0;
// Indices used for prediction. NOTE(review): presumably indices of the predictor features — confirm.
58 vector<int> inds_for_pred;
// Counters, both starting at 0 (printed as "n_with" and "non zero" in the feature-info log line).
62 int n_with_values = 0;
63 int n_with_non_zero_values = 0;
// Per-entry missing markers stored as char. NOTE(review): presumably one entry per sample — confirm.
66 vector<char> is_missing;
// Names of features used for prediction (strings).
67 vector<string> feats_for_pred;
// Training indices for this feature.
68 vector<int> train_idx;
// Prepares index data for this feature from externally supplied train and test index lists.
// Declaration only; the definition is not in this header.
//   external_train_idx - train indices supplied by the caller (read-only)
//   external_test_idx  - test indices supplied by the caller (read-only)
//   missing_value      - value that denotes a missing entry
// Returns an int; the meaning of the value is not visible here.
73 int prep_indexes(
const vector<int> &external_train_idx,
const vector<int> &external_test_idx,
float missing_value);
77 fprintf(stderr,
"Feature Info :: %s :: %s :: data_len %d : n_missing %d ( %5.2f ): n_with %d ( non zero %d ): n_diff_vals %d : categorial %d : min %f : max %f : resolution %f\n",
78 name.c_str(), full_name.c_str(), data_len, n_missing, (
float)100 * n_missing / (
float)data_len, n_with_values, n_with_non_zero_values, n_diff_vals, is_categorial, min, max, resolution);
// Lists the per-feature fields handled by the generated serialization functions.
// The counters (n_with_values, n_with_non_zero_values) and the vectors is_missing,
// feats_for_pred and train_idx are not in the list, so they are not serialized.
82 ADD_SERIALIZATION_FUNCS(name, full_name, n_diff_vals, is_categorial, min, max, resolution, predictor_type, inds_for_pred);
// Working state of the imputer.
// NOTE(review): descriptions are read off the identifiers and types; confirm against IterativeImputer.cpp.

// Train and test indices.
101 vector<int> train_idx, test_idx;
// Per-entry train markers stored as char. NOTE(review): presumably one entry per sample — confirm.
102 vector<char> is_train;
// Sets of train and test ids.
104 unordered_set<int> train_ids, test_ids;
// One feature_info record per handled feature.
108 vector<feature_info> feats;
// Imputers for the first round (see apply_first_round).
111 vector<FeatureImputer> first_round_imputers;
// Order of predictors, as ints. NOTE(review): presumably indices into feats — confirm.
114 vector<int> predictors_order;
// Raw MedPredictor pointers in a two-level vector.
// NOTE(review): ownership and lifetime of these pointers are not visible in this header — confirm who deletes them.
115 vector<vector<MedPredictor *>> predictors;
// Initializes from the parsed fields of the init command.
// Forwards `mapper` to params.init and returns its result unchanged.
119 int init(map<string, string>& mapper) {
return params.init(mapper); }
// Operates on the float array `arr` of length `len`, given a resolution and a [_min, _max] pair.
// Declaration only; the definition is not in this header.
// NOTE(review): presumably rounds each value to `resolution` and limits it to [_min, _max],
// modifying `arr` in place — TODO confirm in IterativeImputer.cpp.
// Returns an int; the meaning of the value is not visible here.
125 int round_arr(
float *arr,
int len,
float resolution,
float _min,
float _max);
// Runs the first round on the features matrix `mfd`, which is passed by non-const reference.
// `learning` distinguishes the learning call from the apply call.
// Declaration only; the definition is not in this header.
// Returns an int; the meaning of the value is not visible here.
128 int apply_first_round(
MedFeatures &mfd,
bool learning);
// Finds the features to learn from for the feature at index `f_idx`.
// Declaration only; the definition is not in this header.
// NOTE(review): `f_idx` is presumably an index into `feats` — confirm.
// Returns an int; the meaning of the value is not visible here.
131 int find_feats_to_learn_from(
int f_idx);
// Initializes from the parsed fields of the init command.
// Forwards `mapper` to imputer.init and returns its result unchanged.
161 int init(map<string, string>& mapper) {
return imputer.
init(mapper); }
// Applies the imputer to `features`.
// Forwards both arguments to imputer.Apply and returns its result unchanged.
//   features - features matrix, passed by non-const reference
//   learning - passed through to imputer.Apply
169 int _apply(
MedFeatures& features,
bool learning) {
return imputer.Apply(features, learning); }
A virtual class of processes on MedFeatures; E.g.
@ FTR_PROCESS_ITERATIVE_IMPUTER
"iterative_imputer" to create IterativeImputer
Definition FeatureProcess.h:34
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition IterativeImputer.h:150
int _apply(MedFeatures &features, unordered_set< int > &ids, bool learning)
Apply imputing model on subset of ids (TBI)
Definition IterativeImputer.cpp:598
void update_req_features_vec(unordered_set< string > &out_req_features, unordered_set< string > &in_req_features)
Update the set of features required as input, according to the set of features required as output of the processor
Definition IterativeImputer.cpp:586
int Learn(MedFeatures &features, unordered_set< int > &ids)
Learn imputing model on subset of ids (TBI)
Definition IterativeImputer.cpp:605
bool are_features_affected(unordered_set< string > &out_req_features)
check if a set of features is affected by the current processor
Definition IterativeImputer.cpp:566
int init(map< string, string > &mapper)
The parsed fields from init command.
Definition IterativeImputer.h:161
Definition FeatureProcess.h:51
Definition IterativeImputer.h:11
int init(map< string, string > &mapper)
Virtual to init object from parsed fields.
Definition IterativeImputer.cpp:59
IterativeImputer A general strong imputer that does the following: (1) Runs a simple stratified im...
Definition IterativeImputer.h:95
int init(map< string, string > &mapper)
The parsed fields from init command.
Definition IterativeImputer.h:119
int init_feature_info(MedFeatures &mfd, string feat_name)
Definition IterativeImputer.cpp:119
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Definition SerializableObject.h:32
Definition IterativeImputer.h:48