1#ifndef TRAIN_WITH_MISSING_PROCESSOR_H__
2#define TRAIN_WITH_MISSING_PROCESSOR_H__
/// Initializes the processor from a string key/value map (`mapper`) and returns an int status.
/// NOTE(review): presumably parses the configuration fields of this processor from the init
/// command; the implementation is not in this header (cross-referenced to the .cpp) — confirm there.
42 int init(map<string, string>& mapper);
/// Debug-print hook; only declared here, the implementation is not visible in this header.
/// NOTE(review): `pref` looks like a prefix for the printed output and `fp_flag` like a
/// print-mode selector — confirm against the implementation.
46 void dprint(
const string &pref,
int fp_flag);
/// Apply-time hook: intentionally does nothing and always returns 0.
/// Neither `features` nor `ids` is read or modified here.
/// NOTE(review): presumably a no-op because this processor only changes the matrix used for
/// training (see the class description) — confirm.
49 int _apply(
MedFeatures& features, unordered_set<int>& ids) {
return 0; }
/// Learning step over `features`; only declared here, the implementation is not visible in this header.
/// NOTE(review): per the class description this is presumably where missing values are added to
/// the train matrix, and `ids` presumably restricts which samples are considered — confirm in the .cpp.
50 int Learn(
MedFeatures& features, unordered_set<int>& ids);
// NOTE(review): presumably, for each feature group, the indices of the features that belong to it — confirm.
58 vector<vector<int>> group2Inds;
// NOTE(review): presumably the names of the groups, parallel to group2Inds — confirm.
59 vector<string> groupNames;
A virtual class of processes on MedFeatures; E.g.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition FeatureProcess.h:51
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
ResampleMissingProcessor: Add missing values to the train matrix for the train process.
Definition ResampleWithMissingProcessor.h:11
string grouping
grouping file or "BY_SIGNAL" keyword to group by signal or "BY_SIGNAL_CATEG" - for category signal to...
Definition ResampleWithMissingProcessor.h:19
vector< string > selected_tags
the selected tags to activate on
Definition ResampleWithMissingProcessor.h:13
int subsample_train
if not zero, will use this to subsample the original train samples down to this number
Definition ResampleWithMissingProcessor.h:31
int init(map< string, string > &mapper)
The parsed fields from init command.
Definition ResampleWithMissingProcessor.cpp:22
bool uniform_rand
if true, will sample masks uniformly
Definition ResampleWithMissingProcessor.h:26
bool duplicate_only_with_missing
flag to indicate whether to duplicate only rows with missing values
Definition ResampleWithMissingProcessor.h:16
bool use_shuffle
if not sampling uniformly: if true, will use shuffle (to speed up runtime)
Definition ResampleWithMissingProcessor.h:29
bool verbose
print verbose
Definition ResampleWithMissingProcessor.h:32
vector< string > removed_tags
blacklist of tags to skip
Definition ResampleWithMissingProcessor.h:14
float uniform_rand_p
the p for uniform rand
Definition ResampleWithMissingProcessor.h:27
int add_new_data
how many new data points to add for train according to the sampled masks
Definition ResampleWithMissingProcessor.h:21
int limit_mask_size
if set, will limit the mask size in the train - the maximal number of missing values
Definition ResampleWithMissingProcessor.h:23
float missing_value
missing value
Definition ResampleWithMissingProcessor.h:15
string select_learn_matrix(const vector< string > &matrix_tags) const
Will be called before learn to create new version for the matrix if needed - in parallel of existing ...
Definition ResampleWithMissingProcessor.cpp:74
bool sample_masks_with_repeats
Whether or not to sample masks with repeats.
Definition ResampleWithMissingProcessor.h:24