Medial Code Documentation
Loading...
Searching...
No Matches
ResampleWithMissingProcessor.h
1#ifndef TRAIN_WITH_MISSING_PROCESSOR_H__
2#define TRAIN_WITH_MISSING_PROCESSOR_H__
3
4#include "FeatureProcess.h"
5
12public:
13 vector<string> selected_tags;
14 vector<string> removed_tags;
17
18 //grouping of imputation - for example, handle imputations by signal (or other groups):
19 string grouping;
20
22 //Sampling options - general parameters:
25 //Sampling random
28 //When sampling is not random (uniform_rand is false)
30
32 bool verbose;
33
34 string select_learn_matrix(const vector<string> &matrix_tags) const;
35
36 ResampleMissingProcessor() : FeatureProcessor() { init_defaults(); }
37 // Copy
38 virtual void copy(FeatureProcessor *processor) { *this = *(dynamic_cast<ResampleMissingProcessor *>(processor)); }
39
42 int init(map<string, string>& mapper);
43 void init_defaults();
44
45 //print function
46 void dprint(const string &pref, int fp_flag);
47
48 // _apply does nothing (no-op at apply time)
49 int _apply(MedFeatures& features, unordered_set<int>& ids) { return 0; }
50 int Learn(MedFeatures& features, unordered_set<int>& ids);
51
52 // Serialization
53 ADD_CLASS_NAME(ResampleMissingProcessor)
57private:
58 vector<vector<int>> group2Inds;
59 vector<string> groupNames;
60};
61
63
64#endif // !TRAIN_WITH_MISSING_PROCESSOR_H__
65
A virtual class of processes on MedFeatures; E.g.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition FeatureProcess.h:51
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
ResampleMissingProcessor: Add missing values to the train matrix for the train process.
Definition ResampleWithMissingProcessor.h:11
string grouping
grouping file or "BY_SIGNAL" keyword to group by signal or "BY_SIGNAL_CATEG" - for category signal to...
Definition ResampleWithMissingProcessor.h:19
vector< string > selected_tags
the selected tags to activate on
Definition ResampleWithMissingProcessor.h:13
int subsample_train
if not zero, will use this to subsample the original train samples to this number
Definition ResampleWithMissingProcessor.h:31
int init(map< string, string > &mapper)
The parsed fields from init command.
Definition ResampleWithMissingProcessor.cpp:22
bool uniform_rand
if true, will sample masks uniformly
Definition ResampleWithMissingProcessor.h:26
bool duplicate_only_with_missing
flag to indicate whether to duplicate only rows with missing values
Definition ResampleWithMissingProcessor.h:16
bool use_shuffle
if not sampling uniformly: if true, will use shuffle (to speed up runtime)
Definition ResampleWithMissingProcessor.h:29
bool verbose
print verbose
Definition ResampleWithMissingProcessor.h:32
vector< string > removed_tags
blacklist of tags to skip
Definition ResampleWithMissingProcessor.h:14
float uniform_rand_p
the p for uniform rand
Definition ResampleWithMissingProcessor.h:27
int add_new_data
how many new data points to add for train according to sample masks
Definition ResampleWithMissingProcessor.h:21
int limit_mask_size
if set will limit mask size in the train - maximal number of missing values
Definition ResampleWithMissingProcessor.h:23
float missing_value
missing value
Definition ResampleWithMissingProcessor.h:15
string select_learn_matrix(const vector< string > &matrix_tags) const
Will be called before learn to create new version for the matrix if needed - in parallel of existing ...
Definition ResampleWithMissingProcessor.cpp:74
bool sample_masks_with_repeats
Whether or not to sample masks with repeats.
Definition ResampleWithMissingProcessor.h:24