|
string | select_learn_matrix (const vector< string > &matrix_tags) const |
| Will be called before learn to create a new version of the matrix if needed - in parallel to the existing matrix.
|
|
virtual void | copy (FeatureProcessor *processor) |
|
int | init (map< string, string > &mapper) |
| The parsed fields from init command.
|
|
void | init_defaults () |
|
void | dprint (const string &pref, int fp_flag) |
|
int | _apply (MedFeatures &features, unordered_set< int > &ids) |
|
int | Learn (MedFeatures &features, unordered_set< int > &ids) |
|
virtual void | clear () |
|
void | init_defaults () |
|
virtual void | set_feature_name (const string &feature_name) |
|
virtual string | get_feature_name () |
|
virtual void | get_feature_names (vector< string > &feature_names) |
|
int | learn (MedFeatures &features) |
| PostProcess of MedFeatures - on all ids.
|
|
int | learn (MedFeatures &features, unordered_set< int > &ids) |
|
virtual int | _apply (MedFeatures &features, unordered_set< int > &ids, bool learning) |
|
virtual int | _conditional_apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features, bool learning) |
|
int | apply (MedFeatures &features, bool learning) |
| PostProcess of MedFeatures - on all or a subset of the ids. Calls the virtual function "_apply/_conditional_apply" for the specific implementation.
|
|
int | apply (MedFeatures &features, unordered_set< string > &req_features, bool learning) |
|
int | apply (MedFeatures &features, unordered_set< int > &ids, bool learning) |
|
int | apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features, bool learning) |
|
int | apply (MedFeatures &features) |
|
int | apply (MedFeatures &features, unordered_set< string > &req_features) |
|
int | apply (MedFeatures &features, unordered_set< int > &ids) |
|
int | apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features) |
|
virtual int | init (void *processor_params) |
|
virtual int | filter (unordered_set< string > &features) |
| Filter according to a subset of features.
|
|
string | resolve_feature_name (MedFeatures &features, string substr) |
| Utility : get corresponding name in MedFeatures.
|
|
virtual bool | are_features_affected (unordered_set< string > &out_req_features) |
| check if a set of features is affected by the current processor
|
|
virtual void | update_req_features_vec (unordered_set< string > &out_req_features, unordered_set< string > &in_req_features) |
| Update the set of features required as input according to the set required as output by the processor. Empty sets = require everything.
|
|
virtual bool | is_selector () |
| allows testing if this feature processor is a selector
|
|
void * | new_polymorphic (string derived_class_name) |
| For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type, one needs to implement this function to return a new pointer to the derived class given its type (as in my_type).
|
|
size_t | get_processor_size () |
|
size_t | processor_serialize (unsigned char *blob) |
|
virtual int | version () const |
| Relevant for serializations.
|
|
virtual string | my_class_name () const |
| For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method.
|
|
virtual void | serialized_fields_name (vector< string > &field_names) const |
| The names of the serialized fields.
|
|
virtual void | pre_serialization () |
|
virtual void | post_deserialization () |
|
virtual size_t | get_size () |
| Gets bytes sizes for serializations.
|
|
virtual size_t | serialize (unsigned char *blob) |
| Serializing object to blob memory. Returns number of bytes written to memory.
|
|
virtual size_t | deserialize (unsigned char *blob) |
| Deserializing blob to object. Returns number of bytes read.
|
|
size_t | serialize_vec (vector< unsigned char > &blob) |
|
size_t | deserialize_vec (vector< unsigned char > &blob) |
|
virtual size_t | serialize (vector< unsigned char > &blob) |
|
virtual size_t | deserialize (vector< unsigned char > &blob) |
|
virtual int | read_from_file (const string &fname) |
| read and deserialize model
|
|
virtual int | write_to_file (const string &fname) |
| serialize model and write to file
|
|
virtual int | read_from_file_unsafe (const string &fname) |
| read and deserialize model without checking version number - unsafe read
|
|
int | init_from_string (string init_string) |
| Init from string.
|
|
int | init_params_from_file (string init_file) |
|
int | init_param_from_file (string file_str, string &param) |
|
int | update_from_string (const string &init_string) |
|
virtual int | update (map< string, string > &map) |
| Virtual to update object from parsed fields.
|
|
virtual string | object_json () const |
|
|
vector< string > | selected_tags |
| the selected tags to activate on
|
|
vector< string > | removed_tags |
| blacklist of tags to skip
|
|
float | missing_value |
| missing value
|
|
bool | duplicate_only_with_missing |
| flag to indicate whether to duplicate only rows with missing values
|
|
string | grouping |
| grouping file or "BY_SIGNAL" keyword to group by signal or "BY_SIGNAL_CATEG" - for category signal to split by values (aggregates time windows) or "BY_SIGNAL_CATEG_TREND" - also split by TRENDS
|
|
int | add_new_data |
| how many new data points to add for train according to sample masks
|
|
int | limit_mask_size |
| if set will limit mask size in the train - maximal number of missing values
|
|
bool | sample_masks_with_repeats |
| Whether or not to sample masks with repeats.
|
|
bool | uniform_rand |
| if True will sample masks uniformly
|
|
float | uniform_rand_p |
| the p for uniform rand
|
|
bool | use_shuffle |
| if not sampling uniformly: if true will use shuffle (to speed up runtime)
|
|
int | subsample_train |
| if not zero will use this to subsample original train samples to this number
|
|
bool | verbose |
| print verbose
|
|
ADD_SERIALIZATION_FUNCS(processor_type, selected_tags, removed_tags, missing_value, add_new_data, sample_masks_with_repeats, uniform_rand, uniform_rand_p, use_shuffle, subsample_train, limit_mask_size, grouping, groupNames, group2Inds, verbose, duplicate_only_with_missing) private vector< string > | groupNames |
|
string | feature_name = "unset_feature_name" |
| Feature name ( + name as appears in MedFeatures) ;.
|
|
string | resolved_feature_name |
|
FeatureProcessorTypes | processor_type = FTR_PROCESS_LAST |
|
int | learn_nthreads |
|
int | clean_nthreads |
|
ResampleMissingProcessor: Add missing values to the train matrix for the train process.
Should be first feature_processor before imputations/normalization if exists.