Medial Code Documentation
Loading...
Searching...
No Matches
Public Member Functions | Data Fields
FeatureImputer Class Reference

Feature Imputer to complete missing values. More...

#include <FeatureProcess.h>

Inheritance diagram for FeatureImputer:
FeatureProcessor SerializableObject

Public Member Functions

 FeatureImputer (const string &feature_name)
 
 FeatureImputer (const string &feature_name, string init_string)
 
void addStrata (string &init_string)
 
void addStrata (featureStrata &strata)
 
void addStrata (string &name, float resolution, float min, float max)
 
int init (map< string, string > &mapper)
 The parsed fields from init command.
 
void init_defaults ()
 
imputeMomentTypes getMomentType (string &entry)
 
void update_req_features_vec (unordered_set< string > &out_req_features, unordered_set< string > &in_req_features)
 Update the set of features required as input according to the set of features required as output from the processor.
 
virtual void copy (FeatureProcessor *processor)
 
int Learn (MedFeatures &features, unordered_set< int > &ids)
 
int _apply (MedFeatures &features, unordered_set< int > &ids, bool learning)
 
void check_stratas_name (MedFeatures &features, map< string, string > &strata_name_conversion)
 
 ADD_SERIALIZATION_FUNCS (processor_type, feature_name, resolved_feature_name, missing_value, imputerStrata, moment_type, moments, histograms, strata_sizes, default_moment, default_histogram, moment_type_vec, moments_vec, default_moment_vec, leave_missing_for_small_stratas, impute_strata_with_missing, round_to_existing_value, existing_values)
 
void dprint (const string &pref, int fp_flag)
 
void print ()
 debug and print
 
- Public Member Functions inherited from FeatureProcessor
virtual string select_learn_matrix (const vector< string > &matrix_tags) const
 Called before learn to create a new version of the matrix if needed, in parallel to the existing matrix.
 
virtual void clear ()
 
void init_defaults ()
 
virtual void set_feature_name (const string &feature_name)
 
virtual string get_feature_name ()
 
virtual void get_feature_names (vector< string > &feature_names)
 
int learn (MedFeatures &features)
 PostProcess of MedFeatures - on all ids.
 
int learn (MedFeatures &features, unordered_set< int > &ids)
 
virtual int _apply (MedFeatures &features, unordered_set< int > &ids)
 
virtual int _conditional_apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features, bool learning)
 
int apply (MedFeatures &features, bool learning)
 PostProcess of MedFeatures - on all or a subset of the ids; calls the virtual function "_apply/_conditional_apply" for the specific implementation.
 
int apply (MedFeatures &features, unordered_set< string > &req_features, bool learning)
 
int apply (MedFeatures &features, unordered_set< int > &ids, bool learning)
 
int apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features, bool learning)
 
int apply (MedFeatures &features)
 
int apply (MedFeatures &features, unordered_set< string > &req_features)
 
int apply (MedFeatures &features, unordered_set< int > &ids)
 
int apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features)
 
virtual int init (void *processor_params)
 
virtual int filter (unordered_set< string > &features)
 Filter according to a subset of features.
 
string resolve_feature_name (MedFeatures &features, string substr)
 Utility : get corresponding name in MedFeatures.
 
virtual bool are_features_affected (unordered_set< string > &out_req_features)
 check if a set of features is affected by the current processor
 
virtual bool is_selector ()
 allows testing if this feature processor is a selector
 
void * new_polymorphic (string derived_class_name)
 For polymorphic classes that need to serialize/deserialize a pointer to a derived class: implement this function to return a new instance of the derived class given its type name (as in my_type).
 
size_t get_processor_size ()
 
size_t processor_serialize (unsigned char *blob)
 
virtual void dprint (const string &pref, int rp_flag)
 
- Public Member Functions inherited from SerializableObject
virtual int version () const
 Relevant for serializations.
 
virtual string my_class_name () const
 For better handling of serialization, it is highly recommended that each class inheriting from SerializableObject implement the following method.
 
virtual void serialized_fields_name (vector< string > &field_names) const
 The names of the serialized fields.
 
virtual void pre_serialization ()
 
virtual void post_deserialization ()
 
virtual size_t get_size ()
 Gets bytes sizes for serializations.
 
virtual size_t serialize (unsigned char *blob)
 Serializing object to blob memory. Returns the number of bytes written to memory.
 
virtual size_t deserialize (unsigned char *blob)
 Deserializing blob to object. Returns the number of bytes read.
 
size_t serialize_vec (vector< unsigned char > &blob)
 
size_t deserialize_vec (vector< unsigned char > &blob)
 
virtual size_t serialize (vector< unsigned char > &blob)
 
virtual size_t deserialize (vector< unsigned char > &blob)
 
virtual int read_from_file (const string &fname)
 read and deserialize model
 
virtual int write_to_file (const string &fname)
 serialize model and write to file
 
virtual int read_from_file_unsafe (const string &fname)
 read and deserialize model without checking version number - unsafe read
 
int init_from_string (string init_string)
 Init from string.
 
int init_params_from_file (string init_file)
 
int init_param_from_file (string file_str, string &param)
 
int update_from_string (const string &init_string)
 
virtual int update (map< string, string > &map)
 Virtual to update object from parsed fields.
 
virtual string object_json () const
 

Data Fields

float missing_value = MED_MAT_MISSING_VALUE
 
bool verbose = true
 If true, will print how many missing values were in each feature.
 
bool verbose_learn = false
 If true will call print after learn.
 
featureSetStrata imputerStrata
 
int min_samples = 50
 
int leave_missing_for_small_stratas = 0
 
int impute_strata_with_missing = 0
 
vector< imputeMomentTypes > moment_type_vec
 
vector< float > default_moment_vec
 
vector< vector< float > > moments_vec
 
imputeMomentTypes moment_type = IMPUTE_MMNT_MEAN
 
float default_moment
 
vector< float > moments
 
vector< pair< float, float > > default_histogram
 
vector< vector< pair< float, float > > > histograms
 
vector< int > strata_sizes
 
int max_samples = 100000
 Utility : maximum number of samples to take for moments calculations.
 
bool round_to_existing_value = true
 
vector< float > existing_values
 
int fp_flag
 
- Data Fields inherited from FeatureProcessor
string feature_name = "unset_feature_name"
 Feature name (+ name as it appears in MedFeatures).
 
string resolved_feature_name
 
FeatureProcessorTypes processor_type = FTR_PROCESS_LAST
 
int learn_nthreads
 
int clean_nthreads
 

Additional Inherited Members

- Static Public Member Functions inherited from FeatureProcessor
static FeatureProcessor * make_processor (string processor_name)
 
static FeatureProcessor * make_processor (FeatureProcessorTypes type)
 
static FeatureProcessor * make_processor (string processor_name, string params)
 
static FeatureProcessor * make_processor (FeatureProcessorTypes type, string params)
 

Detailed Description

Feature Imputer to complete missing values.

To use this imputer, specify "imputer" in the fp_type.

Member Function Documentation

◆ _apply()

int FeatureImputer::_apply ( MedFeatures &  features,
unordered_set< int > &  ids,
bool  learning 
)
virtual

Reimplemented from FeatureProcessor.

◆ copy()

virtual void FeatureImputer::copy ( FeatureProcessor *  processor)
inlinevirtual

Reimplemented from FeatureProcessor.

◆ init()

int FeatureImputer::init ( map< string, string > &  mapper)
virtual

The parsed fields from init command.

if (field == "name") feature_name = entry.second;
else if (field == "min_samples") min_samples = med_stoi(entry.second);
else if (field == "moment_type") {
moment_type_vec.resize(2);
moment_type_vec[0] = getMomentType(entry.second);
moment_type_vec[1] = moment_type_vec[0];
}
else if (field == "learn_moment_type") {
moment_type_vec.resize(2, IMPUTE_MMNT_MEAN);
moment_type_vec[0] = getMomentType(entry.second);
}
else if (field == "apply_moment_type") {
moment_type_vec.resize(2, IMPUTE_MMNT_MEAN);
moment_type_vec[1] = getMomentType(entry.second);
}
else if (field == "max_samples") max_samples = med_stoi(entry.second);
else if (field == "strata") {
boost::split(strata, entry.second, boost::is_any_of(":"));
for (string& stratum : strata) addStrata(stratum);
}
else if (field == "verbose")
verbose = stoi(entry.second) > 0;
else if (field == "verbose_learn")
verbose_learn = stoi(entry.second) > 0;
else if (field == "round_to_existing_value")
round_to_existing_value = stoi(entry.second) > 0;
else if (field == "leave_missing_for_small_stratas") leave_missing_for_small_stratas = med_stoi(entry.second);
else if (field == "impute_strata_with_missing") impute_strata_with_missing = med_stoi(entry.second);
else if (field != "names" && field != "fp_type" && field != "tag")
MLOG("Unknown parameter \'%s\' for FeatureImputer\n", field.c_str());
#define MLOG(fmt,...)
MLOG() - use LOCAL_SECTION and LOCAL_LEVEL.
Definition Logger.h:145
bool verbose
If true, will print how many missing values were in each feature.
Definition FeatureProcess.h:455
bool verbose_learn
If true will call print after learn.
Definition FeatureProcess.h:456
int max_samples
Utility : maximum number of samples to take for moments calculations.
Definition FeatureProcess.h:487
string feature_name
Feature name (+ name as it appears in MedFeatures).
Definition FeatureProcess.h:55

[FeatureImputer::init]

[FeatureImputer::init]

Reimplemented from FeatureProcessor.

◆ Learn()

int FeatureImputer::Learn ( MedFeatures &  features,
unordered_set< int > &  ids 
)
virtual

Reimplemented from FeatureProcessor.

◆ update_req_features_vec()

void FeatureImputer::update_req_features_vec ( unordered_set< string > &  out_req_features,
unordered_set< string > &  in_req_features 
)
virtual

Update the set of features required as input according to the set of features required as output from the processor.

Reimplemented from FeatureProcessor.


The documentation for this class was generated from the following files: