ResampleMissingProcessor: Add missing values to the train matrix for the train process. More...

#include <ResampleWithMissingProcessor.h>

Inheritance diagram for ResampleMissingProcessor:

Public Member Functions
string	select_learn_matrix (const vector< string > &matrix_tags) const
	Will be called before learn to create a new version of the matrix if needed - in parallel to the existing matrix.

virtual void	copy (FeatureProcessor *processor)

int	init (map< string, string > &mapper)
	The parsed fields from init command.

void	init_defaults ()

void	dprint (const string &pref, int fp_flag)

int	_apply (MedFeatures &features, unordered_set< int > &ids)

int	Learn (MedFeatures &features, unordered_set< int > &ids)

Public Member Functions inherited from FeatureProcessor
virtual void	clear ()

void	init_defaults ()

virtual void	set_feature_name (const string &feature_name)

virtual string	get_feature_name ()

virtual void	get_feature_names (vector< string > &feature_names)

int	learn (MedFeatures &features)
	PostProcess of MedFeatures - on all ids.

int	learn (MedFeatures &features, unordered_set< int > &ids)

virtual int	_apply (MedFeatures &features, unordered_set< int > &ids, bool learning)

virtual int	_conditional_apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features, bool learning)

int	apply (MedFeatures &features, bool learning)
	PostProcess of MedFeatures - on all or a subset of the ids. Calls the virtual function "_apply/_conditional_apply" for the specific implementation.

int	apply (MedFeatures &features, unordered_set< string > &req_features, bool learning)

int	apply (MedFeatures &features, unordered_set< int > &ids, bool learning)

int	apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features, bool learning)

int	apply (MedFeatures &features)

int	apply (MedFeatures &features, unordered_set< string > &req_features)

int	apply (MedFeatures &features, unordered_set< int > &ids)

int	apply (MedFeatures &features, unordered_set< int > &ids, unordered_set< string > &req_features)

virtual int	init (void *processor_params)

virtual int	filter (unordered_set< string > &features)
	Filter according to a subset of features.

string	resolve_feature_name (MedFeatures &features, string substr)
	Utility : get corresponding name in MedFeatures.

virtual bool	are_features_affected (unordered_set< string > &out_req_features)
	check if a set of features is affected by the current processor

virtual void	update_req_features_vec (unordered_set< string > &out_req_features, unordered_set< string > &in_req_features)
	Update the set of features required as input according to the set required as output by the processor. Empty sets = require everything.

virtual bool	is_selector ()
	allows testing if this feature processor is a selector

void *	new_polymorphic (string derived_class_name)
	For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type, one needs to implement this function to return a new instance of the derived class given its type (as in my_type).

size_t	get_processor_size ()

size_t	processor_serialize (unsigned char *blob)

Public Member Functions inherited from SerializableObject
virtual int	version () const
	Relevant for serializations.

virtual string	my_class_name () const
	For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method.

virtual void	serialized_fields_name (vector< string > &field_names) const
	The names of the serialized fields.

virtual void	pre_serialization ()

virtual void	post_deserialization ()

virtual size_t	get_size ()
	Gets bytes sizes for serializations.

virtual size_t	serialize (unsigned char *blob)
	Serializing object to blob memory. Returns number of bytes written to memory.

virtual size_t	deserialize (unsigned char *blob)
	Deserializing blob to object. Returns number of bytes read.

size_t	serialize_vec (vector< unsigned char > &blob)

size_t	deserialize_vec (vector< unsigned char > &blob)

virtual size_t	serialize (vector< unsigned char > &blob)

virtual size_t	deserialize (vector< unsigned char > &blob)

virtual int	read_from_file (const string &fname)
	read and deserialize model

virtual int	write_to_file (const string &fname)
	serialize model and write to file

virtual int	read_from_file_unsafe (const string &fname)
	read and deserialize model without checking version number - unsafe read

int	init_from_string (string init_string)
	Init from string.

int	init_params_from_file (string init_file)

int	init_param_from_file (string file_str, string &param)

int	update_from_string (const string &init_string)

virtual int	update (map< string, string > &map)
	Virtual to update object from parsed fields.

virtual string	object_json () const

Data Fields
vector< string >	selected_tags
	the selected tags to activate on

vector< string >	removed_tags
	blacklist of tags to skip

float	missing_value
	missing value

bool	duplicate_only_with_missing
	flag to indicate whether to duplicate only rows with missing values

string	grouping
	grouping file or "BY_SIGNAL" keyword to group by signal or "BY_SIGNAL_CATEG" - for category signal to split by values (aggregates time windows) or "BY_SIGNAL_CATEG_TREND" - also split by TRENDS

int	add_new_data
	how many new data points to add for train according to sample masks

int	limit_mask_size
	if set will limit mask size in the train - maximal number of missing values

bool	sample_masks_with_repeats
	Whether or not to sample masks with repeats.

bool	uniform_rand
	if true will sample masks uniformly

float	uniform_rand_p
	the p for uniform rand

bool	use_shuffle
	if not sampling uniformly, if true will use shuffle (to speed up runtime)

int	subsample_train
	if not zero will use this to subsample original train samples to this number

bool	verbose
	print verbose

ADD_SERIALIZATION_FUNCS(processor_type, selected_tags, removed_tags, missing_value, add_new_data, sample_masks_with_repeats, uniform_rand, uniform_rand_p, use_shuffle, subsample_train, limit_mask_size, grouping, groupNames, group2Inds, verbose, duplicate_only_with_missing) private vector< string >	groupNames

Data Fields inherited from FeatureProcessor
string	feature_name = "unset_feature_name"
	Feature name (+ name as it appears in MedFeatures).

string	resolved_feature_name

FeatureProcessorTypes	processor_type = FTR_PROCESS_LAST

int	learn_nthreads

int	clean_nthreads

Additional Inherited Members
Static Public Member Functions inherited from FeatureProcessor
static FeatureProcessor *	make_processor (string processor_name)

static FeatureProcessor *	make_processor (FeatureProcessorTypes type)

static FeatureProcessor *	make_processor (string processor_name, string params)

static FeatureProcessor *	make_processor (FeatureProcessorTypes type, string params)

Detailed Description

ResampleMissingProcessor: Add missing values to the train matrix for the train process.

Should be first feature_processor before imputations/normalization if exists.

Member Function Documentation

◆ _apply()

int ResampleMissingProcessor::_apply	(	MedFeatures &	features,
		unordered_set< int > &	ids
	)

inlinevirtual

Reimplemented from FeatureProcessor.

◆ copy()

virtual void ResampleMissingProcessor::copy ( FeatureProcessor * processor )

inlinevirtual

Reimplemented from FeatureProcessor.

◆ dprint()

void ResampleMissingProcessor::dprint	(	const string &	pref,
		int	fp_flag
	)

virtual

Reimplemented from FeatureProcessor.

◆ init()

int ResampleMissingProcessor::init ( map< string, string > & mapper )

virtual

The parsed fields from init command.

[ResampleMissingProcessor::init]

Reimplemented from FeatureProcessor.

◆ Learn()

int ResampleMissingProcessor::Learn	(	MedFeatures &	features,
		unordered_set< int > &	ids
	)

virtual

Reimplemented from FeatureProcessor.

◆ select_learn_matrix()

string ResampleMissingProcessor::select_learn_matrix ( const vector< string > & matrix_tags ) const

virtual

Will be called before learn to create a new version of the matrix if needed - in parallel to the existing matrix.

Reimplemented from FeatureProcessor.

The documentation for this class was generated from the following files:

Internal/MedProcessTools/MedProcessTools/ResampleWithMissingProcessor.h
Internal/MedProcessTools/MedProcessTools/ResampleWithMissingProcessor.cpp

Public Member Functions

Data Fields

Additional Inherited Members

Detailed Description

Member Function Documentation

◆ _apply()

◆ copy()

◆ dprint()

◆ init()

◆ Learn()

◆ select_learn_matrix()