Medial Code Documentation
Loading...
Searching...
No Matches
Public Member Functions | Data Fields
CategoryDependencyGenerator Class Reference

Creates multiple features based on categorical values and statistical dependency strength by Age, Gender groups. More...

#include <FeatureGenerator.h>

Inheritance diagram for CategoryDependencyGenerator:
FeatureGenerator SerializableObject

Public Member Functions

void set_signal_ids (MedSignals &sigs)
 
void init_tables (MedDictionarySections &dict)
 
void init_defaults ()
 
virtual void copy (FeatureGenerator *generator)
 
int init (map< string, string > &mapper)
 The parsed fields from init command.
 
int update (map< string, string > &mapper)
 Virtual to update object from parsed fields.
 
void set_names ()
 
int filter_features (unordered_set< string > &validFeatures)
 Prints summary of generator job.
 
int _learn (MedPidRepository &rep, const MedSamples &samples, vector< RepProcessor * > processors)
 
int _generate (PidDynamicRec &rec, MedFeatures &features, int index, int num, vector< float * > &_p_data)
 
int nfeatures ()
 
void get_required_signal_categories (unordered_map< string, vector< string > > &signal_categories_in_use) const
 Returns, for each used signal, its used categories.
 
- Public Member Functions inherited from FeatureGenerator
virtual void prepare (MedFeatures &features, MedPidRepository &rep, MedSamples &samples)
 
virtual void get_p_data (MedFeatures &features, vector< float * > &_p_data)
 
void get_p_data (MedFeatures &features)
 
virtual void clear ()
 
void get_required_signal_names (unordered_set< string > &signalNames)
 
virtual void set_required_signal_ids (MedDictionarySections &dict)
 
void get_required_signal_ids (unordered_set< int > &signalIds)
 
virtual void get_generated_features (unordered_set< string > &names_list)
 
virtual void fit_for_repository (MedPidRepository &rep)
 Preparation and adjustment for model based on repository.
 
int learn (MedPidRepository &rep, const MedSamples &samples, vector< RepProcessor * > processors)
 
int learn (MedPidRepository &rep, const MedSamples &samples)
 
int _generate (PidDynamicRec &in_rep, MedFeatures &features, int index, int num)
 
int generate (PidDynamicRec &in_rep, MedFeatures &features, int index, int num)
 
int generate (PidDynamicRec &in_rep, MedFeatures &features)
 
int generate (MedPidRepository &rep, int id, MedFeatures &features)
 
int generate (MedPidRepository &rep, int id, MedFeatures &features, int index, int num)
 
virtual int _generate (MedFeatures &features)
 
int generate (MedFeatures &features)
 
virtual int init (void *generator_params)
 
virtual void make_summary ()
 
void * new_polymorphic (string derived_class_name)
 For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type, one needs to implement this function to return a new instance of the derived class given its type (as in my_type).
 
size_t get_generator_size ()
 
size_t generator_serialize (unsigned char *blob)
 
virtual void print ()
 
virtual void dprint (const string &pref, int fg_flag)
 
- Public Member Functions inherited from SerializableObject
virtual int version () const
 Relevant for serializations.
 
virtual string my_class_name () const
 For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method.
 
virtual void serialized_fields_name (vector< string > &field_names) const
 The names of the serialized fields.
 
virtual void pre_serialization ()
 
virtual void post_deserialization ()
 
virtual size_t get_size ()
 Gets bytes sizes for serializations.
 
virtual size_t serialize (unsigned char *blob)
 Serializing object to blob memory. Returns number of bytes written to memory.
 
virtual size_t deserialize (unsigned char *blob)
 Deserializing blob to object. Returns number of bytes read.
 
size_t serialize_vec (vector< unsigned char > &blob)
 
size_t deserialize_vec (vector< unsigned char > &blob)
 
virtual size_t serialize (vector< unsigned char > &blob)
 
virtual size_t deserialize (vector< unsigned char > &blob)
 
virtual int read_from_file (const string &fname)
 read and deserialize model
 
virtual int write_to_file (const string &fname)
 serialize model and write to file
 
virtual int read_from_file_unsafe (const string &fname)
 read and deserialize model without checking version number - unsafe read
 
int init_from_string (string init_string)
 Init from string.
 
int init_params_from_file (string init_file)
 
int init_param_from_file (string file_str, string &param)
 
int update_from_string (const string &init_string)
 
virtual string object_json () const
 

Data Fields

string signalName
 the signal name
 
int signalId
 
int time_channel
 n >= 0 : use time channel n , default: 0.
 
int val_channel
 n >= 0 : use val channel n , default : 0.
 
int win_from
 time window for feature: win_from is the minimal time before the prediction time
 
int win_to
 time window for feature: win_to is the maximal time before the prediction time

 
int time_unit_win
 the time unit in which the windows are given. Default: Undefined
 
int min_age
 minimal age for testing statistical dependency
 
int max_age
 maximal age for testing statistical dependency
 
int age_bin
 age bin for testing statistical dependency
 
string regex_filter
 regex filter for filtering categories in learn
 
string remove_regex_filter
 remove regex filter for filtering categories in learn
 
int min_code_cnt
 minimal number of occurrences to consider signal
 
float fdr
 the FDR value
 
int take_top
 maximal number of features to create
 
float lift_below
 filter lift to keep below it
 
float lift_above
 filter lift to keep above it
 
float filter_child_pval_diff
 below this threshold of pvalue diff change to remove child category (with AND condition on average lift change)
 
float filter_child_lift_ratio
 below this threshold of lift change to remove child category
 
float filter_child_count_ratio
 If child ratio count is too similar (small change from parent code), keep only the parent code.
 
float filter_child_removed_ratio
 If child removed ratio is beyond this and has other child taken - remove parent.
 
category_stat_test stat_metric
 statistical test
 
float chi_square_at_least
 chi_square arg to test for at least that change in lift to measure bigger difference
 
int minimal_chi_cnt
 chi_square arg to keep at least count to use row in calc
 
int sort_by_chi = 0
 sort results by chi-square
 
int max_depth
 maximal depth to go in hierarchy
 
int max_parents
 controls maximum parents count
 
bool use_fixed_lift
 If true, will also sort by lifts below 1.
 
bool filter_hierarchy
 Apply hierarchy filtering.
 
bool verbose
 In learn, will print selected features.
 
bool verbose_full
 If true will print a lot - table of all stats for each code.
 
string verbose_full_file
 output file for verbose_full debug in learn
 
string feature_prefix
 additional prefix to add to name to describe the feature
 
bool generate_with_counts
 If true will generate feature with counts not just as set.
 
vector< vector< string > > filter_set_by_val_channel
 filter set by value channels. can be initialized by "filter_set_by_val_channel_X":"string_set_for_val_channel_X",
 
vector< string > filter_set_by_val_channel_names
 naming for each set matched filter_set_by_val_channel variable
 
float male_regression_cntrl_lower
 lower limit mask on outcome for controls - important in regression
 
float male_regression_cntrl_upper
 upper limit mask on outcome for controls - important in regression
 
float male_regression_case_lower
 lower limit mask on outcome for cases - important in regression
 
float male_regression_case_upper
 upper limit mask on outcome for cases - important in regression
 
float female_regression_cntrl_lower
 lower limit mask on outcome for controls - important in regression
 
float female_regression_cntrl_upper
 upper limit mask on outcome for controls - important in regression
 
float female_regression_case_lower
 lower limit mask on outcome for cases - important in regression
 
float female_regression_case_upper
 upper limit mask on outcome for cases - important in regression
 
- Data Fields inherited from FeatureGenerator
FeatureGeneratorTypes generator_type = FTR_GEN_LAST
 Type.
 
vector< string > names
 Feature name.
 
int learn_nthreads = 16
 
int pred_nthreads = 16
 
float missing_val = (float)MED_MAT_MISSING_VALUE
 Missing value.
 
vector< string > tags
 Tags - for defining labels or groups. may be used later for filtering for example.
 
int iGenerateWeights = 0
 Feature/Weights generator.
 
vector< float * > p_data
 
vector< string > req_signals
 
vector< int > req_signal_ids
 
int serial_id
 

Additional Inherited Members

- Static Public Member Functions inherited from FeatureGenerator
static FeatureGenerator * make_generator (string name)
 
static FeatureGenerator * make_generator (string name, string params)
 
static FeatureGenerator * make_generator (FeatureGeneratorTypes type)
 
static FeatureGenerator * make_generator (FeatureGeneratorTypes type, string params)
 
static FeatureGenerator * create_generator (string &params)
 

Detailed Description

Creates multiple features based on categorical values and statistical dependency strength by Age, Gender groups.

Member Function Documentation

◆ _generate()

int CategoryDependencyGenerator::_generate ( PidDynamicRec &  rec,
MedFeatures &  features,
int  index,
int  num,
vector< float * > &  _p_data 
)
virtual

Reimplemented from FeatureGenerator.

◆ _learn()

int CategoryDependencyGenerator::_learn ( MedPidRepository &  rep,
const MedSamples &  samples,
vector< RepProcessor * >  processors 
)
virtual

Reimplemented from FeatureGenerator.

◆ copy()

virtual void CategoryDependencyGenerator::copy ( FeatureGenerator *  generator)
inline virtual

Reimplemented from FeatureGenerator.

◆ filter_features()

int CategoryDependencyGenerator::filter_features ( unordered_set< string > &  validFeatures)
virtual

Prints summary of generator job.

Optional, called after generate. For example - prints how many values were missing value.

Reimplemented from FeatureGenerator.

◆ get_required_signal_categories()

void CategoryDependencyGenerator::get_required_signal_categories ( unordered_map< string, vector< string > > &  signal_categories_in_use) const
virtual

Returns, for each used signal, its used categories.

Reimplemented from FeatureGenerator.

◆ init()

int CategoryDependencyGenerator::init ( map< string, string > &  mapper)
virtual

The parsed fields from init command.

if (it->first == "signal")
signalName = it->second;
else if (it->first == "val_channel")
val_channel = med_stoi(it->second);
else if (it->first == "time_channel")
time_channel = med_stoi(it->second);
else if (it->first == "win_from")
win_from = med_stoi(it->second);
else if (it->first == "win_to")
win_to = med_stoi(it->second);
else if (it->first == "time_unit_win")
time_unit_win = med_time_converter.string_to_type(it->second);
else if (it->first == "regex_filter")
regex_filter = it->second;
else if (it->first == "remove_regex_filter")
remove_regex_filter = it->second;
else if (it->first == "min_age")
min_age = med_stoi(it->second);
else if (it->first == "max_age")
max_age = med_stoi(it->second);
else if (it->first == "age_bin")
age_bin = med_stoi(it->second);
else if (it->first == "min_code_cnt")
min_code_cnt = med_stoi(it->second);
else if (it->first == "fdr")
fdr = med_stof(it->second);
else if (it->first == "take_top")
take_top = med_stoi(it->second);
else if (it->first == "filter_hierarchy")
filter_hierarchy = med_stoi(it->second) > 0;
else if (it->first == "lift_below")
lift_below = med_stof(it->second);
else if (it->first == "lift_above")
lift_above = med_stof(it->second);
else if (it->first == "filter_child_count_ratio")
filter_child_count_ratio = med_stof(it->second);
else if (it->first == "filter_child_lift_ratio")
filter_child_lift_ratio = med_stof(it->second);
else if (it->first == "filter_child_pval_diff")
filter_child_pval_diff = med_stof(it->second);
else if (it->first == "filter_child_removed_ratio")
filter_child_removed_ratio = med_stof(it->second);
else if (it->first == "chi_square_at_least")
chi_square_at_least = med_stof(it->second);
else if (it->first == "sort_by_chi")
sort_by_chi = med_stoi(it->second);
else if (it->first == "minimal_chi_cnt")
minimal_chi_cnt = med_stoi(it->second);
else if (it->first == "use_fixed_lift")
use_fixed_lift = med_stoi(it->second) > 0;
else if (it->first == "verbose")
verbose = med_stoi(it->second) > 0;
else if (it->first == "verbose_full")
verbose_full = med_stoi(it->second) > 0;
else if (it->first == "verbose_full_file")
verbose_full_file = it->second;
else if (it->first == "feature_prefix")
feature_prefix = it->second;
else if (it->first == "stat_metric") {
if (conv_map_stats.find(it->second) != conv_map_stats.end())
stat_metric = category_stat_test(conv_map_stats.at(it->second));
else
MTHROW_AND_ERR("Unknown stat_test \"%s\". options are: %s\n",
it->second.c_str(), medial::io::get_list(conv_map_stats).c_str());
}
else if (it->first == "max_depth")
max_depth = med_stoi(it->second);
else if (it->first == "max_parents")
max_parents = med_stoi(it->second);
else if (it->first == "generate_with_counts")
generate_with_counts = med_stoi(it->second) > 0;
else if (it->first == "regression_cntrl_lower") {
male_regression_cntrl_lower = med_stoi(it->second);
female_regression_cntrl_lower = med_stoi(it->second);
}
else if (it->first == "regression_cntrl_upper") {
male_regression_cntrl_upper = med_stoi(it->second);
female_regression_cntrl_upper = med_stoi(it->second);
}
else if (it->first == "regression_case_lower") {
male_regression_case_lower = med_stoi(it->second);
female_regression_case_lower = med_stoi(it->second);
}
else if (it->first == "regression_case_upper") {
male_regression_case_upper = med_stoi(it->second);
female_regression_case_upper = med_stoi(it->second);
}
else if (it->first == "male_regression_cntrl_lower")
male_regression_cntrl_lower = med_stoi(it->second);
else if (it->first == "male_regression_cntrl_upper")
male_regression_cntrl_upper = med_stoi(it->second);
else if (it->first == "male_regression_case_lower")
male_regression_case_lower = med_stoi(it->second);
else if (it->first == "male_regression_case_upper")
male_regression_case_upper = med_stoi(it->second);
else if (it->first == "female_regression_cntrl_lower")
female_regression_cntrl_lower = med_stoi(it->second);
else if (it->first == "female_regression_cntrl_upper")
female_regression_cntrl_upper = med_stoi(it->second);
else if (it->first == "female_regression_case_lower")
female_regression_case_lower = med_stoi(it->second);
else if (it->first == "female_regression_case_upper")
female_regression_case_upper = med_stoi(it->second);
else if (boost::starts_with(it->first, prefix_str)) {
int val_channel_f = med_stoi(it->first.substr(prefix_str.length()));
if (filter_set_by_val_channel.size() <= val_channel_f)
filter_set_by_val_channel.resize(val_channel_f + 1);
vector<string> &f_v_sets = filter_set_by_val_channel[val_channel_f];
boost::split(f_v_sets, it->second, boost::is_any_of(","));
}
else if (boost::starts_with(it->first, prefix_str_names)) {
int val_channel_f = med_stoi(it->first.substr(prefix_str_names.length()));
if (filter_set_by_val_channel_names.size() <= val_channel_f)
filter_set_by_val_channel_names.resize(val_channel_f + 1);
filter_set_by_val_channel_names[val_channel_f] = it->second;
}
else if (it->first == "fg_type") {}
else if (it->first == "tags") { boost::split(tags, it->second, boost::is_any_of(",")); }
else
MTHROW_AND_ERR("Unknown parameter \'%s\' for CategoryDependencyGenerator\n", it->first.c_str())
bool generate_with_counts
If true will generate feature with counts not just as set.
Definition FeatureGenerator.h:945
float filter_child_lift_ratio
below this threshold of lift change to remove child category
Definition FeatureGenerator.h:930
int max_depth
maximal depth to go in hierarchy
Definition FeatureGenerator.h:937
float male_regression_cntrl_lower
lower limit mask on outcome for controls - important in regression
Definition FeatureGenerator.h:949
string regex_filter
regex filter for filtering categories in learn
Definition FeatureGenerator.h:922
bool use_fixed_lift
If true, will also sort by lifts below 1.
Definition FeatureGenerator.h:939
string verbose_full_file
output file for verbose_full debug in learn
Definition FeatureGenerator.h:943
vector< string > filter_set_by_val_channel_names
naming for each set matched filter_set_by_val_channel variable
Definition FeatureGenerator.h:947
category_stat_test stat_metric
statistical test
Definition FeatureGenerator.h:933
float male_regression_cntrl_upper
upper limit mask on outcome for controls - important in regression
Definition FeatureGenerator.h:950
int min_code_cnt
minimal number of occurrences to consider signal
Definition FeatureGenerator.h:924
int age_bin
age bin for testing statistical dependency
Definition FeatureGenerator.h:921
float filter_child_pval_diff
below this threshold of pvalue diff change to remove child category (with AND condition on average li...
Definition FeatureGenerator.h:929
string remove_regex_filter
remove regex filter for filtering categories in learn
Definition FeatureGenerator.h:923
string signalName
the signal name
Definition FeatureGenerator.h:912
float female_regression_case_lower
lower limit mask on outcome for cases - important in regression
Definition FeatureGenerator.h:955
float male_regression_case_lower
lower limit mask on outcome for cases - important in regression
Definition FeatureGenerator.h:951
float lift_below
filter lift to keep below it
Definition FeatureGenerator.h:927
string feature_prefix
additional prefix to add to name to describe the feature
Definition FeatureGenerator.h:944
int time_channel
n >= 0 : use time channel n , default: 0.
Definition FeatureGenerator.h:914
int win_to
time window for feature: win_to is the maximal time before the prediction time
Definition FeatureGenerator.h:917
float female_regression_cntrl_lower
lower limit mask on outcome for controls - important in regression
Definition FeatureGenerator.h:953
int minimal_chi_cnt
chi_square arg to keep at least count to use row in calc
Definition FeatureGenerator.h:935
vector< vector< string > > filter_set_by_val_channel
filter set by value channels. can be initialized by "filter_set_by_val_channel_X":"string_set_for_val...
Definition FeatureGenerator.h:946
int max_parents
controls maximum parents count
Definition FeatureGenerator.h:938
float filter_child_removed_ratio
If child removed ratio is beyond this and has other child taken - remove parent.
Definition FeatureGenerator.h:932
float female_regression_case_upper
upper limit mask on outcome for cases - important in regression
Definition FeatureGenerator.h:956
int win_from
time window for feature: win_from is the minimal time before the prediction time
Definition FeatureGenerator.h:916
int max_age
maximal age for testing statistical dependency
Definition FeatureGenerator.h:920
float male_regression_case_upper
upper limit mask on outcome for cases - important in regression
Definition FeatureGenerator.h:952
int take_top
maximal number of features to create
Definition FeatureGenerator.h:926
float fdr
the FDR value
Definition FeatureGenerator.h:925
bool verbose_full
If true will print a lot - table of all stats for each code.
Definition FeatureGenerator.h:942
bool verbose
Apply hierarchy filtering.
Definition FeatureGenerator.h:941
int min_age
minimal age for testing statistical dependency
Definition FeatureGenerator.h:919
float female_regression_cntrl_upper
upper limit mask on outcome for controls - important in regression
Definition FeatureGenerator.h:954
float lift_above
filter lift to keep above it
Definition FeatureGenerator.h:928
int val_channel
n >= 0 : use val channel n , default : 0.
Definition FeatureGenerator.h:915
float filter_child_count_ratio
If child ratio count is too similar (small change from parent code), keep only the parent code.
Definition FeatureGenerator.h:931
int time_unit_win
the time unit in which the windows are given. Default: Undefined
Definition FeatureGenerator.h:918
float chi_square_at_least
chi_square arg to test for at least that change in lift to measure bigger difference
Definition FeatureGenerator.h:934
int sort_by_chi
sort results by chi-square
Definition FeatureGenerator.h:936
vector< string > tags
Tags - for defining labels or groups. may be used later for filtering for example.
Definition FeatureGenerator.h:69
int string_to_type(const string &str)
Convert string to type.
Definition MedTime.cpp:358

[CategoryDependencyGenerator::init]

[CategoryDependencyGenerator::init]

Reimplemented from FeatureGenerator.

◆ init_defaults()

void CategoryDependencyGenerator::init_defaults ( )
virtual

Reimplemented from FeatureGenerator.

◆ init_tables()

void CategoryDependencyGenerator::init_tables ( MedDictionarySections &  dict)
virtual

Reimplemented from FeatureGenerator.

◆ nfeatures()

int CategoryDependencyGenerator::nfeatures ( )
virtual

Reimplemented from FeatureGenerator.

◆ set_names()

void CategoryDependencyGenerator::set_names ( )
virtual

Reimplemented from FeatureGenerator.

◆ set_signal_ids()

void CategoryDependencyGenerator::set_signal_ids ( MedSignals &  sigs)
virtual

Reimplemented from FeatureGenerator.

◆ update()

int CategoryDependencyGenerator::update ( map< string, string > &  map)
virtual

Virtual to update object from parsed fields.

Reimplemented from SerializableObject.

Field Documentation

◆ verbose

bool CategoryDependencyGenerator::verbose

Apply hierarchy filtering.

In learn, will print selected features.


The documentation for this class was generated from the following files: