|
int | init (map< string, string > &map) |
| extra param for use when debugging
|
|
virtual int | version () const |
| Relevant for serializations.
|
|
virtual string | my_class_name () const |
| For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method.
|
|
virtual void | serialized_fields_name (vector< string > &field_names) const |
| The names of the serialized fields.
|
|
virtual void * | new_polymorphic (string derived_name) |
| For polymorphic classes that want to be able to serialize/deserialize a pointer to a derived class: implement this function to return a pointer to a new object of the derived class given its type (as in my_type).
|
|
virtual void | pre_serialization () |
|
virtual void | post_deserialization () |
|
virtual size_t | get_size () |
| Gets bytes sizes for serializations.
|
|
virtual size_t | serialize (unsigned char *blob) |
| Serializing object to blob memory. Returns number of bytes written to memory.
|
|
virtual size_t | deserialize (unsigned char *blob) |
| Deserializing blob to object. Returns number of bytes read.
|
|
size_t | serialize_vec (vector< unsigned char > &blob) |
|
size_t | deserialize_vec (vector< unsigned char > &blob) |
|
virtual size_t | serialize (vector< unsigned char > &blob) |
|
virtual size_t | deserialize (vector< unsigned char > &blob) |
|
virtual int | read_from_file (const string &fname) |
| read and deserialize model
|
|
virtual int | write_to_file (const string &fname) |
| serialize model and write to file
|
|
virtual int | read_from_file_unsafe (const string &fname) |
| read and deserialize model without checking version number - unsafe read
|
|
int | init_from_string (string init_string) |
| Init from string.
|
|
int | init_params_from_file (string init_file) |
|
int | init_param_from_file (string file_str, string &param) |
|
int | update_from_string (const string &init_string) |
|
virtual int | update (map< string, string > &map) |
| Virtual to update object from parsed fields.
|
|
virtual string | object_json () const |
|
|
string | init_string = "" |
|
string | samples_time_unit = "Date" |
| sometimes it helps to keep it for debugging
|
|
int | samples_time_unit_i |
|
int | ncateg = 2 |
| calculated upon init
|
|
string | time_slice_unit = "Days" |
| number of categories (1 for regression)
|
|
int | time_slice_unit_i |
|
int | time_slice_size = -1 |
| calculated upon init
|
|
int | n_time_slices = 1 |
| the size of the basic time slice, -1: is like infinity: a single time slice like a regular QRF
|
|
vector< int > | time_slices = {} |
| if time_slices vector is not given, one will be created using time_slice_size and this parameter.
|
|
vector< float > | time_slices_wgts ={} |
| if not empty: defines the borders of all the time lines. Enables a very flexible time slicing strategy
|
|
int | censor_cases = 0 |
| default is all 1.0 , but can be assigned by the user, will be used to weight the scores from different time windows
|
|
int | max_q = 200 |
| when calculating the time slices distributions we have an option to NOT count the preceding 0's of non 0 cases.
|
|
string | tree_type = "" |
| maximal quantization
|
|
int | tree_type_i = -1 |
| options: regression, entropy, logrank
|
|
int | ntrees = 50 |
| tree type code : calculated from the tree_type string
|
|
int | max_depth = 100 |
| number of trees to learn
|
|
int | min_node_last_slice = 10 |
| maximal depth of tree
|
|
int | min_node = 10 |
| stopping criteria : minimal number of samples in a node in the last time slice
|
|
float | random_split_prob = 0 |
| stopping criteria : minimal number of samples in a node in the first time slice
|
|
int | ntry = -1 |
| at this probability we will split a node in a random manner, in order to add noise to the tree.
|
|
float | ntry_prob = (float)0.1 |
| -1: use the ntry_prob rule, > 0 : choose this number of features.
|
|
int | nsplits = -1 |
| choose ntry_prob * nfeat features each time
|
|
int | max_node_test_samples = 50000 |
| -1: check all splits for each feature , then split the max, > 0: choose this number of split points at random and choose best
|
|
int | single_sample_per_pid = 1 |
| when a node is bigger than this number : choose this number of random samples to make decisions
|
|
int | bag_with_repeats = 1 |
| when bagging select a single sample per pid (which in itself can be repeated)
|
|
float | bag_prob = (float)0.5 |
| whether to bag with repeats or not
|
|
float | bag_ratio = -1 |
| random choice of samples for each tree prob
|
|
float | bag_feat = (float)1.0 |
| control ratio of #0 : #NonZero of labels, if < -1 , leave as is.
|
|
int | qpoints_per_split = 0 |
| proportion of random features chosen for each tree
|
|
int | nvals_for_categorial = 0 |
| if > 0 : will only choose this random number of points to test split points at, otherwise will test all of them
|
|
vector< string > | categorial_str |
| features with number of different values below nvals_for_categ will be assumed categorial
|
|
vector< string > | categorial_tags |
| all features containing one of the strings defined here in their name will be assumed categorial
|
|
float | missing_val = MED_MAT_MISSING_VALUE |
| all features containing these tags will be assumed categorial
|
|
string | missing_method_str = "median" |
| missing value
|
|
int | missing_method = -1 |
| how to handle missing values: median , left, right, mean, rand
|
|
int | test_for_inf = 1 |
| to be initialized from missing_method_str
|
|
int | test_for_missing = 0 |
| will fail on non finite values in input data
|
|
int | only_this_categ = -1 |
| will fail if missing value found in data
|
|
int | predict_from_slice = -1 |
| relevant only to categorial predictions: -1: give all categs, 0 and above: give only those categs. Remember that currently 0 is a special category in TQRF : the control category (nothing happens, healthy, etc...)
|
|
int | predict_to_slice = -1 |
| will give predictions for slices [predict_from_slice,...,predict_to_slice]. if negative: all slices.
|
|
int | predict_sum_times = 0 |
|
float | case_wgt = 1 |
| will sum predictions over different times
|
|
int | nrounds = 1 |
| the weight to use for cases with y!=0 in a weighted case
|
|
float | min_p = (float)0.01 |
| a single round means simply running TQRF as defined with no boosting applied
|
|
float | max_p = (float)0.99 |
| minimal probability to trim to when recalculating weights
|
|
float | alpha = 1 |
| maximal probability to trim to when recalculating weights
|
|
float | wgts_pow = 2 |
| shrinkage factor
|
|
float | tuning_size = 0 |
| power for the pow(-log(p), wgts_pow) used for adaboost weights
|
|
int | tune_max_depth = 0 |
| size of group to tune tree weights by.
|
|
int | tune_min_node_size = 0 |
| max depth of a node to get a weight for. 0 means 1 weight per tree.
|
|
float | gd_rate = (float)0.01 |
| min node size for a node to have a weight
|
|
int | gd_batch = 1000 |
| gradient descent step size
|
|
float | gd_momentum = (float)0.95 |
| gradient descent batch size
|
|
float | gd_lambda = 0 |
| gradient descent momentum
|
|
int | gd_epochs = 0 |
| regularization
|
|
int | verbosity = 0 |
| 0 : stop automatically , Otherwise: do this number of epochs
|
|
int | ids_to_print = 30 |
| for debug prints
|
|
int | debug = 0 |
| control debug prints in certain places
|
|
vector< double > | log_table |
|