|
void | split_feature_to_bins (const BinSettings &setting, vector< float > &feature, const vector< int > &sel_indexes, vector< float > &y) |
| splits feature into bins using setting
|
|
void | normalize_feature_to_uniform (const BinSettings &setting, vector< float > &feature) |
| normalize feature to be in the range [0, 1] and also change the distribution of values to be uniform
|
|
void | compare_populations (const MedFeatures &population1, const MedFeatures &population2, const string &name1, const string &name2, const string &output_file, const string &predictor_type="", const string &predictor_init="", int nfolds=5, int max_learn=0) |
| compares two matrix populations.
|
|
template<class T > |
void | commit_selection (vector< T > &vec, const vector< int > &idx) |
| commit selection of indexes on vector
|
|
void | filter_row_indexes (MedFeatures &dataMat, vector< int > &selected_indexes, bool op_flag=false) |
| filtering MedFeatures by selected row indexes
|
|
void | filter_row_indexes_safe (MedFeatures &dataMat, const vector< int > &selected_indexes, bool op_flag=false) |
| filtering MedFeatures by selected row indexes (thread safe for selected_indexes); selected_indexes is not sorted
|
|
void | down_sample (MedFeatures &dataMat, double take_ratio, bool with_repeats=false, vector< int > *selected_indexes=NULL) |
| down sampling with ratio
|
|
double | reweight_by_general (MedFeatures &data_records, const vector< string > &groups, vector< float > &weigths, bool print_verbose) |
| reweighting method by the unique values of the given groups. Returns the weights and min_factor
|
|
void | match_by_general (MedFeatures &data_records, const vector< string > &groups, vector< int > &filtered_row_ids, float price_ratio, int min_grp_size, bool print_verbose) |
| matching by the unique values of the given groups. Also returns the filtered row_ids
|
|
void | match_by_general (MedFeatures &data_records, const vector< string > &groups, vector< int > &filtered_row_ids, float price_ratio, float max_ratio, int min_grp_size, bool print_verbose) |
| matching by the unique values of the given groups. Also returns the filtered row_ids. max_ratio is the maximal allowed ratio (infinite if < 0).
|
|
void | split_matrix (const MedFeatures &matrix, vector< int > &folds, int iFold, MedFeatures &trainMatrix, MedFeatures &testMatrix, const vector< string > *selected_features=NULL) |
| split matrix to train test based on iFold value. folds is fold id for each sample
|
|
void | split_matrix (const MedFeatures &matrix, unordered_map< int, int > &folds, int iFold, MedFeatures &trainMatrix, MedFeatures &testMatrix, const vector< string > *selected_features=NULL) |
| split matrix to train test based on iFold value. folds is map from patient id to fold
|
|
void | convert_prctile (vector< float > &features_prctiles) |
| convert each element of the feature vector to its percentile value
|
|
void | match_to_prior (const vector< float > &outcome, const vector< float > &group_values, float target_prior, vector< int > &sel_idx) |
| does matching to specific target_prior.
|
|
double | match_to_prior (MedSamples &samples, float target_prior, vector< int > &sel_idx) |
| does matching to specific prior for MedSamples
|
|
double | match_to_prior (MedFeatures &features, float target_prior, vector< int > &sel_idx) |
| does matching to specific prior for MedFeatures
|
|
void | match_to_prior (MedFeatures &features, const vector< string > &group_values, float target_prior, vector< int > &sel_idx, bool print_verbose=true) |
| does matching to specific target_prior.
|
|
int | nSplits (vector< MedSample > &samples) |
| Return number of splits, also check mismatches between idSample and internal MedSamples and set idSamples.split if missing.
|
|
float | match_multi_class (MedFeatures &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > &price_ratios, int nRand=10000, int verbose=false) |
| multi-class matching.
|
|
float | match_multi_class (vector< MedSample > &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > &price_ratios, int nRand=10000, int verbose=false) |
|
void | match_multi_class_to_dist (MedFeatures &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > probs) |
|
void | match_multi_class_to_dist (vector< MedSample > &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > probs) |
|
void | down_sample (MedSamples &samples, double take_ratio, bool with_repeats=false) |
| down sampling
|
|
void | down_sample_by_pid (MedSamples &samples, double take_ratio, bool with_repeats=false) |
| down sample by selecting from pids
|
|
void | down_sample (MedSamples &samples, int no_more_than, bool with_repeats=false) |
| down sampling
|
|
void | down_sample_by_pid (MedSamples &samples, int no_more_than, bool with_repeats=false) |
| down sample by selecting from pids
|
|
template<class T > |
void | prctils (const vector< T > &x, const vector< double > &prc, vector< T > &res, const vector< float > *weights=NULL) |
| calculate percentiles
|
|
template<typename T > |
int | binary_search_index (const T *begin, const T *end, T val) |
| binary search for index. -1 if not found
|
|
template<typename T > |
int | binary_search_position (const T *begin, const T *end, T val, bool reversed=false) |
| binary search for position to add new element in sorted manner (first position if equal elements found).
|
|
template<typename T > |
int | binary_search_position_last (const T *begin, const T *end, T val, bool reversed=false) |
| binary search for position to add new element in sorted manner (last position if equal elements found).
|
|
template<class T > |
int | binary_search_position (const vector< T > &v, T search) |
| binary search for position to add new element in sorted manner (first position if equal elements found).
|
|
template<class T > |
int | binary_search_position (const vector< T > &v, T search, int start, int end) |
| binary search for position to add new element in sorted manner (first position if equal elements found).
|
|