Medial Code Documentation
Loading...
Searching...
No Matches
Functions
medial::process Namespace Reference

process namespace More...

Functions

void split_feature_to_bins (const BinSettings &setting, vector< float > &feature, const vector< int > &sel_indexes, vector< float > &y)
 splits feature to bin using setting
 
void normalize_feature_to_uniform (const BinSettings &setting, vector< float > &feature)
 normalize feature to be between [0-1] but also change the distribution of values to be uniform
 
void compare_populations (const MedFeatures &population1, const MedFeatures &population2, const string &name1, const string &name2, const string &output_file, const string &predictor_type="", const string &predictor_init="", int nfolds=5, int max_learn=0)
 compares the populations of two matrices.
 
template<class T >
void commit_selection (vector< T > &vec, const vector< int > &idx)
 commit selection of indexes on vector
 
void filter_row_indexes (MedFeatures &dataMat, vector< int > &selected_indexes, bool op_flag=false)
 filtering MedFeatures by selected indexes rows
 
void filter_row_indexes_safe (MedFeatures &dataMat, const vector< int > &selected_indexes, bool op_flag=false)
 filtering MedFeatures rows by selected indexes (thread safe for selected_indexes); does not sort selected_indexes
 
void down_sample (MedFeatures &dataMat, double take_ratio, bool with_repeats=false, vector< int > *selected_indexes=NULL)
 down sampling with ratio
 
double reweight_by_general (MedFeatures &data_records, const vector< string > &groups, vector< float > &weigths, bool print_verbose)
 reweighting method by the unique values of the given groups. returns weights and min_factor
 
void match_by_general (MedFeatures &data_records, const vector< string > &groups, vector< int > &filtered_row_ids, float price_ratio, int min_grp_size, bool print_verbose)
 matching by the unique values of the given groups. also returns the filtered row_ids
 
void match_by_general (MedFeatures &data_records, const vector< string > &groups, vector< int > &filtered_row_ids, float price_ratio, float max_ratio, int min_grp_size, bool print_verbose)
 matching by the unique values of the given groups. also returns the filtered row_ids. max_ratio is the maximal allowed ratio, inf if < 0.
 
void split_matrix (const MedFeatures &matrix, vector< int > &folds, int iFold, MedFeatures &trainMatrix, MedFeatures &testMatrix, const vector< string > *selected_features=NULL)
 split matrix to train test based on iFold value. folds is fold id for each sample
 
void split_matrix (const MedFeatures &matrix, unordered_map< int, int > &folds, int iFold, MedFeatures &trainMatrix, MedFeatures &testMatrix, const vector< string > *selected_features=NULL)
 split matrix to train test based on iFold value. folds is map from patient id to fold
 
void convert_prctile (vector< float > &features_prctiles)
 converts each element of the feature vector to its percentile value
 
void match_to_prior (const vector< float > &outcome, const vector< float > &group_values, float target_prior, vector< int > &sel_idx)
 does matching to specific target_prior.
 
double match_to_prior (MedSamples &samples, float target_prior, vector< int > &sel_idx)
 does matching to specific prior for MedSamples
 
double match_to_prior (MedFeatures &features, float target_prior, vector< int > &sel_idx)
 does matching to specific prior for MedFeatures
 
void match_to_prior (MedFeatures &features, const vector< string > &group_values, float target_prior, vector< int > &sel_idx, bool print_verbose=true)
 does matching to specific target_prior.
 
int nSplits (vector< MedSample > &samples)
 Return number of splits, also check mismatches between idSample and internal MedSamples and set idSamples.split if missing.
 
float match_multi_class (MedFeatures &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > &price_ratios, int nRand=10000, int verbose=false)
 multi-class matching.
 
float match_multi_class (vector< MedSample > &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > &price_ratios, int nRand=10000, int verbose=false)
 
void match_multi_class_to_dist (MedFeatures &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > probs)
 
void match_multi_class_to_dist (vector< MedSample > &data, const vector< string > &groups, vector< int > &filtered_row_ids, vector< float > probs)
 
void down_sample (MedSamples &samples, double take_ratio, bool with_repeats=false)
 down sampling
 
void down_sample_by_pid (MedSamples &samples, double take_ratio, bool with_repeats=false)
 down sample by selecting from pids
 
void down_sample (MedSamples &samples, int no_more_than, bool with_repeats=false)
 down sampling
 
void down_sample_by_pid (MedSamples &samples, int no_more_than, bool with_repeats=false)
 down sample by selecting from pids
 
template<class T >
void prctils (const vector< T > &x, const vector< double > &prc, vector< T > &res, const vector< float > *weights=NULL)
 calc prctile
 
template<typename T >
int binary_search_index (const T *begin, const T *end, T val)
 binary search for index. -1 if not found
 
template<typename T >
int binary_search_position (const T *begin, const T *end, T val, bool reversed=false)
 binary search for position to add new element in sorted manner (first position if equal elements found).
 
template<typename T >
int binary_search_position_last (const T *begin, const T *end, T val, bool reversed=false)
 binary search for position to add new element in sorted manner (last position if equal elements found).
 
template<class T >
int binary_search_position (const vector< T > &v, T search)
 binary search for position to add new element in sorted manner (first position if equal elements found).
 
template<class T >
int binary_search_position (const vector< T > &v, T search, int start, int end)
 binary search for position to add new element in sorted manner (first position if equal elements found).
 

Detailed Description

process namespace

process

Function Documentation

◆ compare_populations()

void medial::process::compare_populations ( const MedFeatures &  population1,
const MedFeatures &  population2,
const string &  name1,
const string &  name2,
const string &  output_file,
const string &  predictor_type = "",
const string &  predictor_init = "",
int  nfolds = 5,
int  max_learn = 0 
)
protected

compares the populations of two matrices.

it also tries to separate the two populations using the predictor parameters, if given

◆ match_to_prior() [1/2]

void medial::process::match_to_prior ( const vector< float > &  outcome,
const vector< float > &  group_values,
float  target_prior,
vector< int > &  sel_idx 
)

does matching to specific target_prior.

Parameters
outcome: the outcome vector, used to measure the prior in each group
group_values: the groups to split the matching into. it can be year signature or age or a unique combination of both
target_prior: the target prior
sel_idx: the return value of selected indexes to do the matching

◆ match_to_prior() [2/2]

void medial::process::match_to_prior ( MedFeatures &  features,
const vector< string > &  group_values,
float  target_prior,
vector< int > &  sel_idx,
bool  print_verbose = true 
)

does matching to specific target_prior.

Parameters
features: the matrix to match. will use outcome in samples
group_values: the groups to split the matching into. it can be year signature or age or a unique combination of both
target_prior: the target prior
sel_idx: the original indices

◆ split_feature_to_bins()

void medial::process::split_feature_to_bins ( const BinSettings &  setting,
vector< float > &  feature,
const vector< int > &  sel_indexes,
vector< float > &  y 
)

splits feature to bin using setting

Parameters
setting: the settings of the split
feature: the feature vector values
sel_indexes: the indexes to take from feature. if empty, will take the whole feature vector
y: labels, if we have them. some binning methods use the labels for a better split
Returns
it updates feature to the binned values