Medial Code Documentation
|
This is the infrastructure of bootstrap. More...
#include <vector>
#include <string>
#include <map>
#include <random>
#include <MedTime/MedTime/MedTime.h>
#include <SerializableObject/SerializableObject/SerializableObject.h>
#include <Logger/Logger/Logger.h>
#include <MedStat/MedStat/MedStat.h>
Go to the source code of this file.
Data Structures | |
class | Lazy_Iterator |
A class which fetches the samples in bootstrap manner in lazy way. More... | |
class | Mem_Iterator |
A class which fetches the samples in bootstrap manner in an in-memory way. More... | |
class | Measurement_Params |
A base class for measurements parameter. More... | |
class | Incident_Stats |
The Incident Object which holds the gender, age incidence stats. More... | |
class | ROC_Params |
Parameter object for calc_roc_measures functions. More... | |
class | Multiclass_Params |
Parameter object for Multiclass measure functions. More... | |
class | Regression_Params |
Parameter object for Regression measure functions. More... | |
class | Filter_Param |
Parameter object for filter_params functions. More... | |
struct | ROC_And_Filter_Params |
Typedefs | |
typedef map< string, float >(* | MeasurementFunctions) (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
Function which receives Lazy_Iterator and the thread num for iterating the predictions and labels. | |
typedef bool(* | FilterCohortFunc) (const map< string, vector< float > > &record_info, int index, void *cohort_params) |
Function which receives a map from feature name to a vector of all samples' values, a sample index and cohort definition params, and returns true/false to indicate whether to include the sample in the cohort. | |
typedef void(* | ProcessMeasurementParamFunc) (const map< string, vector< float > > &additional_info, const vector< float > &y, const vector< int > &pids, Measurement_Params *function_params, const vector< int > &filtered_indexes, const vector< float > &y_full, const vector< int > &pids_full) |
A function to process and manipulate function params based on the given cohort - for example, storing incidence information for the cohort. | |
typedef void(* | PreprocessScoresFunc) (vector< float > &preds, Measurement_Params *function_params) |
A function to preprocess the prediction scores (binning for example to speed up bootstrap). | |
Functions | |
map< string, float > | calc_npos_nneg (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate only NPOS,NNEG (already calculated in calc_roc_measures_with_inc). | |
map< string, float > | calc_only_auc (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate only AUC (already calculated in calc_roc_measures_with_inc). | |
map< string, float > | calc_roc_measures_with_inc (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate all roc measurements - AUC, sensitivity, specificity, positive rate, ppv... Implements MeasurementFunctions signature function. | |
map< string, float > | calc_kandel_tau (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate Kendall's tau. Implements MeasurementFunctions signature function. | |
map< string, float > | calc_multi_class (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate performance measurements for multicategory Implements MeasurementFunctions signature function. | |
map< string, float > | calc_harrell_c_statistic (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate performance measurements for harrell c statistic Implements MeasurementFunctions signature function Encoding: Case/Control => effect outcome/y sign. | |
map< string, float > | calc_regression (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
A Function to calculate performance measurements for regression problems. Implements MeasurementFunctions signature function. Accepts Regression_Params. | |
bool | filter_range_param (const map< string, vector< float > > &record_info, int index, void *cohort_params) |
A function to filter samples based on single Filter_Param object. | |
bool | filter_range_params (const map< string, vector< float > > &record_info, int index, void *cohort_params) |
A function to filter samples based on multiple Filter_Param objects - in a vector, with an AND condition between each parameter range. | |
void | fix_cohort_sample_incidence (const map< string, vector< float > > &additional_info, const vector< float > &y, const vector< int > &pids, Measurement_Params *function_params, const vector< int > &filtered_indexes, const vector< float > &y_full, const vector< int > &pids_full) |
a function to calculate the incidence in each cohort - preprocessing of function_params and storing the incidence inside of it. | |
void | fix_cohort_sample_incidence_old (const map< string, vector< float > > &additional_info, const vector< float > &y, const vector< int > &pids, Measurement_Params *function_params, const vector< int > &filtered_indexes, const vector< float > &y_full, const vector< int > &pids_full) |
a function to calculate the incidence in each cohort - preprocessing of function_params and storing the incidence inside of it. | |
void | preprocess_bin_scores (vector< float > &preds, Measurement_Params *function_params) |
Binning function of scores based on ROC_Params. | |
string | format_working_point (const string &init_str, float wp, bool perc=true) |
Format out measurement. | |
string | format_working_point_topn (const string &init_str, int wp, bool perc=true) |
map< string, float > | booststrap_analyze_cohort (const vector< float > &preds, const vector< int > &preds_order, const vector< float > &y, const vector< int > &pids, float sample_ratio, int sample_per_pid, int loopCnt, const vector< MeasurementFunctions > &meas_functions, const vector< Measurement_Params * > &function_params, ProcessMeasurementParamFunc process_measurments_params, const map< string, vector< float > > &additional_info, const vector< float > &y_full, const vector< int > &pids_full, const vector< float > *weights, const vector< int > &filter_indexes, FilterCohortFunc cohort_def, void *cohort_params, int &warn_cnt, const string &cohort_name, int seed=0) |
to run bootstrap on single cohort | |
map< string, map< string, float > > | booststrap_analyze (const vector< float > &preds, const vector< int > &preds_order, const vector< float > &y, const vector< float > *weights, const vector< int > &pids, const map< string, vector< float > > &additional_info, const map< string, FilterCohortFunc > &filter_cohort, const vector< MeasurementFunctions > &meas_functions={calc_roc_measures_with_inc}, const map< string, void * > *cohort_params=NULL, const vector< Measurement_Params * > *function_params=NULL, ProcessMeasurementParamFunc process_measurments_params=NULL, PreprocessScoresFunc preprocess_scores=NULL, Measurement_Params *preprocess_scores_params=NULL, float sample_ratio=(float) 1.0, int sample_per_pid=1, int loopCnt=500, int seed=0, bool binary_outcome=true) |
The main bootstrap function to run all bootstrap process with all the arguments. | |
void | prepare_for_bootstrap (const vector< int > &pids, const map< string, vector< float > > &additional_info, FilterCohortFunc &filter_cohort, void *cohort_params, float sample_ratio, int sample_per_pid, int seed, vector< int > &indexes) |
void | write_bootstrap_results (const string &file_name, const map< string, map< string, float > > &all_cohorts_measurments, const string &run_id="") |
Will output the bootstrap results into file in TAB delimited format. | |
void | read_bootstrap_results (const string &file_name, map< string, map< string, float > > &all_cohorts_measurments) |
Will read the bootstrap results from file in TAB delimited format. | |
void | write_pivot_bootstrap_results (const string &file_name, const map< string, map< string, float > > &all_cohorts_measurments, const string &run_id="") |
Will output the bootstrap results into file with the new format with columns: "Cohort$Measure_Name", "Value". | |
void | read_pivot_bootstrap_results (const string &file_name, map< string, map< string, float > > &all_cohorts_measurments) |
Will read the bootstrap results from file with the new format with columns: "Cohort$Measure_Name", "Value". | |
This is the infrastructure of bootstrap.
typedef map< string, float >(* MeasurementFunctions) (Lazy_Iterator *iterator, int thread_num, Measurement_Params *function_params) |
Function which receives Lazy_Iterator and the thread num for iterating the predictions and labels.
It also receives function_params, which are additional arguments for the function (can be working points definitions for example).
typedef void(* PreprocessScoresFunc) (vector< float > &preds, Measurement_Params *function_params) |
A function to preprocess the prediction scores (binning for example to speed up bootstrap).
The function manipulates preds based on function_params.
map< string, map< string, float > > booststrap_analyze | ( | const vector< float > & | preds, |
const vector< int > & | preds_order, | ||
const vector< float > & | y, | ||
const vector< float > * | weights, | ||
const vector< int > & | pids, | ||
const map< string, vector< float > > & | additional_info, | ||
const map< string, FilterCohortFunc > & | filter_cohort, | ||
const vector< MeasurementFunctions > & | meas_functions = {calc_roc_measures_with_inc} , |
||
const map< string, void * > * | cohort_params = NULL , |
||
const vector< Measurement_Params * > * | function_params = NULL , |
||
ProcessMeasurementParamFunc | process_measurments_params = NULL , |
||
PreprocessScoresFunc | preprocess_scores = NULL , |
||
Measurement_Params * | preprocess_scores_params = NULL , |
||
float | sample_ratio = (float) 1.0 , |
||
int | sample_per_pid = 1 , |
||
int | loopCnt = 500 , |
||
int | seed = 0 , |
||
bool | binary_outcome = true |
||
) |
The main bootstrap function to run all bootstrap process with all the arguments.
preds | vector of predictions |
y | labels |
pids | patient ids [used for random draws] |
additional_info | Dictionary used to keep values of features [for filtering]. Key is the feature name. Val contains vector of feature values. |
filter_cohort | Dictionary, where key is name of cohort, value is a function which performs the filtering [keeps only entries that belong to this cohort] |
meas_functions | Vector of metrics to calculate per one bootstrap (?) experiment |
cohort_params | Key: name of a cohort, Value: additional parameters which are passed to functions from filter_cohort.values |
function_params | Configuration parameters passed to "meas_functions" (like 2 for F2 metric). |
process_measurments_params | Function to process the function_params before running on each cohort (helps to calc incidence for example) |
preprocess_scores | A function to preprocess all scores - for example binning the scores (can sometimes speedup metrics calculation) |
preprocess_scores_params | Additional parameters for the preprocess_scores function |
sample_ratio | A number in range (0,1] for subsampling the samples [in order to speed-up the bootstrap] |
sample_per_pid | How many samples to sample on each id [sampling with replacement] |
loopCnt | How many bootstrap experiments(?) to do |
seed | The random seed. If 0 will use random_device to create random seed |
binary_outcome | A flag to indicate whether the labels are binary (used to validate the input labels)
|
map< string, float > calc_harrell_c_statistic | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate performance measurements for harrell c statistic Implements MeasurementFunctions signature function Encoding: Case/Control => effect outcome/y sign.
Positive is case, negative is control. Can't handle an event at time zero. Time to event => abs value of outcome/y. Score => the prediction.
map< string, float > calc_kandel_tau | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate Kendall's tau. Implements MeasurementFunctions signature function.
map< string, float > calc_multi_class | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate performance measurements for multicategory Implements MeasurementFunctions signature function.
map< string, float > calc_npos_nneg | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate only NPOS,NNEG (already calculated in calc_roc_measures_with_inc).
Implements MeasurementFunctions signature function
map< string, float > calc_only_auc | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate only AUC (already calculated in calc_roc_measures_with_inc).
Implements MeasurementFunctions signature function
map< string, float > calc_regression | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate performance measurements for regression problems. Implements MeasurementFunctions signature function. Accepts Regression_Params.
< can be called False
< can be called Negative
< can be called True
< can be called Positive
Counts what percentage [0-100] of the data points are "covered" (the L1 error is within threshold). The threshold is determined by percentages of the mean outcome (the prior).
map< string, float > calc_roc_measures_with_inc | ( | Lazy_Iterator * | iterator, |
int | thread_num, | ||
Measurement_Params * | function_params | ||
) |
A Function to calculate all roc measurements - AUC, sensitivity, specificity, positive rate, ppv...
Implements MeasurementFunctions signature function.
counts also false positives weights if no fix_label_to_binary
bool filter_range_param | ( | const map< string, vector< float > > & | record_info, |
int | index, | ||
void * | cohort_params | ||
) |
A function to filter samples based on single Filter_Param object.
it's a FilterCohortFunc signature
bool filter_range_params | ( | const map< string, vector< float > > & | record_info, |
int | index, | ||
void * | cohort_params | ||
) |
A function to filter samples based on multiple Filter_Param objects - in a vector, with an AND condition between each parameter range.
it's a FilterCohortFunc signature
void fix_cohort_sample_incidence_old | ( | const map< string, vector< float > > & | additional_info, |
const vector< float > & | y, | ||
const vector< int > & | pids, | ||
Measurement_Params * | function_params, | ||
const vector< int > & | filtered_indexes, | ||
const vector< float > & | y_full, | ||
const vector< int > & | pids_full | ||
) |
a function to calculate the incidence in each cohort - preprocessing of function_params and storing the incidence inside of it.
The old version has the same implementation as the old bootstrap: it only averages the incidence over the controls in the sample, based on the incidence in each group (age+gender).
void prepare_for_bootstrap | ( | const vector< int > & | pids, |
const map< string, vector< float > > & | additional_info, | ||
FilterCohortFunc & | filter_cohort, | ||
void * | cohort_params, | ||
float | sample_ratio, | ||
int | sample_per_pid, | ||
int | seed, | ||
vector< int > & | indexes | ||
) |
pids | the pids vector |
additional_info | the data vector for filtering |
filter_cohort | The cohorts definition - the filtering function |
cohort_params | Additional parameters for the filtering cohort function |
sample_ratio | A number in range (0,1] for subsampling the samples |
sample_per_pid | How many samples to sample on each id |
seed | The random seed. If 0 will use random_device to create random seed |
indexes | the selected indexes results for the bootstrap
|
void preprocess_bin_scores | ( | vector< float > & | preds, |
Measurement_Params * | function_params | ||
) |
Binning function of scores based on ROC_Params.
look at score_bins,score_resolution
void read_bootstrap_results | ( | const string & | file_name, |
map< string, map< string, float > > & | all_cohorts_measurments | ||
) |
Will read the bootstrap results from file in TAB delimited format.
Each line is a cohort and the columns are the measurements.
void write_bootstrap_results | ( | const string & | file_name, |
const map< string, map< string, float > > & | all_cohorts_measurments, | ||
const string & | run_id = "" |
||
) |
Will output the bootstrap results into file in TAB delimited format.
Each line is a cohort and the columns are the measurements.