1#ifndef __MEDBOOTSTRAP_ANALYSIS_H__
2#define __MEDBOOTSTRAP_ANALYSIS_H__
3#include <unordered_map>
6#include <MedProcessTools/MedProcessTools/MedSamples.h>
7#include <MedProcessTools/MedProcessTools/MedFeatures.h>
8#include <InfraMed/InfraMed/MedPidRepository.h>
19enum class MeasurmentFunctionType {
22 calc_roc_measures_with_inc = 2,
25 calc_harrell_c_statistic = 5,
67 float censor_time_factor;
68 bool sort_preds_in_multicategory;
118 int init(map<string, string>& map);
134 map<
string, vector<float>> &final_additional_info, vector<int> &preds_order, unordered_map<
int, vector<int>> *splits_inds = NULL);
143 void prepare_bootstrap(
MedSamples &samples, map<
string, vector<float>> &additional_info, vector<float> &preds, vector<float> &y, vector<int> &pids, vector<int> &preds_order,
144 unordered_map<
int, vector<int>> *splits_inds = NULL);
170 map<string, map<string, float>>
bootstrap(
MedSamples &samples, map<
string, vector<float>> &additional_info, map<
int, map<
string, map<string, float>>> *results_per_split = NULL,
with_registry_args *registry_args = NULL);
184 map<string, map<string, float>>
bootstrap(
MedSamples &samples,
const string &rep_path, map<
int, map<
string, map<string, float>>> *results_per_split = NULL,
with_registry_args *registry_args = NULL);
261 static unordered_map<string, MeasurmentFunctionType> measurement_function_name_map;
286 MedSamples &curr_samples,
const string &bt_cohort);
300 MedSamples &curr_samples,
const string &bt_cohort);
306 map<string, map<string, float>> bootstrap_base(
const vector<float> &preds,
const vector<int> &preds_order,
const vector<float> &y,
const vector<int> &pids,
307 const vector<float> *weights,
const map<
string, vector<float>> &additional_info);
308 map<string, map<string, float>> bootstrap_using_registry(
const MedFeatures &features_mat,
309 const with_registry_args& args, map<
int, map<
string, map<string, float>>> *results_per_split = NULL);
310 void add_splits_results(
const vector<float> &preds, vector<int> &preds_order,
const vector<float> &y,
311 const vector<int> &pids,
const vector<float> *weights,
const map<
string, vector<float>> &data,
312 const unordered_map<
int, vector<int>> &splits_inds,
313 map<
int, map<
string, map<string, float>>> &results_per_split);
314 bool use_time_window();
315 void add_filter_cohorts(
const map<
string, vector<pair<float, float>>> &parameters_ranges);
316 void add_filter_cohorts(
const vector<vector<Filter_Param>> &parameters_ranges);
317 void sort_index_only(
const vector<float> &vec, std::vector<int>::iterator ind_start, std::vector<int>::iterator ind_end);
340 void bootstrap(
MedSamples &samples, map<
string, vector<float>> &additional_info, map<
int, map<
string, map<string, float>>> *results_per_split = NULL,
with_registry_args *registry_args = NULL);
356 vector<float> &sens_points, vector<float> &pr_points);
371 void explore_score(
float score, map<string, float> &score_measurements,
372 const string &string_cohort =
"All",
float max_search_range = 0.1);
388 bool find_in_range(
const vector<float> &vec,
float search,
float th);
389 void explore_measure(
const string &measure_name,
float value, map<string, float> &score_measurements,
390 const string &string_cohort =
"All",
float max_search_range = 0.1);
397 void make_sim_time_window(
const string &cohort_name,
const vector<Filter_Param> &filter_p,
398 const vector<float> &y,
const map<
string, vector<float>> &additional_info,
399 vector<float> &y_changed, map<
string, vector<float>> &cp_info,
400 map<string, FilterCohortFunc> &cohorts_t, map<string, void *> &cohort_params_t,
float censor_time_factor = 2);
registry methods over MedRegistry Object
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
This is the infrastructure of bootstrap.
Parameters for labeling strategy on MedRegistry for a given time window.
Definition LabelParams.h:91
A wrapper class which contains the MedBootstrap object and the results for later querying the scores f...
Definition MedBootstrap.h:324
map< string, map< string, float > > bootstrap_results
The bootstrap results.
Definition MedBootstrap.h:329
void bootstrap(MedFeatures &features, map< int, map< string, map< string, float > > > *results_per_split=NULL, with_registry_args *registry_args=NULL)
runs the bootstrap - see the MedBootstrap.bootstrap documentation - and stores the results in bootstrap_...
Definition MedBootstrap.cpp:1156
void find_working_points(const map< string, float > &bootstrap_cohort, vector< float > &sens_points, vector< float > &pr_points)
searches for the sensitivity (sens) and positive rate (pr) in the specific bootstrap_cohort results whic...
Definition MedBootstrap.cpp:1177
void write_results_to_text_file(const string &path, bool pivot_format=true, const string &run_id="")
writes the results to a file in TAB-delimited format.
Definition MedBootstrap.cpp:1281
void read_results_to_text_file(const string &path, bool pivot_format=true)
reads the results from a file in TAB-delimited format.
Definition MedBootstrap.cpp:1288
void explore_score(float score, map< string, float > &score_measurements, const string &string_cohort="All", float max_search_range=0.1)
searches, for a specific score value, the corresponding measurements in that working point based on the boo...
Definition MedBootstrap.cpp:1276
MedBootstrap bootstrap_params
The bootstrap parameters.
Definition MedBootstrap.h:326
Bootstrap wrapper for Medial Infrastructure objects, simplifies the parameters and the input,...
Definition MedBootstrap.h:50
int loopCnt
the bootstrap count
Definition MedBootstrap.h:61
vector< pair< MeasurementFunctions, Measurement_Params * > > measurements_with_params
not Serializable! the measurements with the params
Definition MedBootstrap.h:70
size_t num_categories
number of categories
Definition MedBootstrap.h:69
int sample_per_pid
how many samples to take for each patient. 0 means no sampling - take all samples for the patient
Definition MedBootstrap.h:58
Regression_Params regression_params
params for regression
Definition MedBootstrap.h:53
static void filter_bootstrap_cohort(MedFeatures &features, const string &bt_cohort)
commit bootstrap cohort filter on a given matrix
Definition MedBootstrap.cpp:1332
void clean_feature_name_prefix(map< string, vector< float > > &features)
cleans the initial "FTR_" prefix from the feature names in MedFeatures created by the infra pipeline
Definition MedBootstrap.cpp:622
void change_sample_autosim(MedSamples &samples, int min_time, int max_time, MedSamples &new_samples)
changing the samples to be auto-simulations - taking max score in the time window for each pid
Definition MedBootstrap.cpp:1037
map< string, vector< Filter_Param > > filter_cohort
the cohorts definitions. name to parameters range to intersect
Definition MedBootstrap.h:55
bool use_time_control_as_case
if True will use time window condition for controls same as cases.
Definition MedBootstrap.h:63
Multiclass_Params multiclass_params
Controlling the multi class parameters: top n...
Definition MedBootstrap.h:54
bool is_binary_outcome
only used for validating bootstrap input
Definition MedBootstrap.h:62
void parse_cohort_file(const string &cohorts_path)
a function which reads cohorts file and stores it in filter_cohort.
Definition MedBootstrap.cpp:53
void prepare_bootstrap(const MedFeatures &features, vector< float > &preds, vector< float > &y, vector< int > &pids, map< string, vector< float > > &final_additional_info, vector< int > &preds_order, unordered_map< int, vector< int > > *splits_inds=NULL)
prepares the required vectors for bootstrap from MedFeatures &features
Definition MedBootstrap.cpp:775
map< string, map< string, float > > bootstrap(const MedFeatures &features, map< int, map< string, map< string, float > > > *results_per_split=NULL, with_registry_args *registry_args=NULL)
Will run the bootstrapping process on all cohorts and measurements.
Definition MedBootstrap.cpp:826
MedBootstrap()
default Ctor.
Definition MedBootstrap.cpp:67
bool simTimeWindow
Time window simulation (in cohorts with Time-Window filtering) - instead of censoring cases out of ti...
Definition MedBootstrap.h:66
bool sample_patient_label
if true will treat patient+label as the "id" for the sampling
Definition MedBootstrap.h:59
float sample_ratio
the sample ratio of the patients out of all patients in each bootstrap
Definition MedBootstrap.h:57
void parse_cohort_line(const string &line)
parsing specific line.
Definition MedBootstrap.cpp:17
void apply_censor(const unordered_map< int, int > &pid_censor_dates, MedSamples &samples)
censors samples from samples based on time_range provided in pid_censor_dates.
Definition MedBootstrap.cpp:997
MeasurmentFunctionType measurement_function_name_to_type(const string &measurement_function_name)
convert measurement function name to type
Definition MedBootstrap.cpp:1094
int sample_seed
if 0 will use random_device
Definition MedBootstrap.h:60
int init(map< string, string > &map)
Initialization string with format "parameter_name=value;..." each parameter_name is the same as the class ...
Definition MedBootstrap.cpp:83
ROC_Params roc_Params
Controlling the roc parameters: sensitivity, specificity...
Definition MedBootstrap.h:52
void get_cohort_from_arg(const string &single_cohort)
A function which reads a single cohort definition from the command line and parses it.
Definition MedBootstrap.cpp:10
map< string, FilterCohortFunc > additional_cohorts
not Serializable! additional cohorts given by function
Definition MedBootstrap.h:56
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
A model = repCleaner + featureGenerator + featureProcessor + MedPredictor.
Definition MedModel.h:56
Definition MedPidRepository.h:87
A class that holds all registry records on all patients.
Definition MedRegistry.h:30
MedSamples represents a collection of samples per different id. The data is contained in a vector of ...
Definition MedSamples.h:129
DEPRECATED - A Class which samples by year from year to year by jump and find match in registry.
Definition MedSamplingStrategy.h:155
Parameter object for Multiclass measure functions.
Definition bootstrap.h:348
Parameter object for calc_roc_measures functions.
Definition bootstrap.h:294
Parameter object for Regression measure functions.
Definition bootstrap.h:376
Definition SerializableObject.h:32
Definition MedBootstrap.h:28
MedRegistry * registry_censor
the registry censor of records
Definition MedBootstrap.h:31
string rep_path
repository path
Definition MedBootstrap.h:34
bool do_kaplan_meir
If true will do kaplan meier.
Definition MedBootstrap.h:33
string json_model
The json_model path to create matrix to calc incidence and filter cohort.
Definition MedBootstrap.h:35
MedRegistry * registry
the registry of records
Definition MedBootstrap.h:30
MedSamplingYearly * sampler
the sampler for calculating incidence for example yearly from year to year
Definition MedBootstrap.h:32