1#ifndef __CALIBRATION_H__
2#define __CALIBRATION_H__
7#include <MedProcessTools/MedProcessTools/MedSamples.h>
9#include <MedFeat/MedFeat/MedOutcome.h>
/// Per-entry statistics. The enclosing type's header is not visible in this listing.
/// NOTE(review): presumably these are the fields of `calibration_entry` - confirm.
/// Presumably the range of prediction scores covered by this entry - TODO confirm.
22 float min_pred, max_pred;
/// Case/control counts, stored as doubles (presumably so they can hold weighted sums - TODO confirm).
23 double cnt_cases, cnt_controls;
/// NOTE(review): `_no_w` presumably means "no weights", i.e. unweighted counts - confirm.
24 double cnt_cases_no_w, cnt_controls_no_w;
/// Presumably the mean prediction and mean outcome of the samples in this entry - TODO confirm.
25 float mean_pred, mean_outcome;
/// Per-time-slot counts; these share their names with the first two parameters of calc_kaplan_meier().
27 vector<double> controls_per_time_slot;
28 vector<double> cases_per_time_slot;
/// Fields included in serialization. `bin` and `kaplan_meier` are listed here, but their
/// declarations are not visible in this listing.
34 ADD_SERIALIZATION_FUNCS(bin, min_pred, max_pred, cnt_cases, cnt_controls, cnt_cases_no_w, cnt_controls_no_w, mean_pred, mean_outcome, controls_per_time_slot, cases_per_time_slot, kaplan_meier)
/// Supported calibration methods (plain enum with explicit values 0..3).
/// The per-value notes quote the generated member documentation for `cals`, `map_prob` and `platt_params`.
38enum CalibrationTypes {
39 probability_time_window = 0, ///< `cals` is documented as being for "time_window"
40 probability_binning = 1, ///< `map_prob` is documented as being for "binning/isotonic-regression"
41 probability_platt_scale = 2, ///< `platt_params` is documented as being for "platt_scale"
42 probability_isotonic = 3 ///< `map_prob` is documented as being for "binning/isotonic-regression"
/// Map from an integer key to a calibration method name; only declared here (`extern`), defined elsewhere.
/// NOTE(review): keys are presumably CalibrationTypes values - confirm against the definition.
45extern unordered_map<int, string> calibration_method_to_name;
/// Returns the CalibrationTypes value for the method name `calibration_name`.
/// Behaviour for an unknown name is not visible from this declaration.
/// NOTE(review): declared `static`; if this sits at namespace scope in the header, every including
/// translation unit gets its own internal-linkage declaration and would need its own definition - confirm intent.
/// NOTE(review): "clibration" looks like a typo of "calibration"; renaming would change the public name.
46static CalibrationTypes clibration_name_to_type(
const string& calibration_name);
/// Calibration method to use; defaults to probability_time_window.
52 CalibrationTypes calibration_type = probability_time_window;
/// Estimator name; defaults to "kaplan_meier".
56 string estimator_type =
"kaplan_meier";
/// Binning strategy name; defaults to "equal_num_of_samples_per_bin".
57 string binning_method =
"equal_num_of_samples_per_bin";
/// Presumably the [min, max] time before the case event for a sample to count as positive;
/// the time unit is not visible here - TODO confirm.
59 int pos_sample_min_time_before_case = 0;
60 int pos_sample_max_time_before_case = 360;
/// Presumably the time-slot width used by the Kaplan-Meier estimate - TODO confirm.
61 int km_time_resolution = 1;
/// NOTE(review): the `_pct` suffix suggests a percentage threshold for smoothing - confirm.
62 int min_cases_for_calibration_smoothing_pct = 10;
/// Smoothing switch stored as an int; defaults to 1.
63 int do_calibration_smoothing = 1;
/// Name of the attribute holding weights (presumably a per-sample attribute - TODO confirm); defaults to "weight".
65 string weights_attr_name =
"weight";
/// Defaults to -1; presumably a sentinel meaning "not set" - TODO confirm.
66 int min_control_bins = -1;
/// Calibration entries; the generated member documentation describes this as used for "time_window".
80 vector<calibration_entry>
cals;
/// Initializes the object from a string-to-string parameter map (`mapper`).
/// Only declared here; the meaning of the int return value is not visible from this declaration.
85 virtual int init(map<string, string>& mapper);
/// Convenience overload: forwards `samples` to the two-argument Learn with
/// `global_default_time_unit` as the time unit, and returns that call's result.
87 virtual int Learn(
const vector<MedSample>& samples) {
return Learn(samples, global_default_time_unit); }
/// Learns the calibration from `samples`; `samples_time_unit` is presumably the unit of the
/// samples' time fields - TODO confirm. Only declared here; return-value convention not visible.
88 virtual int Learn(
const vector <MedSample>& samples,
const int samples_time_unit);
/// Applies the calibration to `samples`, which is taken by non-const reference
/// (presumably updated in place - TODO confirm). Non-const, unlike the other Apply overloads.
90 virtual int Apply(vector <MedSample>& samples);
/// Const overload working on raw values: reads `preds` and writes through `probs`
/// (presumably one calibrated probability per prediction - TODO confirm).
91 void Apply(
const vector<float> &preds, vector<float> &probs)
const;
/// Const single-value overload: takes one prediction and returns a float
/// (presumably its calibrated probability - TODO confirm).
92 float Apply(
float pred)
const;
/// Convenience overload: learns from the samples stored in `matrix.samples`.
/// Returns void, so the result of the forwarded Learn call is discarded.
98 void Learn(
const MedFeatures &matrix) {Learn(matrix.samples); }
/// Const helper mapping a single prediction to a float, given `type`.
/// NOTE(review): `type` is a plain int; presumably a CalibrationTypes value - confirm.
102 float calibrate_pred(
float pred,
int type)
const;
/// Writes the calibration table to the file named by `calibration_table_file`.
/// NOTE(review): declared non-const even though it is a writer - confirm whether it mutates state.
104 void write_calibration_table(
const string & calibration_table_file);
/// Reads a calibration table from the file named by `fname`.
/// Returns void, so failures are not reported through the return value.
105 void read_calibration_table(
const string& fname);
/// Const debug-print helper; `pref` is presumably a prefix for the printed output - TODO confirm.
107 void dprint(
const string &pref)
const;
108 void learn_isotonic_regression(
const vector<float> &x,
const vector<float> &y,
const vector<float> &weights, vector<float> &min_range, vector<float> &max_range, vector<float> &
map_prob,
int n_top_controls,
int n_bottom_cases,
112 ADD_SERIALIZATION_FUNCS(calibration_type, estimator_type, binning_method, bins_num, time_unit, pos_sample_min_time_before_case, pos_sample_max_time_before_case,
113 km_time_resolution, min_cases_for_calibration_smoothing_pct, do_calibration_smoothing,
censor_controls,
/// Computes a double from per-time-slot control and case counts and `controls_factor`
/// (presumably the Kaplan-Meier estimate, per the name - TODO confirm).
/// NOTE(review): both vectors are taken by value, so each call copies them; switching to
/// const references would also require changing the definition, which is not in view.
118 double calc_kaplan_meier(vector<double> controls_per_time_slot, vector<double> cases_per_time_slot,
double controls_factor);
/// Reads the entries in `cals` (const) and writes through `smooth_cals` (non-const reference).
/// NOTE(review): the parameter `cals` has the same name as the `cals` member declared earlier;
/// if this is a member function of the same class, the parameter shadows the member - confirm.
119 void smooth_calibration_entries(
const vector<calibration_entry>&
cals, vector<calibration_entry>& smooth_cals,
double controls_factor);
/// Learn step for the time-window method; takes the same arguments as the two-argument Learn.
/// Return-value convention is not visible from this declaration.
122 int learn_time_window(
const vector<MedSample>& orig_samples,
const int samples_time_unit);
/// Const apply step for the time-window method, operating on a MedSamples collection
/// passed by non-const reference (presumably updated in place - TODO confirm).
123 int apply_time_window(
MedSamples& samples)
const;
/// Const apply step for the time-window method, operating on a flat vector of MedSample
/// passed by non-const reference (presumably updated in place - TODO confirm).
124 int apply_time_window(vector<MedSample>& samples)
const;
/// Writes the time-window calibration data to the file named by `calibration_table_file`.
/// Returns void, so failures are not reported through the return value.
125 void write_calibration_time_window(
const string & calibration_table_file);
/// Reads time-window calibration data from the file named by `fname`.
/// Returns void, so failures are not reported through the return value.
126 void read_calibration_time_window(
const string& fname);
Logger.h - allowing logs with more control.
@ FTR_POSTPROCESS_CALIBRATOR
"calibrator" to create Calibrator
Definition PostProcessor.h:16
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition Calibration.h:48
vector< float > map_prob
for "binning/isotonic-regression"
Definition Calibration.h:81
bool fix_pred_order
If true, will not allow higher scores to have lower probabilities.
Definition Calibration.h:72
bool use_isotonic
If true will use isotonic on time_window.
Definition Calibration.h:67
int poly_rank
Only in platt_scale - the polynomial rank for optimizing the sigmoid of prob.
Definition Calibration.h:73
double control_weight_down_sample
factor weight for controls when downsampling controls by this factor
Definition Calibration.h:74
int censor_controls
censor controls without long-enough followup even in mean-outcome mode
Definition Calibration.h:64
vector< double > platt_params
for "platt_scale"
Definition Calibration.h:82
bool verbose
If true will print verbose information for calibration.
Definition Calibration.h:75
int min_preds_in_bin
minimal number of observations to create a bin
Definition Calibration.h:69
vector< calibration_entry > cals
for "time_window"
Definition Calibration.h:80
float min_prob_res
final probability resolution value to round to and merge similar
Definition Calibration.h:71
float min_score_res
score resolution value to round to and merge similar
Definition Calibration.h:70
void get_output_fields(vector< Effected_Field > &fields) const
List of fields that are affected by this post_processor.
Definition Calibration.cpp:46
int n_bottom_cases
number of cases to add with minimal-score for regularization of isotonic regression
Definition Calibration.h:78
void get_input_fields(vector< Effected_Field > &fields) const
List of fields that are used by this post_processor.
Definition Calibration.cpp:38
int n_top_controls
number of controls to add with maximal-score for regularization of isotonic regression
Definition Calibration.h:77
virtual int init(map< string, string > &mapper)
Definition Calibration.cpp:52
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
MedSamples represents a collection of samples per different id. The data is contained in a vector of ...
Definition MedSamples.h:129
static const int Days
days since 1900/01/01
Definition MedTime.h:28
An Abstract PostProcessor class.
Definition PostProcessor.h:39
Definition SerializableObject.h:32
Calibrators are post-processors used for recalibration of a model.
Definition Calibration.h:19