Medial Code Documentation
Loading...
Searching...
No Matches
Calibration.h
1#ifndef __CALIBRATION_H__
2#define __CALIBRATION_H__
3
4#include <vector>
7#include <MedProcessTools/MedProcessTools/MedSamples.h>
9#include <MedFeat/MedFeat/MedOutcome.h>
10#include "PostProcessor.h"
11
12using namespace std;
13
18// Calibration entry is used for transforming a score into a probability
20public:
21 int bin;
22 float min_pred, max_pred;
23 double cnt_cases, cnt_controls;
24 double cnt_cases_no_w, cnt_controls_no_w;
25 float mean_pred, mean_outcome;
26 float cumul_pct;
27 vector<double> controls_per_time_slot;
28 vector<double> cases_per_time_slot;
29 float kaplan_meier;
30
31 string str() const;
32
33 ADD_CLASS_NAME(calibration_entry)
34 ADD_SERIALIZATION_FUNCS(bin, min_pred, max_pred, cnt_cases, cnt_controls, cnt_cases_no_w, cnt_controls_no_w, mean_pred, mean_outcome, controls_per_time_slot, cases_per_time_slot, kaplan_meier)
35};
37
38enum CalibrationTypes { ///< Calibration methods selectable through Calibrator::calibration_type
39 probability_time_window = 0, ///< default value of Calibrator::calibration_type; presumably the mode backed by Calibrator::cals - confirm
40 probability_binning = 1, ///< presumably the binning mode backed by min_range/max_range/map_prob - confirm
41 probability_platt_scale = 2, ///< presumably the Platt-scaling mode backed by Calibrator::platt_params - confirm
42 probability_isotonic = 3 ///< presumably the isotonic-regression mode backed by map_prob - confirm
43};
44
45extern unordered_map<int, string> calibration_method_to_name; ///< int -> name lookup table, defined in another translation unit; keys are presumably CalibrationTypes values - confirm
46static CalibrationTypes clibration_name_to_type(const string& calibration_name); ///< Maps a calibration name to its CalibrationTypes value. NOTE(review): name is spelled "clibration" (sic) - renaming would break callers; declared static in a header, so each including translation unit gets its own internal-linkage declaration
47
48class Calibrator : public PostProcessor { ///< PostProcessor that learns and applies a mapping from prediction scores to calibrated values
49public:
51
52 CalibrationTypes calibration_type = probability_time_window; ///< which calibration method to use; defaults to probability_time_window
53
54 int time_unit = MedTime::Days; ///< time unit, defaults to MedTime::Days
55
56 string estimator_type = "kaplan_meier"; ///< estimator name, defaults to "kaplan_meier"
57 string binning_method = "equal_num_of_samples_per_bin"; ///< binning strategy name
58 int bins_num = 1000; ///< number of bins (default 1000)
59 int pos_sample_min_time_before_case = 0; ///< lower bound of the positive-sample window before the case; presumably expressed in time_unit - confirm
60 int pos_sample_max_time_before_case = 360; ///< upper bound of the positive-sample window before the case; presumably expressed in time_unit - confirm
61 int km_time_resolution = 1; ///< presumably the time-slot width used by the kaplan_meier estimator - confirm
62 int min_cases_for_calibration_smoothing_pct = 10; ///< threshold consulted by calibration smoothing - exact semantics not visible in this header
63 int do_calibration_smoothing = 1; ///< non-zero by default; presumably enables smooth_calibration_entries - confirm
65 string weights_attr_name = "weight"; //weight attr to look for in samples attributes
66 int min_control_bins = -1; ///< defaults to -1; meaning of the negative default is not visible here - TODO confirm
67 bool use_isotonic = false; ///< If true will use isotonic on time_window.
68
69 int min_preds_in_bin = 100; ///< minimal number of observations to create a bin
70 float min_score_res = 0; ///< score resolution value to round to and merge similar
71 float min_prob_res = 0; ///< final probability resolution value to round to and merge similar
72 bool fix_pred_order = false; ///< If true will not allow higher scores to have lower probabilities.
73 int poly_rank = 1; ///< Only in platt_scale - the polynomial rank for optimizing sigmoid of prob.
75 bool verbose = true; ///< If true will print verbose information for calibration.
76
79
80 vector<calibration_entry> cals; ///< learned table for "time_window"
81 vector<float> min_range, max_range, map_prob; ///< map_prob is for "binning/isotonic-regression"; min_range/max_range presumably bound the score range of each map_prob entry - confirm
82 vector<double> platt_params; ///< learned parameters for "platt_scale"
83
85 virtual int init(map<string, string>& mapper); ///< initializes from a string-to-string parameter map
86 virtual int Learn(const MedSamples& samples); ///< learns the calibration from a MedSamples collection
87 virtual int Learn(const vector<MedSample>& samples) { return Learn(samples, global_default_time_unit); } ///< forwards to the two-argument overload using global_default_time_unit
88 virtual int Learn(const vector <MedSample>& samples, const int samples_time_unit); ///< learns the calibration from a flat sample vector with an explicit time unit
89 virtual int Apply(MedSamples& samples); ///< applies the calibration to samples in place (non-const argument)
90 virtual int Apply(vector <MedSample>& samples); ///< applies the calibration to a flat sample vector in place (non-const argument)
91 void Apply(const vector<float> &preds, vector<float> &probs) const; ///< maps preds into the output vector probs
92 float Apply(float pred) const; ///< maps a single prediction
93
94 void get_input_fields(vector<Effected_Field> &fields) const; ///< List of fields that are used by this post_processor.
95 void get_output_fields(vector<Effected_Field> &fields) const; ///< List of fields that are being affected by this post_processor.
96
97 //PostProcessor functions:
98 void Learn(const MedFeatures &matrix) {Learn(matrix.samples); } ///< forwards to Learn(matrix.samples); the int result of that call is discarded
99 void Apply(MedFeatures &matrix); ///< applies the calibration to a feature matrix
100
101 calibration_entry calibrate_pred(float pred); ///< returns a calibration_entry for pred; presumably a lookup in cals - confirm
102 float calibrate_pred(float pred, int type) const; ///< returns a float for pred; the meaning of type is not visible in this header - TODO confirm
103
104 void write_calibration_table(const string & calibration_table_file); ///< writes the calibration table to the given file
105 void read_calibration_table(const string& fname); ///< reads a calibration table from the given file
106
107 void dprint(const string &pref) const; ///< debug print; pref is presumably a line prefix - confirm
108 void learn_isotonic_regression(const vector<float> &x, const vector<float> &y, const vector<float> &weights, vector<float> &min_range, vector<float> &max_range, vector<float> &map_prob, int n_top_controls, int n_bottom_cases, // min_range/max_range/map_prob are non-const outputs that shadow the members of the same name
109 bool verbose); // NOTE(review): n_top_controls / n_bottom_cases presumably match the like-named members (controls added at maximal score / cases added at minimal score for regularization) - confirm
110
111 ADD_CLASS_NAME(Calibrator)
112 ADD_SERIALIZATION_FUNCS(calibration_type, estimator_type, binning_method, bins_num, time_unit, pos_sample_min_time_before_case, pos_sample_max_time_before_case,
113 km_time_resolution, min_cases_for_calibration_smoothing_pct, do_calibration_smoothing, censor_controls, // NOTE(review): the argument list is cut off in this listing (original lines 114-115 are absent); censor_controls is declared on original line 64, also absent here
116
117protected:
118 double calc_kaplan_meier(vector<double> controls_per_time_slot, vector<double> cases_per_time_slot, double controls_factor); ///< computes a Kaplan-Meier value from per-time-slot counts; both vectors are taken by value (copied)
119 void smooth_calibration_entries(const vector<calibration_entry>& cals, vector<calibration_entry>& smooth_cals, double controls_factor); ///< reads cals and writes the smoothed entries into smooth_cals
120
121private:
122 int learn_time_window(const vector<MedSample>& orig_samples, const int samples_time_unit); ///< time_window-specific learning routine
123 int apply_time_window(MedSamples& samples) const; ///< time_window-specific apply for MedSamples
124 int apply_time_window(vector<MedSample>& samples) const; ///< time_window-specific apply for a flat sample vector
125 void write_calibration_time_window(const string & calibration_table_file); ///< time_window-specific table writer
126 void read_calibration_time_window(const string& fname); ///< time_window-specific table reader
127};
128
130
131#endif
Logger.h - allowing logs with more control.
@ FTR_POSTPROCESS_CALIBRATOR
"calibrator" to create Calibrator
Definition PostProcessor.h:16
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Definition Calibration.h:48
vector< float > map_prob
for "binning/isotonic-regression"
Definition Calibration.h:81
bool fix_pred_order
If true will not allow higher scores to have lower probabilities.
Definition Calibration.h:72
bool use_isotonic
If true will use isotonic on time_window.
Definition Calibration.h:67
int poly_rank
Only in platt_scale - the polynomial rank for optimizing sigmoid of prob.
Definition Calibration.h:73
double control_weight_down_sample
factor weight for controls when downsampling controls by this factor
Definition Calibration.h:74
int censor_controls
censor controls without long-enough followup even in mean-outcome mode
Definition Calibration.h:64
vector< double > platt_params
for "platt_scale"
Definition Calibration.h:82
bool verbose
If true will print verbose information for calibration.
Definition Calibration.h:75
int min_preds_in_bin
minimal number of observations to create a bin
Definition Calibration.h:69
vector< calibration_entry > cals
for "time_window"
Definition Calibration.h:80
float min_prob_res
final probability resolution value to round to and merge similar
Definition Calibration.h:71
float min_score_res
score resolution value to round to and merge similar
Definition Calibration.h:70
void get_output_fields(vector< Effected_Field > &fields) const
List of fields that are being affected by this post_processor.
Definition Calibration.cpp:46
int n_bottom_cases
number of cases to add with minimal-score for regularization of isotonic regression
Definition Calibration.h:78
void get_input_fields(vector< Effected_Field > &fields) const
List of fields that are used by this post_processor.
Definition Calibration.cpp:38
int n_top_controls
number of controls to add with maximal-score for regularization of isotonic regression
Definition Calibration.h:77
virtual int init(map< string, string > &mapper)
Definition Calibration.cpp:52
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
MedSamples represents a collection of samples per different id. The data is contained in a vector of ...
Definition MedSamples.h:129
static const int Days
days since 1900/01/01
Definition MedTime.h:28
An Abstract PostProcessor class.
Definition PostProcessor.h:39
Definition SerializableObject.h:32
Calibrators are post-processors used for recalibration of a model.
Definition Calibration.h:19
Definition StdDeque.h:58