Medial Code Documentation
Loading...
Searching...
No Matches
GibbsSampler.h
1#ifndef __GIBBS_SAMPLER_H__
2#define __GIBBS_SAMPLER_H__
3#include <vector>
4#include <string>
5#include <map>
6#include <random>
7#include <unordered_map>
10#include <MedAlgo/MedAlgo/BinSplitOptimizer.h>
11#include "MedProcessTools/MedProcessTools/Calibration.h"
12
13using namespace std;
14
// Wrapper that stores the same predictor trained on randomly selected samples,
// so that a prediction distribution can be returned for a single feature.
// NOTE(review): the generated index references a `MedPredictor *predictor` member at
// GibbsSampler.h:22 ("predictors for each feature and probability to see Y") that is
// not present in this listing - confirm against the real header.
18template<typename T> class PredictorOrEmpty : public SerializableObject {
19public:
// presumably the number of input features the predictor expects - TODO confirm
20 int input_size;
// all data points of the feature
21 vector<T> sample_cohort;
// calibrator for the probability of each pred
23 vector<Calibrator> calibrators;
// the value of the feature for each pred
24 vector<float> bin_vals;
25
// kMeans centers
26 vector<float> cluster_centers;
// NOTE(review): documented identically to cluster_centers ("for kMeans centers");
// presumably the y values observed in each cluster - verify
27 vector<vector<float>> clusters_y;
28
31
// Retrieves a random sample for this feature based on all other features.
// x   - values passed by non-const reference (presumably the other features' values - TODO confirm)
// gen - random generator used for the draw
33 T get_sample(vector<T> &x, mt19937 &gen);
34
// NOTE(review): registered without the template argument, unlike
// ADD_CLASS_NAME(GibbsSampler<T>) further down - confirm this is intentional
35 ADD_CLASS_NAME(PredictorOrEmpty)
37};
38
67
85
// A Gibbs sampler: learns per-feature predictors and creates samples based on a mask.
// NOTE(review): the generated index references a `Gibbs_Params params` member at
// GibbsSampler.h:94 ("gibbs params") that is not present in this listing - confirm
// against the real header.
89template<typename T> class GibbsSampler : public SerializableObject {
90private:
// presumably the internal random generator used when the caller supplies none - TODO confirm
91 mt19937 _gen;
// presumably records whether prepare_predictors() has already run - TODO confirm
92 bool done_prepare;
93public:
// gibbs feature generators based on predictors
95 vector<PredictorOrEmpty<T>> feats_predictors;
// all feature names (saved in learn)
96 vector<string> all_feat_names;
// NOTE(review): documented identically to all_feat_names; the name suggests the
// subset of features to impute - verify
97 vector<string> impute_feat_names;
// value bins used to round samples to the same resolutions - important to avoid leakage
98 vector<vector<T>> uniqu_value_bins;
99
100 GibbsSampler();
101
// Learns the Gibbs sampler: creates predictors for each feature.
// cohort_data - map from name to a vector of values (presumably feature name -> column - TODO confirm)
105 void learn_gibbs(const map<string, vector<T>> &cohort_data);
106
// Overload that additionally takes an explicit list of features to learn and a skip_missing flag.
// NOTE(review): exact semantics of learn_features / skip_missing are not visible here - see GibbsSampler.cpp
110 void learn_gibbs(const map<string, vector<T>> &cohort_data, const vector<string> &learn_features, bool skip_missing);
111
// Should be called before the first get_samples when used in a parallel manner.
115 void prepare_predictors();
116
// Generates samples based on the Gibbs sampling process.
// results         - output map of generated values
// sampling_params - sampling arguments
// mask, mask_values - optional (default NULL); presumably mark and supply the fixed features - TODO confirm
// print_progress  - optional flag, default false
120 void get_samples(map<string, vector<T>> &results, const GibbsSamplingParams &sampling_params,
121 const vector<bool> *mask = NULL, const vector<T> *mask_values = NULL, bool print_progress = false);
122
// Same parameters as the overload above, plus a caller-supplied random generator (rnd_gen).
126 void get_samples(map<string, vector<T>> &results, const GibbsSamplingParams &sampling_params, mt19937 &rnd_gen,
127 const vector<bool> *mask = NULL, const vector<T> *mask_values = NULL, bool print_progress = false);
128
// Generates samples based on the Gibbs sampling process - uses only the burn rate and creates one sample.
// NOTE(review): the original description is truncated in the generated docs - see GibbsSampler.cpp
132 void get_parallel_samples(map<string, vector<T>> &results, const GibbsSamplingParams &sampling_params,
133 const vector<bool> *mask = NULL, const vector<T> *mask_values = NULL);
134
// Takes the original cohort and the result samples; filters and keeps only samples
// that are similar to the original (description truncated in the generated docs).
// NOTE(review): cohort_data is map<string, vector<float>> while results uses T - confirm this is intended
138 void filter_samples(const map<string, vector<float>> &cohort_data,
139 map<string, vector<T>> &results, const string &predictor_type, const string &predictor_args,
140 float filter_sens);
141
// Initializes the params through their init function; refer to that for the accepted keys.
142 int init(map<string, string>& map);
143
144 virtual ~GibbsSampler();
145
146 ADD_CLASS_NAME(GibbsSampler<T>)
148};
149
156
157#endif
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
A specific settings for binning feature.
Definition BinSplitOptimizer.h:37
A gibbs sampler - has learn and create sample based on mask.
Definition GibbsSampler.h:89
vector< string > impute_feat_names
all features names (saved in learn)
Definition GibbsSampler.h:97
void learn_gibbs(const map< string, vector< T > > &cohort_data)
learn gibbs sample - for each feature creates predictors
Definition GibbsSampler.cpp:166
Gibbs_Params params
gibbs params
Definition GibbsSampler.h:94
vector< PredictorOrEmpty< T > > feats_predictors
gibbs_feature generators based on predictors
Definition GibbsSampler.h:95
void get_samples(map< string, vector< T > > &results, const GibbsSamplingParams &sampling_params, const vector< bool > *mask=NULL, const vector< T > *mask_values=NULL, bool print_progress=false)
generates samples based on gibbs sampling process
Definition GibbsSampler.cpp:539
void filter_samples(const map< string, vector< float > > &cohort_data, map< string, vector< T > > &results, const string &predictor_type, const string &predictor_args, float filter_sens)
takes original cohort and results samples - filters and keep only samples that are similar to origina...
Definition GibbsSampler.cpp:604
int init(map< string, string > &map)
initializes params using the params init function - refer to that
Definition GibbsSampler.cpp:98
vector< string > all_feat_names
all features names (saved in learn)
Definition GibbsSampler.h:96
void prepare_predictors()
Should be called before first get_samples when used in parallel manner.
Definition GibbsSampler.cpp:545
void get_parallel_samples(map< string, vector< T > > &results, const GibbsSamplingParams &sampling_params, const vector< bool > *mask=NULL, const vector< T > *mask_values=NULL)
generates samples based on gibbs sampling process - uses only burn rate and creates one sample and ex...
Definition GibbsSampler.cpp:557
vector< vector< T > > uniqu_value_bins
to round samples to those resolutions! - important for no leak!
Definition GibbsSampler.h:98
A class that contains all sampling arguments.
Definition GibbsSampler.h:71
int jump_between_samples
how many rounds to ignore between taking samples
Definition GibbsSampler.h:75
int burn_in_count
how many rounds in the start to ignore
Definition GibbsSampler.h:74
bool find_real_value_bin
If true will find closest real value to result - to be in same resolution, real value from train.
Definition GibbsSampler.h:77
int samples_count
how many samples to output
Definition GibbsSampler.h:76
int init(map< string, string > &map)
Virtual to init object from parsed fields.
Definition GibbsSampler.cpp:71
Parameters for Gibbs Sampling.
Definition GibbsSampler.h:42
float calibration_save_ratio
if given, will calibrate each prediction score on the saved ratio. Range: [0, 1]
Definition GibbsSampler.h:56
string calibration_string
if calibration_save_ratio > 0 will use this init for calibration string
Definition GibbsSampler.h:57
string predictor_type
predictor type for multi-class
Definition GibbsSampler.h:51
int selection_count
selection down sample count
Definition GibbsSampler.h:47
int init(map< string, string > &map)
Virtual to init object from parsed fields.
Definition GibbsSampler.cpp:38
int kmeans
If > 0 will use kmeans to find clusters and look on each cluster y distribution - select 1 randomly a...
Definition GibbsSampler.h:45
int max_iters
max_iters for kmeans
Definition GibbsSampler.h:49
float selection_ratio
selection_ratio for kMeans - down sample
Definition GibbsSampler.h:46
string predictor_args
predictor args for multi-class
Definition GibbsSampler.h:52
string num_class_setup
param to control number of classes if needed in predictor
Definition GibbsSampler.h:53
BinSettings bin_settings
binning method for each signal
Definition GibbsSampler.h:54
bool select_with_repeats
If true will select with repeats.
Definition GibbsSampler.h:48
Base Interface for predictor.
Definition MedAlgo.h:78
A wrapper class to store the same predictor trained on randomly selected samples, to return a prediction distribution.
Definition GibbsSampler.h:18
vector< vector< float > > clusters_y
for kMeans centers
Definition GibbsSampler.h:27
vector< T > sample_cohort
all data points of feature
Definition GibbsSampler.h:21
vector< float > bin_vals
the value of feature for each pred
Definition GibbsSampler.h:24
vector< float > cluster_centers
for kMeans centers
Definition GibbsSampler.h:26
T get_sample(vector< T > &x, mt19937 &gen)
retrieves random sample for feature based on all other features
Definition GibbsSampler.cpp:102
MedPredictor * predictor
predictors for each feature and probability to see Y (logloss function)
Definition GibbsSampler.h:22
vector< Calibrator > calibrators
calibrator for probability for each pred
Definition GibbsSampler.h:23
Definition SerializableObject.h:32
Definition StdDeque.h:58