1#ifndef __GIBBS_SAMPLER_H__
2#define __GIBBS_SAMPLER_H__
7#include <unordered_map>
10#include <MedAlgo/MedAlgo/BinSplitOptimizer.h>
11#include "MedProcessTools/MedProcessTools/Calibration.h"
61 int init(map<string, string>& map);
80 int init(map<string, string>& map);
105 void learn_gibbs(
const map<
string, vector<T>> &cohort_data);
110 void learn_gibbs(
const map<
string, vector<T>> &cohort_data,
const vector<string> &learn_features,
bool skip_missing);
121 const vector<bool> *mask = NULL,
const vector<T> *mask_values = NULL,
bool print_progress =
false);
127 const vector<bool> *mask = NULL,
const vector<T> *mask_values = NULL,
bool print_progress =
false);
133 const vector<bool> *mask = NULL,
const vector<T> *mask_values = NULL);
138 void filter_samples(
const map<
string, vector<float>> &cohort_data,
139 map<
string, vector<T>> &results,
const string &predictor_type,
const string &predictor_args,
142 int init(map<string, string>& map);
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
An Abstract class that can be serialized and written/read from file.
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
A specific settings for binning feature.
Definition BinSplitOptimizer.h:37
A Gibbs sampler - learns from data and creates samples based on a mask.
Definition GibbsSampler.h:89
vector< string > impute_feat_names
all features names (saved in learn)
Definition GibbsSampler.h:97
void learn_gibbs(const map< string, vector< T > > &cohort_data)
learns the Gibbs sampler - creates predictors for each feature
Definition GibbsSampler.cpp:166
Gibbs_Params params
gibbs params
Definition GibbsSampler.h:94
vector< PredictorOrEmpty< T > > feats_predictors
gibbs_feature generators based on predictors
Definition GibbsSampler.h:95
void get_samples(map< string, vector< T > > &results, const GibbsSamplingParams &sampling_params, const vector< bool > *mask=NULL, const vector< T > *mask_values=NULL, bool print_progress=false)
generates samples based on gibbs sampling process
Definition GibbsSampler.cpp:539
void filter_samples(const map< string, vector< float > > &cohort_data, map< string, vector< T > > &results, const string &predictor_type, const string &predictor_args, float filter_sens)
takes the original cohort and the result samples - filters and keeps only samples that are similar to origina...
Definition GibbsSampler.cpp:604
int init(map< string, string > &map)
initializes params via the params init function; refer to that function.
Definition GibbsSampler.cpp:98
vector< string > all_feat_names
all features names (saved in learn)
Definition GibbsSampler.h:96
void prepare_predictors()
Should be called before the first get_samples when used in a parallel manner.
Definition GibbsSampler.cpp:545
void get_parallel_samples(map< string, vector< T > > &results, const GibbsSamplingParams &sampling_params, const vector< bool > *mask=NULL, const vector< T > *mask_values=NULL)
generates samples based on gibbs sampling process - uses only burn rate and creates one sample and ex...
Definition GibbsSampler.cpp:557
vector< vector< T > > uniqu_value_bins
used to round samples to those resolutions - important to avoid leakage!
Definition GibbsSampler.h:98
A class that contains all sampling arguments.
Definition GibbsSampler.h:71
int jump_between_samples
how many rounds to ignore between taking samples
Definition GibbsSampler.h:75
int burn_in_count
how many rounds at the start to ignore
Definition GibbsSampler.h:74
bool find_real_value_bin
If true, will find the closest real value to the result - so it is in the same resolution as the real values from train.
Definition GibbsSampler.h:77
int samples_count
how many samples to output
Definition GibbsSampler.h:76
int init(map< string, string > &map)
Virtual to init object from parsed fields.
Definition GibbsSampler.cpp:71
Parameters for Gibbs Sampling.
Definition GibbsSampler.h:42
float calibration_save_ratio
if given, will calibrate each prediction score on the saved_ratio. [0, 1]
Definition GibbsSampler.h:56
string calibration_string
if calibration_save_ratio > 0 will use this init for calibration string
Definition GibbsSampler.h:57
string predictor_type
predictor type for multi-class
Definition GibbsSampler.h:51
int selection_count
selection down sample count
Definition GibbsSampler.h:47
int init(map< string, string > &map)
Virtual to init object from parsed fields.
Definition GibbsSampler.cpp:38
int kmeans
If > 0, will use kmeans to find clusters and look at each cluster's y distribution - select 1 randomly a...
Definition GibbsSampler.h:45
int max_iters
max_iters for kmeans
Definition GibbsSampler.h:49
float selection_ratio
selection_ratio for kMeans - down sample
Definition GibbsSampler.h:46
string predictor_args
predictor args for multi-class
Definition GibbsSampler.h:52
string num_class_setup
param to control number of classes if needed in predictor
Definition GibbsSampler.h:53
BinSettings bin_settings
binning method for each signal
Definition GibbsSampler.h:54
bool select_with_repeats
If true, will select with repeats.
Definition GibbsSampler.h:48
Base Interface for predictor.
Definition MedAlgo.h:78
A wrapper class to store the same predictor trained on randomly selected samples, to return a prediction dist.
Definition GibbsSampler.h:18
vector< vector< float > > clusters_y
for kMeans centers
Definition GibbsSampler.h:27
vector< T > sample_cohort
all data points of feature
Definition GibbsSampler.h:21
vector< float > bin_vals
the value of feature for each pred
Definition GibbsSampler.h:24
vector< float > cluster_centers
for kMeans centers
Definition GibbsSampler.h:26
T get_sample(vector< T > &x, mt19937 &gen)
retrieves random sample for feature based on all other features
Definition GibbsSampler.cpp:102
MedPredictor * predictor
predictors for each feature and probability to see Y (logloss function)
Definition GibbsSampler.h:22
vector< Calibrator > calibrators
calibrator for probability for each pred
Definition GibbsSampler.h:23
Definition SerializableObject.h:32