2#ifndef __EXPLAIN_WRAPPER_H__
3#define __EXPLAIN_WRAPPER_H__
9#include <MedStat/MedStat/GibbsSampler.h>
10#include <MedAlgo/MedAlgo/tree_shap.h>
11#include <MedAlgo/MedAlgo/SamplesGenerator.h>
26 int init(map<string, string> &map);
29 void filter(map<string, float> &explain_list)
const;
41 bool postprocessing_cov =
false;
58 vector<vector<int>> group2Inds;
59 vector<string> groupNames;
60 map<string, vector<int>> groupName2Inds;
64 int init(map<string, string> &map);
70 void process(map<string, float> &explain_list)
const;
72 void process(map<string, float> &explain_list,
unsigned char *missing_value_mask)
const;
77 void post_deserialization();
80 static void read_feature_grouping(
const string &file_name,
const vector<string>& features, vector<vector<int>>& group2index,
81 vector<string>& group_names,
bool verbose =
true);
109 virtual void _init(map<string, string> &mapper) = 0;
111 unordered_map<string, const FeatureNormalizer *> feats_to_norm;
119 virtual int init(map<string, string> &mapper);
121 virtual int update(map<string, string>& mapper);
136 virtual void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const = 0;
141 static void print_explain(
MedSample &smp,
int sort_mode = 0);
143 void dprint(
const string &pref)
const;
151enum TreeExplainerMode {
153 CONVERTED_TREES_IMPL = 1,
168 bool convert_qrf_trees();
169 bool convert_lightgbm_trees();
170 bool convert_xgb_trees();
171 void _init(map<string, string> &mapper);
173 bool try_convert_trees();
180 bool verbose =
false;
186 TreeExplainerMode get_mode()
const;
190 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
192 void post_deserialization();
211 void _init(map<string, string> &mapper);
213 float avg_bias_score;
225 string predictor_type;
247 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
264 unique_ptr<SamplesGenerator<float>> _sampler = NULL;
265 void *sampler_sampling_args = NULL;
270 float avg_bias_score;
272 void init_sampler(
bool with_sampler =
true);
274 void _init(map<string, string> &mapper);
287 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
289 void post_deserialization();
292 void load_GAN(
MedPredictor *original_pred,
const string &gan_path);
296 void dprint(
const string &pref)
const;
308 unique_ptr<SamplesGenerator<float>> _sampler = NULL;
309 void *sampler_sampling_args = NULL;
315 void init_sampler(
bool with_sampler =
true);
316 void _init(map<string, string> &mapper);
317 medial::shapley::LimeWeightMethod get_weight_method(
string method_s);
324 medial::shapley::LimeWeightMethod weighting = medial::shapley::LimeWeightSum;
331 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
334 void load_GAN(
MedPredictor *original_pred,
const string &gan_path);
338 void post_deserialization();
340 void dprint(
const string &pref)
const;
353 vector<float> average,
std;
356 void computeExplanation(vector<float> thisRow, map<string, float> &sample_explain_reasons, vector <vector<int>> knnGroups, vector<string> knnGroupNames)
const;
358 void _init(map<string, string> &mapper);
370 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
381 void _init(map<string, string> &mapper);
383 float avg_bias_score;
389 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
401 unique_ptr<SamplesGenerator<float>> _sampler = NULL;
402 void *sampler_sampling_args = NULL;
407 float avg_bias_score;
409 void init_sampler(
bool with_sampler =
true);
411 void _init(map<string, string> &mapper);
430 void explain(
const MedFeatures &matrix, vector<map<string, float>> &sample_explain_reasons)
const;
432 void post_deserialization();
435 void load_GAN(
MedPredictor *original_pred,
const string &gan_path);
439 void dprint(
const string &pref)
const;
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
@ FTR_POSTPROCESS_ITERATIVE_SET
"iterative_set" to create IterativeSetExplainer - model agnostic iterative explainer for model....
Definition PostProcessor.h:23
@ FTR_POSTPROCESS_LINEAR
"linear" to create LinearExplainer to explain linear model - importance is score change when putting ...
Definition PostProcessor.h:22
@ FTR_POSTPROCESS_LIME_SHAP
"lime_shap" to create LimeExplainer - model agnostic shapley algorithm with lime on shap values sampl...
Definition PostProcessor.h:20
@ FTR_POSTPROCESS_SHAPLEY
"shapley" to create ShapleyExplainer - model agnostic shapley explainer for model....
Definition PostProcessor.h:18
@ FTR_POSTPROCESS_KNN_EXPLAIN
"knn" Explainer built on knn principles KNN_Explainer
Definition PostProcessor.h:21
@ FTR_POSTPROCESS_TREE_SHAP
"tree_shap" to create TreeExplainer to explain tree mode or mimic generic model with trees model
Definition PostProcessor.h:17
#define ADD_SERIALIZATION_FUNCS(...)
Definition SerializableObject.h:122
#define MEDSERIALIZE_SUPPORT(Type)
Definition SerializableObject.h:108
Specific settings for binning a feature.
Definition BinSplitOptimizer.h:37
Parameters for filtering explanations.
Definition ExplainWrapper.h:18
int max_count
maximal limit to take as explain features. 0 - no limit
Definition ExplainWrapper.h:21
void filter(map< string, float > &explain_list) const
commit filterings
Definition ExplainWrapper.cpp:44
float sum_ratio
percentage of sum of explain values to take from sort_mode. [0 - 1]
Definition ExplainWrapper.h:22
int init(map< string, string > &map)
Virtual to init object from parsed fields.
Definition ExplainWrapper.cpp:26
int sort_mode
0 - both pos and negative (sorted by abs), -1 - only negatives, +1 - only positives
Definition ExplainWrapper.h:20
Processings of explanations - grouping, Using covariance matrix for taking feature correlations into ...
Definition ExplainWrapper.h:39
bool learn_cov_matrix
If true will learn cov_matrix.
Definition ExplainWrapper.h:44
int zero_missing
if != 0 will throw bias terms and zero all contributions of missing values and groups of missing valu...
Definition ExplainWrapper.h:46
static void read_feature_grouping(const string &file_name, const vector< string > &features, vector< vector< int > > &group2index, vector< string > &group_names, bool verbose=true)
Creates the feature groups from the argument file_name and by existing features.
Definition ExplainWrapper.cpp:733
void process(map< string, float > &explain_list) const
commit processings
Definition ExplainWrapper.cpp:327
bool iterative
if true will add explainers iteratively, conditioned on those already selected
Definition ExplainWrapper.h:48
int init(map< string, string > &map)
Virtual to init object from parsed fields.
Definition ExplainWrapper.cpp:92
string grouping
grouping file or "BY_SIGNAL" keyword to group by signal or "BY_SIGNAL_CATEG" - for category signal to...
Definition ExplainWrapper.h:57
int normalize_vals
If != 0 will normalize contributions. 1: normalize by sum of (non b0) abs of all contributions 2: sam...
Definition ExplainWrapper.h:45
float get_group_normalized_contrib(const vector< int > &group_inds, vector< float > &contribs, float total_normalization_factor) const
helper func: returns the normalized contribution for a specific group given original contributions
Definition ExplainWrapper.cpp:295
BinSettings mutual_inf_bin_setting
the bin setting for mutual information
Definition ExplainWrapper.h:53
bool group_by_sum
If true will do grouping by sum of each feature, otherwise will use internal special implementation.
Definition ExplainWrapper.h:43
MedMat< float > abs_cov_features
absolute values of covariance features for matrix. Either read from file (and then apply absolute valu...
Definition ExplainWrapper.h:55
int iteration_cnt
if >0 the maximal number of iterations
Definition ExplainWrapper.h:49
bool use_mutual_information
if true will use mutual information instead of covariance
Definition ExplainWrapper.h:52
void learn(const MedFeatures &train_mat)
Learns process - for example cov matrix.
Definition ExplainWrapper.cpp:183
bool keep_b0
if true will keep b0 prior
Definition ExplainWrapper.h:47
bool use_max_cov
If true will use max cov logic.
Definition ExplainWrapper.h:50
A gibbs sampler - has learn and create sample based on mask.
Definition GibbsSampler.h:89
A class that contains all sampling arguments.
Definition GibbsSampler.h:71
A wrapper class to hold all global arguments needed for ModelExplainer.
Definition ExplainWrapper.h:91
bool store_as_json
If true will store ButWhy output as json in string attributes.
Definition ExplainWrapper.h:94
bool denorm_features
If true will save feature values denorm.
Definition ExplainWrapper.h:95
string attr_name
attribute name for explainer
Definition ExplainWrapper.h:93
iterative set explainer with (gibbs, GAN or other samples generator) or proxy predictor algorithm to ...
Definition ExplainWrapper.h:399
int max_set_size
the size to look for to explain
Definition ExplainWrapper.h:424
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:2900
float sort_params_a
weight for minimal distance from original score importance
Definition ExplainWrapper.h:420
int n_masks
how many tests to conduct from shapley
Definition ExplainWrapper.h:416
float sort_params_k2
weight for variance in prediction using imputation. the rest is change from prev
Definition ExplainWrapper.h:423
float sort_params_k1
weight for minimal distance from original score importance
Definition ExplainWrapper.h:422
float missing_value
missing value
Definition ExplainWrapper.h:418
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:2905
string sampling_args
args for sampling
Definition ExplainWrapper.h:415
float sort_params_b
weight for variance in prediction using imputation. the rest is change from prev
Definition ExplainWrapper.h:421
GeneratorType gen_type
generator type
Definition ExplainWrapper.h:413
string generator_args
for learn
Definition ExplainWrapper.h:414
bool use_random_sampling
If True will use random sampling - otherwise will sample mask size and then create it.
Definition ExplainWrapper.h:417
KNN explainer.
Definition ExplainWrapper.h:350
float chosenThreshold
Threshold to use on scores. If missing use thresholdQ to define threshold.
Definition ExplainWrapper.h:363
float thresholdQ
defines threshold by positive ratio on training set ( when chosenThreshold missing)....
Definition ExplainWrapper.h:364
float fraction
fraction of points that is considered neighborhood to a point
Definition ExplainWrapper.h:362
int numClusters
how many samples (randomly chosen) represent the training space -1:all. If larger than size of matrix...
Definition ExplainWrapper.h:361
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:2729
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:2659
shapley-Lime explainer with gibbs, GAN or other sampler generator
Definition ExplainWrapper.h:306
int n_masks
number of masks
Definition ExplainWrapper.h:325
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:2493
string generator_args
for learn
Definition ExplainWrapper.h:320
GeneratorType gen_type
generator type
Definition ExplainWrapper.h:319
float p_mask
prob for 1 in mask, if 0 - mask generation done by first selecting # of 1's in mask (uniformly) and t...
Definition ExplainWrapper.h:323
float missing_value
missing value
Definition ExplainWrapper.h:322
string sampling_args
args for sampling
Definition ExplainWrapper.h:321
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:2489
Simple Linear Explainer - puts zeros for each feature and measures change in score.
Definition ExplainWrapper.h:379
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:2561
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:2557
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
A model = repCleaner + featureGenerator + featureProcessor + MedPredictor.
Definition MedModel.h:56
Base Interface for predictor.
Definition MedAlgo.h:78
MedSample represents a single sample: id + time (date). Additional (optional) entries: outcome,...
Definition MedSamples.h:20
Shapley Explainer - Based on learning training data to handle missing_values as "correct" input.
Definition ExplainWrapper.h:207
string verbose_apply
If has value - output file.
Definition ExplainWrapper.h:227
float override_score_bias
when given will use it as score bias if train is very different from test
Definition ExplainWrapper.h:239
float missing_value
missing value
Definition ExplainWrapper.h:219
float sort_params_k1
weight for minimal distance from original score importance
Definition ExplainWrapper.h:236
float split_to_test
to report RMSE on this ratio > 0 and < 1
Definition ExplainWrapper.h:240
float sort_params_b
weight for variance in prediction using imputation. the rest is change from prev
Definition ExplainWrapper.h:235
int max_set_size
the size to look for to explain
Definition ExplainWrapper.h:238
int subsample_train
if not zero will use this to subsample original train samples to this number
Definition ExplainWrapper.h:229
string predictor_args
arguments to change in predictor - for example to change it into regression
Definition ExplainWrapper.h:224
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:1723
bool no_relearn
If true will use original model without relearn. assume original model is good enough for missing val...
Definition ExplainWrapper.h:216
bool verbose_learn
If true will print more in learn.
Definition ExplainWrapper.h:226
bool use_minimal_set
If true will use different method to find minimal set.
Definition ExplainWrapper.h:233
float sort_params_k2
weight for variance in prediction using imputation. the rest is change from prev
Definition ExplainWrapper.h:237
float max_weight
the maximal weight number. if < 0 no limit
Definition ExplainWrapper.h:228
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:2012
int limit_mask_size
if set will limit mask size in the train - useful for minimal_set
Definition ExplainWrapper.h:230
int add_new_data
how many new data points to add for train according to sample masks
Definition ExplainWrapper.h:215
int max_test
max number of samples in SHAP
Definition ExplainWrapper.h:218
float select_from_all
If max_test is beyond this percentage of all options then sample from all options (to speed up runtim...
Definition ExplainWrapper.h:221
bool sample_masks_with_repeats
Whether or not to sample masks with repeats.
Definition ExplainWrapper.h:220
bool use_shuffle
if not sampling uniformly, if true will use shuffle (to speed up runtime)
Definition ExplainWrapper.h:223
bool uniform_rand
If true will sample masks uniformly
Definition ExplainWrapper.h:222
float sort_params_a
weight for minimal distance from original score importance
Definition ExplainWrapper.h:234
An abstract class API for explainer.
Definition ExplainWrapper.h:106
void Apply(MedFeatures &matrix)
alias for explain
Definition ExplainWrapper.h:127
virtual int init(map< string, string > &mapper)
Global init for general args in all explainers. initialize directly all args in GlobalExplainerParams...
Definition ExplainWrapper.cpp:530
void get_input_fields(vector< Effected_Field > &fields) const
List of fields that are used by this post_processor.
Definition ExplainWrapper.cpp:595
ExplainFilters filters
general filters of results
Definition ExplainWrapper.h:114
void get_output_fields(vector< Effected_Field > &fields) const
List of fields that are being affected by this post_processor.
Definition ExplainWrapper.cpp:598
virtual void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const =0
Virtual - return explain results in sample_feature_contrib.
void init_post_processor(MedModel &model)
Init ModelExplainer from MedModel - copies predictor pointer, might save normalizers pointers.
Definition ExplainWrapper.cpp:614
virtual void _learn(const MedFeatures &train_mat)=0
overload function for ModelExplainer - easier API
ExplainProcessings processing
processing of results, like groupings, COV
Definition ExplainWrapper.h:115
virtual int update(map< string, string > &mapper)
Virtual to update object from parsed fields.
Definition ExplainWrapper.cpp:558
virtual void Learn(const MedFeatures &train_mat)
Learns from predictor and train_matrix (PostProcessor API)
Definition ExplainWrapper.cpp:897
MedPredictor * original_predictor
predictor we're trying to explain
Definition ExplainWrapper.h:113
An Abstract PostProcessor class.
Definition PostProcessor.h:39
Abstract Random Samples generator.
Definition SamplesGenerator.h:34
Definition SerializableObject.h:32
shapley explainer with gibbs, GAN or other samples generator
Definition ExplainWrapper.h:262
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:2259
string sampling_args
args for sampling
Definition ExplainWrapper.h:278
float missing_value
missing value
Definition ExplainWrapper.h:281
bool use_random_sampling
If True will use random sampling - otherwise will sample mask size and then create it.
Definition ExplainWrapper.h:280
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:2254
int n_masks
how many tests to conduct from shapley
Definition ExplainWrapper.h:279
string generator_args
for learn
Definition ExplainWrapper.h:277
GeneratorType gen_type
generator type
Definition ExplainWrapper.h:276
A generic tree explainer:
Definition ExplainWrapper.h:163
string proxy_model_init
proxy predictor arguments
Definition ExplainWrapper.h:176
void _learn(const MedFeatures &train_mat)
overload function for ModelExplainer - easier API
Definition ExplainWrapper.cpp:1407
float missing_value
missing value
Definition ExplainWrapper.h:179
int approximate
if true will run SAABAS alg - which is faster
Definition ExplainWrapper.h:178
bool interaction_shap
If true will calc interaction_shap values (slower)
Definition ExplainWrapper.h:177
void explain(const MedFeatures &matrix, vector< map< string, float > > &sample_explain_reasons) const
Virtual - return explain results in sample_feature_contrib.
Definition ExplainWrapper.cpp:1460
string proxy_model_type
proxy predictor type to relearn original predictor output with tree models
Definition ExplainWrapper.h:175
void init_post_processor(MedModel &model)
Init ModelExplainer from MedModel - copies predictor pointer, might save normalizers pointers.
Definition ExplainWrapper.cpp:1402
Definition tree_shap.h:70