16#if defined(_WIN32) || defined(WIN32)
24#include <unordered_set>
25#include <MedProcessTools/MedProcessTools/MedFeatures.h>
27#include "SamplesGenerator.h"
/// Named integer codes for the feature-dependence mode.
/// NOTE(review): presumably these are the accepted values of the
/// `feature_dependence` argument taken by functions declared later in this
/// header - confirm against the definitions.
/// NOTE(review): the closing brace of this namespace is not visible in this listing.
37namespace FEATURE_DEPENDENCE {
38 const unsigned independent = 0;            // code 0
39 const unsigned tree_path_dependent = 1;    // code 1
40 const unsigned global_path_dependent = 2;  // code 2
/// Named integer codes for the model-output transform.
/// NOTE(review): presumably these are the accepted values of the
/// `model_transform` argument taken by functions declared later in this
/// header - confirm against the definitions.
/// NOTE(review): the closing brace of this namespace is not visible in this listing.
43namespace MODEL_TRANSFORM {
44 const unsigned identity = 0;           // code 0
45 const unsigned logistic = 1;           // code 1
46 const unsigned logistic_nlogloss = 2;  // code 2
47 const unsigned squared_loss = 3;       // code 3
63 unsigned M,
unsigned num_R,
unsigned num_Exp);
65 unsigned M,
unsigned num_R);
73 int *children_default;
77 tfloat *node_sample_weights;
/// Constructs a TreeEnsemble from caller-supplied arrays and size parameters.
///
/// Only the declaration is visible here; whether the pointed-to arrays are
/// copied or merely referenced (and who owns them) cannot be determined from
/// this listing.
///
/// @param children_left        presumably per-node index of the left child - TODO confirm
/// @param children_right       presumably per-node index of the right child - TODO confirm
/// @param children_default     presumably per-node child taken for missing values - TODO confirm
/// @param features             presumably per-node split feature index - TODO confirm
/// @param thresholds           presumably per-node split threshold - TODO confirm
/// @param values               presumably per-node output values - TODO confirm
/// @param node_sample_weights  presumably per-node sample weight / cover - TODO confirm
/// @param max_depth            maximum tree depth (semantics defined by the implementation)
/// @param tree_limit           number of trees (same name as the first argument of allocate())
/// @param base_offset          presumably a constant added to the ensemble output - TODO confirm
/// @param max_nodes            presumably the per-tree node capacity - TODO confirm
/// @param num_outputs          presumably the number of model outputs - TODO confirm
86 TreeEnsemble(
int *children_left,
int *children_right,
int *children_default,
int *features,
87 tfloat *thresholds, tfloat *values, tfloat *node_sample_weights,
88 unsigned max_depth,
unsigned tree_limit, tfloat base_offset,
89 unsigned max_nodes,
unsigned num_outputs);
/// Fills `tree` with data for tree number `i` of this ensemble.
///
/// Declared const, so it does not modify this ensemble; `tree` is a non-const
/// reference and therefore the output.
/// NOTE(review): presumably `tree` becomes a view into this ensemble's arrays
/// rather than a deep copy - confirm against the definition.
///
/// @param tree  output ensemble object to populate
/// @param i     index of the tree to fetch (valid range not visible here)
91 void get_tree(
TreeEnsemble &tree,
const unsigned i)
const;
/// Sets up this ensemble for the given sizes.
///
/// NOTE(review): presumably allocates the internal arrays for
/// `tree_limit_in` trees of up to `max_nodes_in` nodes with `num_outputs_in`
/// outputs each; the matching release path is not visible in this listing -
/// confirm who frees the storage.
///
/// @param tree_limit_in   number of trees
/// @param max_nodes_in    node capacity (presumably per tree - TODO confirm)
/// @param num_outputs_in  number of outputs
93 void allocate(
unsigned tree_limit_in,
unsigned max_nodes_in,
unsigned num_outputs_in);
110 tfloat zero_fraction;
120 short cl, cr, cd, pnode, feat, pfeat;
134 tfloat *out_contribs, tfloat transform(
const tfloat,
const tfloat));
141 tfloat *out_contribs,
unsigned *feature_sets, tfloat transform(
const tfloat,
const tfloat));
160 tfloat *out_contribs,
161 tfloat transform(
const tfloat,
const tfloat));
171 tfloat *out_contribs, tfloat transform(
const tfloat,
const tfloat));
178 const int feature_dependence,
unsigned model_transform,
bool interactions);
180 const int feature_dependence,
unsigned model_transform,
bool interactions,
unsigned *feature_sets);
186 const int feature_dependence,
unsigned model_transform,
bool interactions,
unsigned *feature_sets,
bool verbose,
187 vector<string>& names,
const MedMat<float>& abs_cov_mat,
int iteration_cnt,
bool max_in_groups);
/// NOTE(review): presumably returns the binomial coefficient C(n, k) as a
/// double - confirm against the definition (behaviour for k > n or negative
/// arguments is not visible here).
///
/// @param n  first argument (presumably the set size)
/// @param k  second argument (presumably the subset size)
/// @return   a double; exact semantics defined by the implementation
193 double nchoosek(
long n,
long k);
/// Writes a collection of boolean masks into `all_opts`.
///
/// NOTE(review): presumably enumerates every binary mask of length `nfeats`
/// (2^nfeats entries) - confirm, since that grows exponentially.
///
/// @param nfeats    number of features (presumably the length of each mask)
/// @param all_opts  output: list of boolean masks
195 void list_all_options_binary(
int nfeats, vector<vector<bool>> &all_opts);
/// Produces a random boolean mask using the supplied random engine.
///
/// NOTE(review): how `uniform_rand` and `use_shuffle` change the sampling
/// scheme is not visible in this declaration - see the definition.
///
/// @param mask          output mask (non-const reference)
/// @param nfeat         number of features (presumably the mask length)
/// @param gen           random engine used for sampling; advanced by the call
/// @param uniform_rand  sampling-mode flag, defaults to false
/// @param use_shuffle   sampling-mode flag, defaults to true
197 void generate_mask(vector<bool> &mask,
int nfeat, mt19937 &gen,
bool uniform_rand =
false,
bool use_shuffle =
true);
/// Variant of generate_mask with two extra tuning parameters.
///
/// NOTE(review): the exact effect of each flag is not visible in this
/// declaration - see the definition.
///
/// @param mask            output mask (non-const reference)
/// @param nfeat           number of features (presumably the mask length)
/// @param gen             random engine used for sampling; advanced by the call
/// @param uniform_rand    sampling-mode flag, defaults to false
/// @param uniform_rand_p  defaults to 0.5; presumably the per-feature
///                        probability used when `uniform_rand` is true - TODO confirm
/// @param use_shuffle     sampling-mode flag, defaults to true
/// @param limit_zero_cnt  defaults to 0; presumably a bound on the number of
///                        zero entries in the mask - TODO confirm
199 void generate_mask_(vector<bool> &mask,
int nfeat, mt19937 &gen,
bool uniform_rand =
false,
float uniform_rand_p = 0.5,
200 bool use_shuffle =
true,
int limit_zero_cnt = 0);
/// Writes a sampled collection of boolean masks into `all_opts`.
///
/// NOTE(review): presumably draws `opt_count` masks of length `nfeats`,
/// optionally allowing duplicates - confirm against the definition.
///
/// @param nfeats        number of features (presumably the length of each mask)
/// @param all_opts      output: list of boolean masks
/// @param opt_count     presumably the number of masks to sample
/// @param gen           random engine used for sampling; advanced by the call
/// @param with_repeats  presumably whether the same mask may be drawn twice
/// @param uniform_rand  sampling-mode flag, defaults to false
/// @param use_shuffle   sampling-mode flag, defaults to true
202 void sample_options_SHAP(
int nfeats, vector<vector<bool>> &all_opts,
int opt_count, mt19937 &gen,
bool with_repeats
203 ,
bool uniform_rand =
false,
bool use_shuffle =
true);
/// Returns a double computed from three integer arguments.
///
/// NOTE(review): the meaning of `p1`, `p2` and `end_l` is not derivable from
/// this declaration; presumably a combinatorial weight used by the Shapley
/// computation - confirm against the definition.
205 double get_c(
int p1,
int p2,
int end_l);
/// Shapley-style explanation of one sample, using a missing-value substitute.
///
/// Declaration only; the notes below are inferred from the signature and must
/// be confirmed against the definition.
///
/// @param matrix                     feature matrix (read-only)
/// @param selected_sample            presumably the row index in `matrix` to explain
/// @param max_tests                  presumably an upper bound on evaluated masks
/// @param predictor                  model to explain; ownership is not stated here
/// @param missing_value              presumably the value substituted for masked-out features
/// @param group2index                per-group list of feature indices
/// @param groupNames                 group names (presumably parallel to `group2index`)
/// @param features_coeff             output (non-const reference): presumably one coefficient per group
/// @param gen                        random engine; advanced by the call
/// @param sample_masks_with_repeats  presumably whether sampled masks may repeat
/// @param select_from_all            meaning not visible here - TODO confirm
/// @param uniform_rand               mask-sampling flag
/// @param use_shuffle                mask-sampling flag
/// @param verbose                    presumably enables progress output
207 void explain_shapley(
const MedFeatures &matrix,
int selected_sample,
int max_tests,
208 MedPredictor *predictor,
float missing_value,
const vector<vector<int>>& group2index,
const vector<string> &groupNames,
209 vector<float> &features_coeff, mt19937 &gen,
bool sample_masks_with_repeats,
210 float select_from_all,
bool uniform_rand,
bool use_shuffle,
bool verbose);
/// Shapley-style explanation of one sample, using a samples generator.
///
/// Unlike the non-template overload, this takes a SamplesGenerator<T> instead
/// of a `missing_value`. Declaration only; the notes below are inferred from
/// the signature and must be confirmed against the definition.
///
/// @tparam T                  element type of the samples generator
/// @param matrix              feature matrix (read-only)
/// @param selected_sample     presumably the row index in `matrix` to explain
/// @param max_tests           presumably an upper bound on evaluated masks
/// @param predictor           model to explain; ownership is not stated here
/// @param group2index         per-group list of feature indices
/// @param groupNames          group names (presumably parallel to `group2index`)
/// @param sampler_gen         samples generator (read-only)
/// @param rnd_gen             random engine; advanced by the call
/// @param sample_per_row      presumably the number of generated samples per evaluated row
/// @param sampling_params     opaque pointer, presumably forwarded to `sampler_gen` - TODO confirm expected type
/// @param features_coeff      output (non-const reference): presumably one coefficient per group
/// @param use_random_sample   meaning not visible here - TODO confirm
/// @param verbose             defaults to false; presumably enables progress output
212 template<
typename T>
void explain_shapley(
const MedFeatures &matrix,
int selected_sample,
int max_tests,
213 MedPredictor *predictor,
const vector<vector<int>>& group2index,
const vector<string> &groupNames,
214 const SamplesGenerator<T> &sampler_gen, mt19937 &rnd_gen,
int sample_per_row,
void *sampling_params,
215 vector<float> &features_coeff,
bool use_random_sample,
bool verbose =
false);
/// Minimal-set explanation of one sample.
///
/// Declaration only; the notes below are inferred from the signature and must
/// be confirmed against the definition.
///
/// @param matrix            feature matrix (read-only)
/// @param selected_sample   presumably the row index in `matrix` to explain
/// @param max_tests         presumably an upper bound on evaluated candidates
/// @param predictor         model to explain; ownership is not stated here
/// @param missing_value     presumably the value substituted for excluded features
/// @param group2index       per-group list of feature indices
/// @param features_coeff    output (non-const reference)
/// @param scores_history    output (non-const reference); presumably the score after each step
/// @param max_set_size      presumably the maximum size of the selected set
/// @param baseline_score    reference score (exact role not visible here)
/// @param param_all_alpha   tuning parameter - meaning not visible here
/// @param param_all_beta    tuning parameter - meaning not visible here
/// @param param_all_k1      tuning parameter - meaning not visible here
/// @param param_all_k2      tuning parameter - meaning not visible here
/// @param verbose           presumably enables progress output
218 void explain_minimal_set(
const MedFeatures &matrix,
int selected_sample,
int max_tests,
219 MedPredictor *predictor,
float missing_value,
const vector<vector<int>>& group2index
220 , vector<float> &features_coeff, vector<float> &scores_history,
int max_set_size,
221 float baseline_score,
float param_all_alpha,
float param_all_beta,
222 float param_all_k1,
float param_all_k2,
bool verbose);
/// Second overload of the minimal-set explanation.
///
/// Its visible parameters match the overload declared just above; it differs
/// by at least one parameter that is missing from this listing (see the note
/// inside the parameter list). Declaration only; the notes below are inferred
/// from the signature and must be confirmed against the definition.
///
/// @param matrix            feature matrix (read-only)
/// @param selected_sample   presumably the row index in `matrix` to explain
/// @param max_tests         presumably an upper bound on evaluated candidates
/// @param predictor         model to explain; ownership is not stated here
/// @param missing_value     presumably the value substituted for excluded features
/// @param group2index       per-group list of feature indices
/// @param features_coeff    output (non-const reference)
/// @param scores_history    output (non-const reference); presumably the score after each step
/// @param max_set_size      presumably the maximum size of the selected set
/// @param baseline_score    reference score (exact role not visible here)
/// @param param_all_alpha   tuning parameter - meaning not visible here
/// @param param_all_beta    tuning parameter - meaning not visible here
/// @param param_all_k1      tuning parameter - meaning not visible here
/// @param param_all_k2      tuning parameter - meaning not visible here
/// @param verbose           presumably enables progress output
225 void explain_minimal_set(
const MedFeatures &matrix,
int selected_sample,
int max_tests,
226 MedPredictor *predictor,
float missing_value,
const vector<vector<int>>& group2index,
// NOTE(review): the listing jumps from line 226 to 228 here, leaving two
// consecutive commas; a parameter declaration is evidently missing and must be
// restored from the original header before this can compile.
228 , vector<float> &features_coeff, vector<float> &scores_history,
int max_set_size,
229 float baseline_score,
float param_all_alpha,
float param_all_beta,
230 float param_all_k1,
float param_all_k2,
bool verbose);
235 LimeWeightUniform = 1,
244 void *params,
const vector<vector<int>>& group2index,
const vector<string>& group_names, vector<vector<float>>& alphas);
249 void *params,
const vector<vector<int>>& group2index,
const vector<string>& group_names, vector<vector<int>>& forced, vector<vector<float>>& alphas);
254 void *params,
const vector<vector<int>>& group2index,
const vector<string>& group_names,
const MedMat<float>& abs_cov_mat,
int iteration_cnt, vector<vector<float>>& alphas,
bool max_in_groups);
MedAlgo - APIs to different algorithms: Linear Models, RF, GBM, KNN, and more.
A class for holding features data as a virtual matrix
Definition MedFeatures.h:47
Base Interface for predictor.
Definition MedAlgo.h:78
Abstract Random Samples generator.
Definition SamplesGenerator.h:34
Definition tree_shap.py:1
Definition tree_shap.h:50
tfloat * X
Vector of all data. Each row is one sample holding all of its feature values; columns (2nd dimension) are features.
Definition tree_shap.h:51
unsigned M
Features count.
Definition tree_shap.h:57
unsigned num_X
number of samples
Definition tree_shap.h:56
unsigned num_R
number of explanation features (allowing for grouping)
Definition tree_shap.h:59
bool * X_missing
Boolean mask that is true wherever the matrix value is missing; same structure as X.
Definition tree_shap.h:52
tfloat * y
the labels
Definition tree_shap.h:53
Definition tree_shap.h:119
Definition tree_shap.h:108
Definition tree_shap.h:70