Medial Code Documentation
|
Base Interface for predictor. More...
#include <MedAlgo.h>
Public Member Functions | |
virtual int | init (void *classifier_params) |
int | init_from_string (string initialization_text) |
virtual int | init (map< string, string > &mapper) |
Virtual to init object from parsed fields. | |
virtual int | set_params (map< string, string > &mapper) |
virtual void | init_defaults () |
virtual int | Learn (float *x, float *y, const float *w, int n_samples, int n_ftrs) |
Learn should be implemented for each model. | |
virtual int | Predict (float *x, float *&preds, int n_samples, int n_ftrs) const |
Predict should be implemented for each model. | |
virtual void | print (FILE *fp, const string &prefix, int level=0) const |
virtual int | n_preds_per_sample () const |
Number of predictions per sample. Typically 1, but some models return several per sample (for example, a probability vector). | |
virtual int | denormalize_model (float *f_avg, float *f_std, float label_avg, float label_std) |
int | learn (float *x, float *y, int nsamples, int nftrs) |
simple no weights call | |
virtual int | learn (MedMat< float > &x, MedMat< float > &y, const vector< float > &wgts) |
MedMat x,y: will transpose/normalize x,y if needed by the algorithm. The convention is that untransposed mats are always samples x features, and transposed are features x samples. | |
int | learn (MedMat< float > &x, MedMat< float > &y) |
MedMat x,y: will transpose/normalize x,y if needed by the algorithm. The convention is that untransposed mats are always samples x features, and transposed are features x samples. | |
int | learn (MedMat< float > &x, vector< float > &y, const vector< float > &wgts) |
MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized) | |
int | learn (MedMat< float > &x, vector< float > &y) |
MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized) | |
int | learn (vector< float > &x, vector< float > &y, const vector< float > &wgts, int n_samples, int n_ftrs) |
vector x,y: transpose/normalizations not done. | |
int | learn (vector< float > &x, vector< float > &y, int n_samples, int n_ftrs) |
vector x,y: transpose/normalizations not done. | |
virtual int | predict (MedMat< float > &x, vector< float > &preds) const |
int | predict (vector< float > &x, vector< float > &preds, int n_samples, int n_ftrs) const |
int | threaded_predict (MedMat< float > &x, vector< float > &preds, int nthreads) const |
int | learn (const MedFeatures &features) |
int | learn (const MedFeatures &features, vector< string > &names) |
virtual int | predict (MedFeatures &features) const |
virtual void | calc_feature_importance (vector< float > &features_importance_scores, const string &general_params) |
Feature Importance - assume called after learn. | |
virtual void | calc_feature_importance (vector< float > &features_importance_scores, const string &general_params, const MedFeatures *features) |
virtual void | calc_feature_contribs (MedMat< float > &x, MedMat< float > &contribs) |
Feature contributions explains the prediction on each sample (aka BUT_WHY) | |
virtual void | calc_feature_contribs_conditional (MedMat< float > &mat_x_in, unordered_map< string, float > &contiditional_variables, MedMat< float > &mat_x_out, MedMat< float > &mat_contribs) |
virtual void | export_predictor (const string &output_fname) |
int | learn_prob_calibration (MedMat< float > &x, vector< float > &y, vector< float > &min_range, vector< float > &max_range, vector< float > &map_prob, int min_bucket_size=10000, float min_score_jump=0.001, float min_prob_jump=0.005, bool fix_prob_order=false) |
calibration for probability using training data | |
int | convert_scores_to_prob (const vector< float > &preds, const vector< float > &min_range, const vector< float > &max_range, const vector< float > &map_prob, vector< float > &probs) const |
If you have run learn_prob_calibration before, you have min_range, max_range and map_prob from that function; this function uses them to convert preds to probs. | |
int | learn_prob_calibration (MedMat< float > &x, vector< float > &y, int poly_rank, vector< double > &params, int min_bucket_size=10000, float min_score_jump=0.001) |
Will create probability bins using Platt scale method. | |
template<class T , class L > | |
int | convert_scores_to_prob (const vector< T > &preds, const vector< double > &params, vector< L > &converted) const |
Converts probability from Platt scale model. | |
virtual bool | predict_single_not_implemented () |
Preparation function for fast prediction on single item each time. | |
virtual void | prepare_predict_single () |
virtual void | predict_single (const vector< float > &x, vector< float > &preds) const |
virtual void | predict_single (const vector< double > &x, vector< double > &preds) const |
virtual void | calc_feature_importance_shap (vector< float > &features_importance_scores, string &importance_type, const MedFeatures *features) |
void * | new_polymorphic (string derived_class_name) |
For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type: one needs to implement this function to return a new pointer to the derived class given its type (as in my_type). | |
size_t | get_predictor_size () |
size_t | predictor_serialize (unsigned char *blob) |
![]() | |
virtual int | version () const |
Relevant for serializations. | |
virtual string | my_class_name () const |
For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method. | |
virtual void | serialized_fields_name (vector< string > &field_names) const |
The names of the serialized fields. | |
virtual void | pre_serialization () |
virtual void | post_deserialization () |
virtual size_t | get_size () |
Gets bytes sizes for serializations. | |
virtual size_t | serialize (unsigned char *blob) |
Serializing object to blob memory. Returns number of bytes written to memory. | |
virtual size_t | deserialize (unsigned char *blob) |
Deserializing blob to object. Returns number of bytes read. | |
size_t | serialize_vec (vector< unsigned char > &blob) |
size_t | deserialize_vec (vector< unsigned char > &blob) |
virtual size_t | serialize (vector< unsigned char > &blob) |
virtual size_t | deserialize (vector< unsigned char > &blob) |
virtual int | read_from_file (const string &fname) |
read and deserialize model | |
virtual int | write_to_file (const string &fname) |
serialize model and write to file | |
virtual int | read_from_file_unsafe (const string &fname) |
read and deserialize model without checking version number - unsafe read | |
int | init_from_string (string init_string) |
Init from string. | |
int | init_params_from_file (string init_file) |
int | init_param_from_file (string file_str, string &param) |
int | update_from_string (const string &init_string) |
virtual int | update (map< string, string > &map) |
Virtual to update object from parsed fields. | |
virtual string | object_json () const |
Static Public Member Functions | |
static MedPredictor * | make_predictor (string model_type) |
static MedPredictor * | make_predictor (MedPredictorTypes model_type) |
static MedPredictor * | make_predictor (string model_type, string params) |
static MedPredictor * | make_predictor (MedPredictorTypes model_type, string params) |
Data Fields | |
MedPredictorTypes | classifier_type |
The Predictor enum type. | |
bool | transpose_for_learn |
True if need to transpose before learn. | |
bool | normalize_for_learn |
True if need to normalize before learn. | |
bool | normalize_y_for_learn |
True if need to normalize labels before learn. | |
bool | transpose_for_predict |
True if need to transpose before predict. | |
bool | normalize_for_predict |
True if need to normalize before predict. | |
vector< string > | model_features |
The model features used in Learn, to validate when calling predict. | |
int | features_count = 0 |
The model features count used in Learn, to validate when calling predict. | |
Protected Member Functions | |
void | prepare_x_mat (MedMat< float > &x, const vector< float > &wgts, int &nsamples, int &nftrs, bool transpose_needed) const |
void | predict_thread (void *p) const |
Base Interface for predictor.
|
inline virtual |
Feature contributions explains the prediction on each sample (aka BUT_WHY)
Reimplemented in MedGDLM, MedLightGBM, MedLM, and MedXGB.
|
virtual |
Virtual to init object from parsed fields.
Reimplemented from SerializableObject.
Reimplemented in MedLightGBM, MedBooster, MedLinearModel, MedMicNet, MedPredictorsByMissingValues, and MedTQRF.
|
inline virtual |
Reimplemented in MedBART.
|
inline virtual |
Learn should be implemented for each model.
This API always assumes the data is already normalized/transposed as needed, and never changes data in x,y,w. The method should support calling with w=NULL.
Reimplemented in MedExternalNN, MedBART, MedBooster, MedBP, MedDeepBit, MedGDLM, MedKNN, MedLightGBM, MedLinearModel, MedLM, MedLasso, MedMicNet, MedMultiClass, MedQRF, MedSpecificGroupModels, MedSvm, MedTQRF, MedVW, MedXGB, and MedSimpleEnsemble.
|
virtual |
MedMat x,y: will transpose/normalize x,y if needed by the algorithm. The convention is that untransposed mats are always samples x features, and transposed are features x samples.
Reimplemented in MedExternalNN, MedPredictorsByMissingValues, and MedSimpleEnsemble.
int MedPredictor::learn_prob_calibration | ( | MedMat< float > & | x, |
vector< float > & | y, | ||
int | poly_rank, | ||
vector< double > & | params, | ||
int | min_bucket_size = 10000 , |
||
float | min_score_jump = 0.001 |
||
) |
Will create probability bins using Platt scale method.
x | The training matrix |
y | The Labels |
poly_rank | the polynom rank for the Platt scale fit |
min_bucket_size | The minimal observations to create probability bin |
min_score_jump | The minimal diff in scores to create bin |
params Stores the Platt scale model params for conversion
int MedPredictor::learn_prob_calibration | ( | MedMat< float > & | x, |
vector< float > & | y, | ||
vector< float > & | min_range, | ||
vector< float > & | max_range, | ||
vector< float > & | map_prob, | ||
int | min_bucket_size = 10000 , |
||
float | min_score_jump = 0.001 , |
||
float | min_prob_jump = 0.005 , |
||
bool | fix_prob_order = false |
||
) |
calibration for probability using training data
x | The training matrix |
y | The Labels |
min_bucket_size | The minimal observations to create probability bin |
min_score_jump | The minimal diff in scores to create bin |
min_prob_jump | The minimal diff in probabilities to create bin |
fix_prob_order | If true will unite bins that are sorted in wrong way |
min_range - writes a corresponding vector with minimal score range
max_range - writes a corresponding vector with maximal score range
map_prob - writes a corresponding vector with probability for score range
|
inline virtual |
Number of predictions per sample. Typically 1, but some models return several per sample (for example, a probability vector).
Reimplemented in MedExternalNN, MedLightGBM, MedMicNet, MedMultiClass, MedQRF, and MedXGB.
|
virtual |
For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type: one needs to implement this function to return a new pointer to the derived class given its type (as in my_type).
Reimplemented from SerializableObject.
|
inline virtual |
Predict should be implemented for each model.
This API assumes x is normalized/transposed if needed. preds should either be pre-allocated or NULL - in which case the predictor should allocate it to the right size.
Reimplemented in MedBART, MedBooster, MedBP, MedDeepBit, MedGDLM, MedKNN, MedLightGBM, MedLinearModel, MedLM, MedLasso, MedMicNet, MedMultiClass, MedQRF, MedSpecificGroupModels, MedSvm, MedTQRF, MedVW, and MedXGB.
|
inline virtual |
Preparation function for fast prediction on single item each time.
Reimplemented in MedExternalNN, and MedLM.
|
inline virtual |
Reimplemented in MedDeepBit, MedXGB, MedBART, MedBP, MedGDLM, MedKNN, MedLinearModel, MedLM, MedLasso, MedQRF, MedSvm, and MedSimpleEnsemble.
int MedPredictor::features_count = 0 |
The model features count used in Learn, to validate when calling predict.
Used if model_features is empty because feature names aren't available during learn.