Base Interface for predictor. More...

#include <MedAlgo.h>

Inheritance diagram for MedPredictor:

Public Member Functions
virtual int	init (void *classifier_params)

int	init_from_string (string initialization_text)

virtual int	init (map< string, string > &mapper)
	Virtual to init object from parsed fields.

virtual int	set_params (map< string, string > &mapper)

virtual void	init_defaults ()

virtual int	Learn (float *x, float *y, const float *w, int n_samples, int n_ftrs)
	Learn should be implemented for each model.

virtual int	Predict (float *x, float *&preds, int n_samples, int n_ftrs) const
	Predict should be implemented for each model.

virtual void	print (FILE *fp, const string &prefix, int level=0) const

virtual int	n_preds_per_sample () const
	Number of predictions per sample. Typically 1, but some models return several per sample (for example, a probability vector).

virtual int	denormalize_model (float f_avg, float f_std, float label_avg, float label_std)

int	learn (float *x, float *y, int nsamples, int nftrs)
	simple no weights call

virtual int	learn (MedMat< float > &x, MedMat< float > &y, const vector< float > &wgts)
	MedMat x,y: will transpose/normalize x,y if needed by the algorithm. The convention is that untransposed mats are always samples x features, and transposed are features x samples.

int	learn (MedMat< float > &x, MedMat< float > &y)
	MedMat x,y: will transpose/normalize x,y if needed by the algorithm. The convention is that untransposed mats are always samples x features, and transposed are features x samples.

int	learn (MedMat< float > &x, vector< float > &y, const vector< float > &wgts)
	MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized)

int	learn (MedMat< float > &x, vector< float > &y)
	MedMat x, vector y: will transpose/normalize x if needed (y assumed to be normalized)

int	learn (vector< float > &x, vector< float > &y, const vector< float > &wgts, int n_samples, int n_ftrs)
	vector x,y: transpose/normalizations not done.

int	learn (vector< float > &x, vector< float > &y, int n_samples, int n_ftrs)
	vector x,y: transpose/normalizations not done.

virtual int	predict (MedMat< float > &x, vector< float > &preds) const

int	predict (vector< float > &x, vector< float > &preds, int n_samples, int n_ftrs) const

int	threaded_predict (MedMat< float > &x, vector< float > &preds, int nthreads) const

int	learn (const MedFeatures &features)

int	learn (const MedFeatures &features, vector< string > &names)

virtual int	predict (MedFeatures &features) const

virtual void	calc_feature_importance (vector< float > &features_importance_scores, const string &general_params)
	Feature Importance - assume called after learn.

virtual void	calc_feature_importance (vector< float > &features_importance_scores, const string &general_params, const MedFeatures *features)

virtual void	calc_feature_contribs (MedMat< float > &x, MedMat< float > &contribs)
	Feature contributions explain the prediction on each sample (aka BUT_WHY)

virtual void	calc_feature_contribs_conditional (MedMat< float > &mat_x_in, unordered_map< string, float > &contiditional_variables, MedMat< float > &mat_x_out, MedMat< float > &mat_contribs)

virtual void	export_predictor (const string &output_fname)

int	learn_prob_calibration (MedMat< float > &x, vector< float > &y, vector< float > &min_range, vector< float > &max_range, vector< float > &map_prob, int min_bucket_size=10000, float min_score_jump=0.001, float min_prob_jump=0.005, bool fix_prob_order=false)
	calibration for probability using training data

int	convert_scores_to_prob (const vector< float > &preds, const vector< float > &min_range, const vector< float > &max_range, const vector< float > &map_prob, vector< float > &probs) const
	If you have run learn_prob_calibration before, you have min_range, max_range and map_prob from that function - they are used to convert preds to probs.

int	learn_prob_calibration (MedMat< float > &x, vector< float > &y, int poly_rank, vector< double > &params, int min_bucket_size=10000, float min_score_jump=0.001)
	Will create probability bins using Platt scale method.

template<class T , class L >
int	convert_scores_to_prob (const vector< T > &preds, const vector< double > &params, vector< L > &converted) const
	Converts probability from Platt scale model.

virtual bool	predict_single_not_implemented ()
	Preparation function for fast prediction on single item each time.

virtual void	prepare_predict_single ()

virtual void	predict_single (const vector< float > &x, vector< float > &preds) const

virtual void	predict_single (const vector< double > &x, vector< double > &preds) const

virtual void	calc_feature_importance_shap (vector< float > &features_importance_scores, string &importance_type, const MedFeatures *features)

void *	new_polymorphic (string derived_class_name)
	For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type, one needs to implement this function to return a new instance of the derived class given its type (as in my_type).

size_t	get_predictor_size ()

size_t	predictor_serialize (unsigned char *blob)

Public Member Functions inherited from SerializableObject
virtual int	version () const
	Relevant for serializations.

virtual string	my_class_name () const
	For better handling of serializations it is highly recommended that each SerializableObject inheriting class will implement the next method.

virtual void	serialized_fields_name (vector< string > &field_names) const
	The names of the serialized fields.

virtual void	pre_serialization ()

virtual void	post_deserialization ()

virtual size_t	get_size ()
	Gets bytes sizes for serializations.

virtual size_t	serialize (unsigned char *blob)
	Serializing object to blob memory. Returns number of bytes written to memory.

virtual size_t	deserialize (unsigned char *blob)
	Deserializing blob to object. Returns number of bytes read.

size_t	serialize_vec (vector< unsigned char > &blob)

size_t	deserialize_vec (vector< unsigned char > &blob)

virtual size_t	serialize (vector< unsigned char > &blob)

virtual size_t	deserialize (vector< unsigned char > &blob)

virtual int	read_from_file (const string &fname)
	read and deserialize model

virtual int	write_to_file (const string &fname)
	serialize model and write to file

virtual int	read_from_file_unsafe (const string &fname)
	read and deserialize model without checking version number - unsafe read

int	init_from_string (string init_string)
	Init from string.

int	init_params_from_file (string init_file)

int	init_param_from_file (string file_str, string &param)

int	update_from_string (const string &init_string)

virtual int	update (map< string, string > &map)
	Virtual to update object from parsed fields.

virtual string	object_json () const

Static Public Member Functions
static MedPredictor *	make_predictor (string model_type)

static MedPredictor *	make_predictor (MedPredictorTypes model_type)

static MedPredictor *	make_predictor (string model_type, string params)

static MedPredictor *	make_predictor (MedPredictorTypes model_type, string params)

Data Fields
MedPredictorTypes	classifier_type
	The Predictor enum type.

bool	transpose_for_learn
	True if need to transpose before learn.

bool	normalize_for_learn
	True if need to normalize before learn.

bool	normalize_y_for_learn
	True if need to normalize labels before learn.

bool	transpose_for_predict
	True if need to transpose before predict.

bool	normalize_for_predict
	True if need to normalize before predict.

vector< string >	model_features
	The model features used in Learn, to validate when calling predict.

int	features_count = 0
	The model features count used in Learn, to validate when calling predict.

Protected Member Functions
void	prepare_x_mat (MedMat< float > &x, const vector< float > &wgts, int &nsamples, int &nftrs, bool transpose_needed) const

void	predict_thread (void *p) const

Detailed Description

Base Interface for predictor.

Member Function Documentation

◆ calc_feature_contribs()

virtual void MedPredictor::calc_feature_contribs	(	MedMat< float > &	x,
		MedMat< float > &	contribs
	)

inline virtual

Feature contributions explain the prediction on each sample (aka BUT_WHY)

Reimplemented in MedGDLM, MedLightGBM, MedLM, and MedXGB.

◆ init()

int MedPredictor::init ( map< string, string > & map )

virtual

Virtual to init object from parsed fields.

Reimplemented from SerializableObject.

Reimplemented in MedLightGBM, MedBooster, MedLinearModel, MedMicNet, MedPredictorsByMissingValues, and MedTQRF.

◆ init_defaults()

virtual void MedPredictor::init_defaults ( )

inline virtual

Reimplemented in MedBART.

◆ Learn()

virtual int MedPredictor::Learn	(	float *	x,
		float *	y,
		const float *	w,
		int	n_samples,
		int	n_ftrs
	)

inline virtual

Learn should be implemented for each model.

This API always assumes the data is already normalized/transposed as needed, and never changes data in x,y,w. The method should support calling with w=NULL.

Reimplemented in MedExternalNN, MedBART, MedBooster, MedBP, MedDeepBit, MedGDLM, MedKNN, MedLightGBM, MedLinearModel, MedLM, MedLasso, MedMicNet, MedMultiClass, MedQRF, MedSpecificGroupModels, MedSvm, MedTQRF, MedVW, MedXGB, and MedSimpleEnsemble.

◆ learn()

int MedPredictor::learn	(	MedMat< float > &	x,
		MedMat< float > &	y,
		const vector< float > &	wgts
	)

virtual

MedMat x,y: will transpose/normalize x,y if needed by the algorithm. The convention is that untransposed mats are always samples x features, and transposed are features x samples.

Reimplemented in MedExternalNN, MedPredictorsByMissingValues, and MedSimpleEnsemble.

◆ learn_prob_calibration() [1/2]

int MedPredictor::learn_prob_calibration	(	MedMat< float > &	x,
		vector< float > &	y,
		int	poly_rank,
		vector< double > &	params,
		int	min_bucket_size = `10000`,
		float	min_score_jump = `0.001`
	)

Will create probability bins using Platt scale method.

Parameters

x	The training matrix
y	The Labels
poly_rank	The polynomial rank for the Platt scale fit
min_bucket_size	The minimal observations to create probability bin
min_score_jump	The minimal diff in scores to create bin

Returns

params Stores the Platt scale model params for conversion

◆ learn_prob_calibration() [2/2]

int MedPredictor::learn_prob_calibration	(	MedMat< float > &	x,
		vector< float > &	y,
		vector< float > &	min_range,
		vector< float > &	max_range,
		vector< float > &	map_prob,
		int	min_bucket_size = `10000`,
		float	min_score_jump = `0.001`,
		float	min_prob_jump = `0.005`,
		bool	fix_prob_order = `false`
	)

calibration for probability using training data

Parameters

x	The training matrix
y	The Labels
min_bucket_size	The minimal observations to create probability bin
min_score_jump	The minimal diff in scores to create bin
min_prob_jump	The minimal diff in probabilities to create bin
fix_prob_order	If true will unite bins that are sorted in wrong way

Returns

min_range - writes a corresponding vector with minimal score range

max_range - writes a corresponding vector with maximal score range

map_prob - writes a corresponding vector with probability for score range

◆ n_preds_per_sample()

virtual int MedPredictor::n_preds_per_sample ( ) const

inline virtual

Number of predictions per sample. Typically 1, but some models return several per sample (for example, a probability vector).

Reimplemented in MedExternalNN, MedLightGBM, MedMicNet, MedMultiClass, MedQRF, and MedXGB.

◆ new_polymorphic()

void * MedPredictor::new_polymorphic ( string derived_name )

virtual

For polymorphic classes that want to be able to serialize/deserialize a pointer to the derived class given its type, one needs to implement this function to return a new instance of the derived class given its type (as in my_type).

Reimplemented from SerializableObject.

◆ Predict()

virtual int MedPredictor::Predict	(	float *	x,
		float *&	preds,
		int	n_samples,
		int	n_ftrs
	)		const

inline virtual

Predict should be implemented for each model.

This API assumes x is normalized/transposed if needed. preds should either be pre-allocated or NULL - in which case the predictor should allocate it to the right size.

Reimplemented in MedBART, MedBooster, MedBP, MedDeepBit, MedGDLM, MedKNN, MedLightGBM, MedLinearModel, MedLM, MedLasso, MedMicNet, MedMultiClass, MedQRF, MedSpecificGroupModels, MedSvm, MedTQRF, MedVW, and MedXGB.

◆ predict_single_not_implemented()

virtual bool MedPredictor::predict_single_not_implemented ( )

inline virtual

Preparation function for fast prediction on single item each time.

Reimplemented in MedExternalNN, and MedLM.

◆ set_params()

virtual int MedPredictor::set_params ( map< string, string > & mapper )

inline virtual

Reimplemented in MedDeepBit, MedXGB, MedBART, MedBP, MedGDLM, MedKNN, MedLinearModel, MedLM, MedLasso, MedQRF, MedSvm, and MedSimpleEnsemble.

Field Documentation

◆ features_count

int MedPredictor::features_count = 0

The model features count used in Learn, to validate when calling predict.

Used if model_features is empty because feature names aren't available during learn.

The documentation for this class was generated from the following files:

Internal/MedAlgo/MedAlgo/MedAlgo.h
Internal/MedAlgo/MedAlgo/MedAlgo.cpp

Public Member Functions

Static Public Member Functions

Data Fields

Protected Member Functions

Detailed Description

Member Function Documentation

◆ calc_feature_contribs()

◆ init()

◆ init_defaults()

◆ Learn()

◆ learn()

◆ learn_prob_calibration() [1/2]

◆ learn_prob_calibration() [2/2]

◆ n_preds_per_sample()

◆ new_polymorphic()

◆ Predict()

◆ predict_single_not_implemented()

◆ set_params()

Field Documentation

◆ features_count