Medial Code Documentation
Loading...
Searching...
No Matches
Public Member Functions | Data Fields | Protected Member Functions
xgboost.core.Booster Class Reference

Public Member Functions

None __init__ (self, Optional[BoosterParam] params=None, Optional[Sequence[DMatrix]] cache=None, Optional[Union["Booster", bytearray, os.PathLike, str]] model_file=None)
 
None __del__ (self)
 
Dict __getstate__ (self)
 
None __setstate__ (self, Dict state)
 
"Booster" __getitem__ (self, Union[int, tuple, slice] val)
 
Generator["Booster", None, None] __iter__ (self)
 
str save_config (self)
 
None load_config (self, str config)
 
"Booster" __copy__ (self)
 
"Booster" __deepcopy__ (self, Any _)
 
"Booster" copy (self)
 
Optional[str] attr (self, str key)
 
Dict[str, Optional[str]] attributes (self)
 
None set_attr (self, **Optional[Any] kwargs)
 
Optional[FeatureTypes] feature_types (self)
 
None feature_types (self, Optional[FeatureTypes] features)
 
Optional[FeatureNames] feature_names (self)
 
None feature_names (self, Optional[FeatureNames] features)
 
None set_param (self, Union[Dict, Iterable[Tuple[str, Any]], str] params, Optional[str] value=None)
 
None update (self, DMatrix dtrain, int iteration, Optional[Objective] fobj=None)
 
None boost (self, DMatrix dtrain, np.ndarray grad, np.ndarray hess)
 
str eval_set (self, Sequence[Tuple[DMatrix, str]] evals, int iteration=0, Optional[Metric] feval=None, bool output_margin=True)
 
str eval (self, DMatrix data, str name="eval", int iteration=0)
 
np.ndarray predict (self, DMatrix data, bool output_margin=False, bool pred_leaf=False, bool pred_contribs=False, bool approx_contribs=False, bool pred_interactions=False, bool validate_features=True, bool training=False, Tuple[int, int] iteration_range=(0, 0), bool strict_shape=False)
 
NumpyOrCupy inplace_predict (self, DataType data, Tuple[int, int] iteration_range=(0, 0), str predict_type="value", float missing=np.nan, bool validate_features=True, Any base_margin=None, bool strict_shape=False)
 
None save_model (self, Union[str, os.PathLike] fname)
 
bytearray save_raw (self, str raw_format="deprecated")
 
None load_model (self, ModelIn fname)
 
int best_iteration (self)
 
None best_iteration (self, int iteration)
 
float best_score (self)
 
None best_score (self, int score)
 
int num_boosted_rounds (self)
 
int num_features (self)
 
None dump_model (self, Union[str, os.PathLike] fout, Union[str, os.PathLike] fmap="", bool with_stats=False, str dump_format="text")
 
List[str] get_dump (self, Union[str, os.PathLike] fmap="", bool with_stats=False, str dump_format="text")
 
Dict[str, Union[float, List[float]]] get_fscore (self, Union[str, os.PathLike] fmap="")
 
Dict[str, Union[float, List[float]]] get_score (self, Union[str, os.PathLike] fmap="", str importance_type="weight")
 
DataFrame trees_to_dataframe (self, Union[str, os.PathLike] fmap="")
 
Union[np.ndarray, DataFrame] get_split_value_histogram (self, str feature, Union[os.PathLike, str] fmap="", Optional[int] bins=None, bool as_pandas=True)
 

Data Fields

 handle
 
 feature_names
 
 feature_types
 

Protected Member Functions

Union[Tuple[int,...], str] _transform_monotone_constrains (self, Union[Dict[str, int], str, Tuple[int,...]] value)
 
Union[str, List[List[int]]] _transform_interaction_constraints (self, Union[Sequence[Sequence[str]], str] value)
 
BoosterParam _configure_constraints (self, BoosterParam params)
 
Optional[FeatureInfo] _get_feature_info (self, str field)
 
None _set_feature_info (self, Optional[FeatureInfo] features, str field)
 
None _assign_dmatrix_features (self, DMatrix data)
 
None _validate_features (self, Optional[FeatureNames] feature_names)
 

Detailed Description

A Booster of XGBoost.

Booster is the model of xgboost, that contains low level routines for
training, prediction and evaluation.

Constructor & Destructor Documentation

◆ __init__()

None xgboost.core.Booster.__init__ (   self,
Optional[BoosterParam]   params = None,
Optional[Sequence[DMatrix]]   cache = None,
Optional[Union["Booster", bytearray, os.PathLike, str]]   model_file = None 
)
Parameters
----------
params :
    Parameters for boosters.
cache :
    List of cache items.
model_file :
    Path to the model file if it's string or PathLike.

Member Function Documentation

◆ __deepcopy__()

"Booster" xgboost.core.Booster.__deepcopy__ (   self,
Any  _ 
)
Return a copy of booster.

◆ __getitem__()

"Booster" xgboost.core.Booster.__getitem__ (   self,
Union[int, tuple, slice]  val 
)
Get a slice of the tree-based model.

.. versionadded:: 1.3.0

◆ __iter__()

Generator["Booster", None, None] xgboost.core.Booster.__iter__ (   self)
Iterator method for getting individual trees.

.. versionadded:: 2.0.0

◆ attr()

Optional[str] xgboost.core.Booster.attr (   self,
str  key 
)
Get attribute string from the Booster.

Parameters
----------
key :
    The key to get attribute from.

Returns
-------
value :
    The attribute value of the key; returns None if the attribute does not exist.

◆ attributes()

Dict[str, Optional[str]] xgboost.core.Booster.attributes (   self)
Get attributes stored in the Booster as a dictionary.

Returns
-------
result : dictionary of attribute_name: attribute_value pairs of strings.
    Returns an empty dict if there are no attributes.

◆ best_iteration()

int xgboost.core.Booster.best_iteration (   self)
The best iteration during training.

◆ best_score()

float xgboost.core.Booster.best_score (   self)
The best evaluation score during training.

◆ boost()

None xgboost.core.Booster.boost (   self,
DMatrix  dtrain,
np.ndarray  grad,
np.ndarray  hess 
)
Boost the booster for one iteration, with customized gradient
statistics.  Like :py:func:`xgboost.Booster.update`, this
function should not be called directly by users.

Parameters
----------
dtrain :
    The training DMatrix.
grad :
    The first order of gradient.
hess :
    The second order of gradient.

◆ copy()

"Booster" xgboost.core.Booster.copy (   self)
Copy the booster object.

Returns
-------
booster :
    A copied booster model

◆ dump_model()

None xgboost.core.Booster.dump_model (   self,
Union[str, os.PathLike]  fout,
Union[str, os.PathLike]   fmap = "",
bool   with_stats = False,
str   dump_format = "text" 
)
Dump model into a text or JSON file.  Unlike :py:meth:`save_model`, the
output format is primarily used for visualization or interpretation,
hence it's more human readable but cannot be loaded back to XGBoost.

Parameters
----------
fout :
    Output file name.
fmap :
    Name of the file containing feature map names.
with_stats :
    Controls whether the split statistics are output.
dump_format :
    Format of model dump file. Can be 'text' or 'json'.

◆ eval()

str xgboost.core.Booster.eval (   self,
DMatrix  data,
str   name = "eval",
int   iteration = 0 
)
Evaluate the model on the given data.

Parameters
----------
data :
    The dmatrix storing the input.

name :
    The name of the dataset.

iteration :
    The current iteration number.

Returns
-------
result: str
    Evaluation result string.

◆ eval_set()

str xgboost.core.Booster.eval_set (   self,
Sequence[Tuple[DMatrix, str]]  evals,
int   iteration = 0,
Optional[Metric]   feval = None,
bool   output_margin = True 
)
Evaluate a set of data.

Parameters
----------
evals :
    List of items to be evaluated.
iteration :
    Current iteration.
feval :
    Custom evaluation function.

Returns
-------
result: str
    Evaluation result string.

◆ feature_names()

Optional[FeatureNames] xgboost.core.Booster.feature_names (   self)
Feature names for this booster.  Can be directly set by input data or by
assignment.

◆ feature_types()

Optional[FeatureTypes] xgboost.core.Booster.feature_types (   self)
Feature types for this booster.  Can be directly set by input data or by
assignment.  See :py:class:`DMatrix` for details.

◆ get_dump()

List[str] xgboost.core.Booster.get_dump (   self,
Union[str, os.PathLike]   fmap = "",
bool   with_stats = False,
str   dump_format = "text" 
)
Returns the model dump as a list of strings.  Unlike :py:meth:`save_model`, the output
format is primarily used for visualization or interpretation, hence it's more
human readable but cannot be loaded back to XGBoost.

Parameters
----------
fmap :
    Name of the file containing feature map names.
with_stats :
    Controls whether the split statistics are output.
dump_format :
    Format of model dump. Can be 'text', 'json' or 'dot'.

◆ get_fscore()

Dict[str, Union[float, List[float]]] xgboost.core.Booster.get_fscore (   self,
Union[str, os.PathLike]   fmap = "" 
)
Get feature importance of each feature.

.. note:: Zero-importance features will not be included

   Keep in mind that this function does not include zero-importance features, i.e.
   those features that have not been used in any split conditions.

Parameters
----------
fmap :
   The name of feature map file

◆ get_score()

Dict[str, Union[float, List[float]]] xgboost.core.Booster.get_score (   self,
Union[str, os.PathLike]   fmap = "",
str   importance_type = "weight" 
)
Get feature importance of each feature.
For tree models, the importance type can be defined as:

* 'weight': the number of times a feature is used to split the data across all trees.
* 'gain': the average gain across all splits the feature is used in.
* 'cover': the average coverage across all splits the feature is used in.
* 'total_gain': the total gain across all splits the feature is used in.
* 'total_cover': the total coverage across all splits the feature is used in.

.. note::

   For linear models, only "weight" is defined and it's the normalized coefficients
   without bias.

.. note:: Zero-importance features will not be included

   Keep in mind that this function does not include zero-importance features, i.e.
   those features that have not been used in any split conditions.

Parameters
----------
fmap :
   The name of feature map file.
importance_type :
    One of the importance types defined above.

Returns
-------
A map between feature names and their scores.  When `gblinear` is used for
multi-class classification the score for each feature is a list with length
`n_classes`, otherwise they're scalars.

◆ get_split_value_histogram()

Union[np.ndarray, DataFrame] xgboost.core.Booster.get_split_value_histogram (   self,
str  feature,
Union[os.PathLike, str]   fmap = "",
Optional[int]   bins = None,
bool   as_pandas = True 
)
Get split value histogram of a feature

Parameters
----------
feature :
    The name of the feature.
fmap:
    The name of feature map file.
bins :
    The maximum number of bins.
    Number of bins equals number of unique split values n_unique,
    if bins == None or bins > n_unique.
as_pandas :
    Return pd.DataFrame when pandas is installed.
    If False or pandas is not installed, return numpy ndarray.

Returns
-------
a histogram of used splitting values for the specified feature
either as numpy array or pandas DataFrame.

◆ inplace_predict()

NumpyOrCupy xgboost.core.Booster.inplace_predict (   self,
DataType  data,
Tuple[int, int]   iteration_range = (0, 0),
str   predict_type = "value",
float   missing = np.nan,
bool   validate_features = True,
Any   base_margin = None,
bool   strict_shape = False 
)
Run prediction in-place when possible.  Unlike the :py:meth:`predict` method,
inplace prediction does not cache the prediction result.

Calling only ``inplace_predict`` in multiple threads is safe and lock
free.  But the safety does not hold when used in conjunction with other
methods. E.g. you can't train the booster in one thread and perform
prediction in the other.

.. note::

    If the device ordinal of the input data doesn't match the one configured for
    the booster, data will be copied to the booster device.

.. code-block:: python

    booster.set_param({"device": "cuda:0"})
    booster.inplace_predict(cupy_array)

    booster.set_param({"device": "cpu"})
    booster.inplace_predict(numpy_array)

.. versionadded:: 1.1.0

Parameters
----------
data :
    The input data.
iteration_range :
    See :py:meth:`predict` for details.
predict_type :
    * `value` Output model prediction values.
    * `margin` Output the raw untransformed margin value.
missing :
    See :py:obj:`xgboost.DMatrix` for details.
validate_features:
    See :py:meth:`xgboost.Booster.predict` for details.
base_margin:
    See :py:obj:`xgboost.DMatrix` for details.

    .. versionadded:: 1.4.0

strict_shape:
    See :py:meth:`xgboost.Booster.predict` for details.

    .. versionadded:: 1.4.0

Returns
-------
prediction : numpy.ndarray/cupy.ndarray
    The prediction result.  When input data is on GPU, prediction result is
    stored in a cupy array.

◆ load_config()

None xgboost.core.Booster.load_config (   self,
str  config 
)
Load configuration returned by `save_config`.

.. versionadded:: 1.0.0

◆ load_model()

None xgboost.core.Booster.load_model (   self,
ModelIn  fname 
)
Load the model from a file or bytearray.  The path to the file can be local
or a URI.

The model is loaded from XGBoost format which is universal among the various
XGBoost interfaces. Auxiliary attributes of the Python Booster object (such as
feature_names) will not be loaded when using binary format.  To save those
attributes, use JSON/UBJ instead.  See :doc:`Model IO </tutorials/saving_model>`
for more info.

.. code-block:: python

  model.load_model("model.json")
  # or
  model.load_model("model.ubj")

Parameters
----------
fname :
    Input file name or memory buffer (see also save_raw)

◆ num_boosted_rounds()

int xgboost.core.Booster.num_boosted_rounds (   self)
Get number of boosted rounds.  For gblinear this is reset to 0 after
serializing the model.

◆ num_features()

int xgboost.core.Booster.num_features (   self)
Number of features in booster.

◆ predict()

np.ndarray xgboost.core.Booster.predict (   self,
DMatrix  data,
bool   output_margin = False,
bool   pred_leaf = False,
bool   pred_contribs = False,
bool   approx_contribs = False,
bool   pred_interactions = False,
bool   validate_features = True,
bool   training = False,
Tuple[int, int]   iteration_range = (0, 0),
bool   strict_shape = False 
)
Predict with data.  The full model will be used unless `iteration_range` is specified,
meaning the user has to either slice the model or use the ``best_iteration``
attribute to get predictions from the best model returned from early stopping.

.. note::

    See :doc:`Prediction </prediction>` for issues like thread safety and a
    summary of outputs from this function.

Parameters
----------
data :
    The dmatrix storing the input.

output_margin :
    Whether to output the raw untransformed margin value.

pred_leaf :
    When this option is on, the output will be a matrix of (nsample,
    ntrees) with each record indicating the predicted leaf index of
    each sample in each tree.  Note that the leaf index of a tree is
    unique per tree, so you may find leaf 1 in both tree 1 and tree 0.

pred_contribs :
    When this is True the output will be a matrix of size (nsample,
    nfeats + 1) with each record indicating the feature contributions
    (SHAP values) for that prediction. The sum of all feature
    contributions is equal to the raw untransformed margin value of the
    prediction. Note the final column is the bias term.

approx_contribs :
    Approximate the contributions of each feature.  Used when ``pred_contribs`` or
    ``pred_interactions`` is set to True.  Changing the default of this parameter
    (False) is not recommended.

pred_interactions :
    When this is True the output will be a matrix of size (nsample,
    nfeats + 1, nfeats + 1) indicating the SHAP interaction values for
    each pair of features. The sum of each row (or column) of the
    interaction values equals the corresponding SHAP value (from
    pred_contribs), and the sum of the entire matrix equals the raw
    untransformed margin value of the prediction. Note the last row and
    column correspond to the bias term.

validate_features :
    When this is True, validate that the Booster's and data's
    feature_names are identical.  Otherwise, it is assumed that the
    feature_names are the same.

training :
    Whether the prediction value is used for training.  This can affect the `dart`
    booster, which performs dropouts during training iterations but uses all trees
    for inference. If you want to obtain results with dropouts, set this parameter
    to `True`.  Also, the parameter is set to true when obtaining prediction for
    custom objective function.

    .. versionadded:: 1.0.0

iteration_range :
    Specifies which layer of trees are used in prediction.  For example, if a
    random forest is trained with 100 rounds, specifying `iteration_range=(10,
    20)` means that only the forests built during [10, 20) (half open set) rounds
    are used in this prediction.

    .. versionadded:: 1.4.0

strict_shape :
    When set to True, output shape is invariant to whether classification is used.
    For both value and margin prediction, the output shape is (n_samples,
    n_groups), n_groups == 1 when multi-class is not used.  Defaults to False, in
    which case the output shape can be (n_samples, ) if multi-class is not used.

    .. versionadded:: 1.4.0

Returns
-------
prediction : numpy array

◆ save_config()

str xgboost.core.Booster.save_config (   self)
Output internal parameter configuration of Booster as a JSON
string.

.. versionadded:: 1.0.0

◆ save_model()

None xgboost.core.Booster.save_model (   self,
Union[str, os.PathLike]  fname 
)
Save the model to a file.

The model is saved in an XGBoost internal format which is universal among the
various XGBoost interfaces. Auxiliary attributes of the Python Booster object
(such as feature_names) will not be saved when using binary format.  To save
those attributes, use JSON/UBJ instead. See :doc:`Model IO
</tutorials/saving_model>` for more info.

.. code-block:: python

  model.save_model("model.json")
  # or
  model.save_model("model.ubj")

Parameters
----------
fname :
    Output file name

◆ save_raw()

bytearray xgboost.core.Booster.save_raw (   self,
str   raw_format = "deprecated" 
)
Save the model to an in-memory buffer representation instead of a file.

Parameters
----------
raw_format :
    Format of output buffer. Can be `json`, `ubj` or `deprecated`.  Right now
    the default is `deprecated` but it will be changed to `ubj` (universal binary
    json) in the future.

Returns
-------
An in-memory buffer representation of the model

◆ set_attr()

None xgboost.core.Booster.set_attr (   self,
**Optional[Any]  kwargs 
)
Set the attribute of the Booster.

Parameters
----------
**kwargs
    The attributes to set. Setting a value to None deletes an attribute.

◆ set_param()

None xgboost.core.Booster.set_param (   self,
Union[Dict, Iterable[Tuple[str, Any]], str]  params,
Optional[str]   value = None 
)
Set parameters into the Booster.

Parameters
----------
params :
   list of key,value pairs, dict of key to value or simply str key
value :
   value of the specified parameter, when params is str key

◆ trees_to_dataframe()

DataFrame xgboost.core.Booster.trees_to_dataframe (   self,
Union[str, os.PathLike]   fmap = "" 
)
Parse a boosted tree model text dump into a pandas DataFrame structure.

This feature is only defined when the decision tree model is chosen as base
learner (`booster in {gbtree, dart}`). It is not defined for other base learner
types, such as linear learners (`booster=gblinear`).

Parameters
----------
fmap :
   The name of feature map file.

◆ update()

None xgboost.core.Booster.update (   self,
DMatrix  dtrain,
int  iteration,
Optional[Objective]   fobj = None 
)
Update for one iteration, with objective function calculated
internally.  This function should not be called directly by users.

Parameters
----------
dtrain :
    Training data.
iteration :
    Current iteration number.
fobj :
    Customized objective function.

The documentation for this class was generated from the following file: