Medial Code Documentation
Loading...
Searching...
No Matches
learner.h
Go to the documentation of this file.
1
8#ifndef XGBOOST_LEARNER_H_
9#define XGBOOST_LEARNER_H_
10
11#include <dmlc/io.h> // for Serializable
12#include <xgboost/base.h> // for bst_feature_t, bst_target_t, bst_float, Args, GradientPair, ..
13#include <xgboost/context.h> // for Context
14#include <xgboost/linalg.h> // for Tensor, TensorView
15#include <xgboost/metric.h> // for Metric
16#include <xgboost/model.h> // for Configurable, Model
17#include <xgboost/span.h> // for Span
18#include <xgboost/task.h> // for ObjInfo
19
20#include <algorithm> // for max
21#include <cstdint> // for int32_t, uint32_t, uint8_t
22#include <map> // for map
23#include <memory> // for shared_ptr, unique_ptr
24#include <string> // for string
25#include <utility> // for move
26#include <vector> // for vector
27
28namespace xgboost {
29class FeatureMap;
30class Metric;
31class GradientBooster;
32class ObjFunction;
33class DMatrix;
34class Json;
35struct XGBAPIThreadLocalEntry;
36template <typename T>
37class HostDeviceVector;
38
// Enumerates the kinds of prediction output. It is the type of the `type` parameter of
// Learner::InplacePredict and is stored in a std::uint8_t.
// NOTE(review): the enumerator names appear to mirror the boolean flags of Learner::Predict
// (output_margin, pred_contribs, approx_contribs, pred_interactions, pred_leaf) — confirm mapping.
39enum class PredictionType : std::uint8_t { // NOLINT
40 kValue = 0,
41 kMargin = 1,
42 kContribution = 2,
43 kApproxContribution = 3,
44 kInteraction = 4,
45 kApproxInteraction = 5,
46 kLeaf = 6
47};
48
// Learner class that does training and prediction; the user-facing module of xgboost training.
// Derives from Model, Configurable and dmlc::Serializable; nearly every member is pure virtual.
// NOTE(review): this is an extracted listing — the numeric prefixes are the original header's
// line numbers, and a few declaration lines are missing (each gap is flagged below).
65class Learner : public Model, public Configurable, public dmlc::Serializable {
66 public:
// Virtual destructor.
68 ~Learner() override;
// Configure Learner based on set parameters.
72 virtual void Configure() = 0;
// Update the model for one iteration with the specified objective function.
79 virtual void UpdateOneIter(int iter, std::shared_ptr<DMatrix> train) = 0;
// Do customized gradient boosting with in_gpair. in_gpair can be mutated after this call.
87 virtual void BoostOneIter(int iter,
88 std::shared_ptr<DMatrix> train,
89 HostDeviceVector<GradientPair>* in_gpair) = 0;
// Evaluate the model for a specific iteration using the configured metrics.
// data_sets and data_names are passed as two separate vectors; the result is returned as a string.
97 virtual std::string EvalOneIter(int iter,
98 const std::vector<std::shared_ptr<DMatrix>>& data_sets,
99 const std::vector<std::string>& data_names) = 0;
// Get prediction given the model. Results are written through out_preds.
// All boolean flags after layer_end default to false.
113 virtual void Predict(std::shared_ptr<DMatrix> data, bool output_margin,
114 HostDeviceVector<bst_float>* out_preds, bst_layer_t layer_begin,
115 bst_layer_t layer_end, bool training = false, bool pred_leaf = false,
116 bool pred_contribs = false, bool approx_contribs = false,
117 bool pred_interactions = false) = 0;
118
// Inplace prediction. The kind of output is selected by `type`; out_preds is a pointer to a
// HostDeviceVector<float> pointer.
129 virtual void InplacePredict(std::shared_ptr<DMatrix> p_m, PredictionType type, float missing,
130 HostDeviceVector<float>** out_preds, bst_layer_t layer_begin,
131 bst_layer_t layer_end) = 0;
132
// Calculate feature score. See doc in C API for outputs.
// NOTE(review): listing line 137 is missing from this extract; per the member index at the end
// of this page the full signature also takes `common::Span<int32_t const> trees` after
// importance_type.
136 virtual void CalcFeatureScore(std::string const& importance_type,
138 std::vector<bst_feature_t>* features,
139 std::vector<float>* scores) = 0;
140
141 /*
142 * \brief Get number of boosted rounds from gradient booster.
143 */
144 virtual int32_t BoostedRounds() const = 0;
// Get the number of output groups from the model.
148 virtual std::uint32_t Groups() const = 0;
149
// Load the model from a JSON object.
150 void LoadModel(Json const& in) override = 0;
// Saves the model config to a JSON object.
151 void SaveModel(Json* out) const override = 0;
152
// Overloads of LoadModel/SaveModel that take a dmlc::Stream instead of a Json object.
153 virtual void LoadModel(dmlc::Stream* fi) = 0;
154 virtual void SaveModel(dmlc::Stream* fo) const = 0;
155
// Set multiple parameters at once.
161 virtual void SetParams(Args const& args) = 0;
// Set parameter for booster.
170 virtual void SetParam(const std::string& key, const std::string& value) = 0;
171
// Get the number of features of the booster.
176 virtual uint32_t GetNumFeature() const = 0;
177
// Set additional attribute to the Booster.
186 virtual void SetAttr(const std::string& key, const std::string& value) = 0;
// Get attribute from the booster. The property will be saved along the booster.
// NOTE(review): the bool return presumably reports whether the key exists — confirm.
194 virtual bool GetAttr(const std::string& key, std::string* out) const = 0;
// Delete an attribute from the booster.
200 virtual bool DelAttr(const std::string& key) = 0;
// Get a vector of attribute names from the booster.
205 virtual std::vector<std::string> GetAttrNames() const = 0;
// Set the feature names for current booster.
210 virtual void SetFeatureNames(std::vector<std::string> const& fn) = 0;
// Get the feature names for current booster (written through fn).
215 virtual void GetFeatureNames(std::vector<std::string>* fn) const = 0;
// Set the feature types for current booster.
220 virtual void SetFeatureTypes(std::vector<std::string> const& ft) = 0;
// Get the feature types for current booster (written through ft).
225 virtual void GetFeatureTypes(std::vector<std::string>* ft) const = 0;
226
// Slice the model.
// NOTE(review): the opening line of this declaration is missing from this extract; per the
// member index it reads
// `virtual Learner* Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step,`.
238 bool* out_of_bound) = 0;
// Dump the model in the requested format.
246 virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
247 bool with_stats,
248 std::string format) = 0;
249
// Returns a reference to an XGBAPIThreadLocalEntry.
250 virtual XGBAPIThreadLocalEntry& GetThreadLocal() const = 0;
// Create a new instance of learner. Returns a raw Learner pointer.
// NOTE(review): ownership of the returned pointer is not stated here — confirm with callers.
256 static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data);
// Return the context object of this Booster.
260 virtual Context const* Ctx() const = 0;
// Get configuration arguments currently stored by the learner.
265 virtual const std::map<std::string, std::string>& GetConfigurationArguments() const = 0;
266
267 protected:
// Objective function.
269 std::unique_ptr<ObjFunction> obj_;
// The gradient booster used by the model.
271 std::unique_ptr<GradientBooster> gbm_;
// The evaluation metrics used to evaluate the model.
273 std::vector<std::unique_ptr<Metric> > metrics_;
// NOTE(review): the member index lists `Context ctx_` ("Training parameter", learner.h:275)
// at this position; that line is missing from this extract.
276};
277
279
// Strategy for building multi-target models. Backed by std::int32_t.
// LearnerModelParam::multi_strategy defaults to kOneOutputPerTree, and
// LearnerModelParam::IsVectorLeaf() tests for kMultiOutputTree.
283enum class MultiStrategy : std::int32_t {
284 kOneOutputPerTree = 0,
285 kMultiOutputTree = 1,
286};
287
292 private:
297 linalg::Tensor<float, 1> base_score_;
298
299 public:
// The number of classes or targets. Defaults to 0, which Initialized() treats as "not set".
307 std::uint32_t num_output_group{0};
// Current task, determined by objective. Defaults to ObjInfo::kRegression.
311 ObjInfo task{ObjInfo::kRegression};
// Strategy for building multi-target models. Defaults to MultiStrategy::kOneOutputPerTree.
315 MultiStrategy multi_strategy{MultiStrategy::kOneOutputPerTree};
316
317 LearnerModelParam() = default;
318 // As the old `LearnerModelParamLegacy` is still used by binary IO, we keep
319 // this one as an immutable copy.
320 LearnerModelParam(Context const* ctx, LearnerModelParamLegacy const& user_param,
325 std::uint32_t n_groups, bst_target_t n_targets, MultiStrategy multi_strategy)
326 : base_score_{std::move(base_score)},
327 num_feature{n_features},
328 num_output_group{std::max(n_groups, n_targets)},
330
331 linalg::TensorView<float const, 1> BaseScore(Context const* ctx) const;
332 [[nodiscard]] linalg::TensorView<float const, 1> BaseScore(std::int32_t device) const;
333
334 void Copy(LearnerModelParam const& that);
// Returns true when multi_strategy is MultiStrategy::kMultiOutputTree.
335 [[nodiscard]] bool IsVectorLeaf() const noexcept {
336 return multi_strategy == MultiStrategy::kMultiOutputTree;
337 }
// Returns num_output_group (the number of classes or targets) as a bst_target_t.
338 [[nodiscard]] bst_target_t OutputLength() const noexcept { return this->num_output_group; }
// Returns OutputLength() when IsVectorLeaf() is true, otherwise 1.
339 [[nodiscard]] bst_target_t LeafLength() const noexcept {
340 return this->IsVectorLeaf() ? this->OutputLength() : 1;
341 }
342
343 /* \brief Whether this parameter is initialized with LearnerModelParamLegacy. */
// The check itself: true only when both num_feature and num_output_group are non-zero.
344 [[nodiscard]] bool Initialized() const { return num_feature != 0 && num_output_group != 0; }
345};
346
347} // namespace xgboost
348#endif // XGBOOST_LEARNER_H_
interface for serializable objects
Definition io.h:132
interface of stream I/O for serialization
Definition io.h:30
Feature map data structure to help text model dump. TODO(tqchen) consider make it even more lightweig...
Definition feature_map.h:22
Definition host_device_vector.h:87
Data structure representing JSON format.
Definition json.h:357
Learner class that does training and prediction. This is the user facing module of xgboost training....
Definition learner.h:65
virtual void InplacePredict(std::shared_ptr< DMatrix > p_m, PredictionType type, float missing, HostDeviceVector< float > **out_preds, bst_layer_t layer_begin, bst_layer_t layer_end)=0
Inplace prediction.
virtual void SetParam(const std::string &key, const std::string &value)=0
Set parameter for booster.
virtual std::vector< std::string > DumpModel(const FeatureMap &fmap, bool with_stats, std::string format)=0
dump the model in the requested format
std::vector< std::unique_ptr< Metric > > metrics_
The evaluation metrics used to evaluate the model.
Definition learner.h:273
virtual void CalcFeatureScore(std::string const &importance_type, common::Span< int32_t const > trees, std::vector< bst_feature_t > *features, std::vector< float > *scores)=0
Calculate feature score. See doc in C API for outputs.
virtual Context const * Ctx() const =0
Return the context object of this Booster.
virtual const std::map< std::string, std::string > & GetConfigurationArguments() const =0
Get configuration arguments currently stored by the learner.
~Learner() override
virtual destructor
virtual void SetFeatureNames(std::vector< std::string > const &fn)=0
Set the feature names for current booster.
virtual void BoostOneIter(int iter, std::shared_ptr< DMatrix > train, HostDeviceVector< GradientPair > *in_gpair)=0
Do customized gradient boosting with in_gpair. in_gpair can be mutated after this call.
virtual std::string EvalOneIter(int iter, const std::vector< std::shared_ptr< DMatrix > > &data_sets, const std::vector< std::string > &data_names)=0
evaluate the model for specific iteration using the configured metrics.
static Learner * Create(const std::vector< std::shared_ptr< DMatrix > > &cache_data)
Create a new instance of learner.
Definition learner.cc:1485
virtual void Configure()=0
Configure Learner based on set parameters.
virtual bool DelAttr(const std::string &key)=0
Delete an attribute from the booster.
virtual Learner * Slice(bst_layer_t begin, bst_layer_t end, bst_layer_t step, bool *out_of_bound)=0
Slice the model.
virtual bool GetAttr(const std::string &key, std::string *out) const =0
Get attribute from the booster. The property will be saved along the booster.
virtual void SetParams(Args const &args)=0
Set multiple parameters at once.
virtual void UpdateOneIter(int iter, std::shared_ptr< DMatrix > train)=0
Update the model for one iteration with the specified objective function.
virtual void SetAttr(const std::string &key, const std::string &value)=0
Set additional attribute to the Booster.
std::unique_ptr< ObjFunction > obj_
objective function
Definition learner.h:269
std::unique_ptr< GradientBooster > gbm_
The gradient booster used by the model.
Definition learner.h:271
virtual void GetFeatureNames(std::vector< std::string > *fn) const =0
Get the feature names for current booster.
void LoadModel(Json const &in) override=0
load the model from a JSON object
virtual void SetFeatureTypes(std::vector< std::string > const &ft)=0
Set the feature types for current booster.
void SaveModel(Json *out) const override=0
saves the model config to a JSON object
Context ctx_
Training parameter.
Definition learner.h:275
virtual std::uint32_t Groups() const =0
Get the number of output groups from the model.
virtual void Predict(std::shared_ptr< DMatrix > data, bool output_margin, HostDeviceVector< bst_float > *out_preds, bst_layer_t layer_begin, bst_layer_t layer_end, bool training=false, bool pred_leaf=false, bool pred_contribs=false, bool approx_contribs=false, bool pred_interactions=false)=0
get prediction given the model.
virtual std::vector< std::string > GetAttrNames() const =0
Get a vector of attribute names from the booster.
virtual uint32_t GetNumFeature() const =0
Get the number of features of the booster.
virtual void GetFeatureTypes(std::vector< std::string > *ft) const =0
Get the feature types for current booster.
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition span.h:424
A tensor storage.
Definition linalg.h:742
Copyright 2014-2023, XGBoost Contributors.
defines serializable interface of dmlc
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2021-2023 by XGBoost Contributors.
Defines the abstract interface for different components in XGBoost.
Definition StdDeque.h:58
namespace of xgboost
Definition base.h:90
uint32_t bst_feature_t
Type for data column (feature) index.
Definition base.h:101
std::uint32_t bst_target_t
Type for indexing into output targets.
Definition base.h:118
std::int32_t bst_layer_t
Type for indexing boosted layers.
Definition base.h:122
MultiStrategy
Strategy for building multi-target models.
Definition learner.h:283
Definition model.h:31
Runtime context for XGBoost.
Definition context.h:84
training parameter for regression
Definition learner.cc:90
Basic model parameters, used to describe the booster.
Definition learner.h:291
std::uint32_t num_output_group
The number of classes or targets.
Definition learner.h:307
MultiStrategy multi_strategy
Strategy for building multi-target models.
Definition learner.h:315
ObjInfo task
Current task, determined by objective.
Definition learner.h:311
bst_feature_t num_feature
The number of features.
Definition learner.h:303
Definition model.h:17
A struct returned by objective, which determines task at hand. The struct is not used by any algorith...
Definition task.h:24
entry to easily hold returning information
Definition api_entry.h:16
Copyright 2014-2023 by XGBoost Contributors.