7#ifndef XGBOOST_COMMON_RANDOM_H_
8#define XGBOOST_COMMON_RANDOM_H_
22#include "../collective/communicator-inl.h"
35#if XGBOOST_CUSTOMIZE_GLOBAL_PRNG
42class CustomGlobalRandomEngine {
45 using result_type = uint32_t;
47 inline static constexpr result_type min() {
51 inline static constexpr result_type max() {
52 return std::numeric_limits<result_type>::max();
58 void seed(result_type val);
62 result_type operator()();
92std::vector<T> WeightedSamplingWithoutReplacement(
Context const* ctx, std::vector<T>
const& array,
93 std::vector<float>
const& weights,
size_t n) {
95 CHECK_EQ(array.size(), weights.size());
96 std::vector<float> keys(weights.size());
97 std::uniform_real_distribution<float> dist;
99 for (
size_t i = 0; i < array.size(); ++i) {
100 auto w = std::max(weights.at(i),
kRtEps);
102 auto k = std::log(u) / w;
105 auto ind = ArgSort<std::size_t>(ctx, keys.data(), keys.data() + keys.size(), std::greater<>{});
108 std::vector<T> results(ind.size());
109 for (
size_t k = 0; k < ind.size(); ++k) {
111 results[k] = array[idx];
124 std::shared_ptr<HostDeviceVector<bst_feature_t>> feature_set_tree_;
125 std::map<int, std::shared_ptr<HostDeviceVector<bst_feature_t>>> feature_set_level_;
126 std::vector<float> feature_weights_;
127 float colsample_bylevel_{1.0f};
128 float colsample_bytree_{1.0f};
129 float colsample_bynode_{1.0f};
134 std::shared_ptr<HostDeviceVector<bst_feature_t>> ColSample(
164 float colsample_bynode,
float colsample_bylevel,
float colsample_bytree) {
166 colsample_bylevel_ = colsample_bylevel;
167 colsample_bytree_ = colsample_bytree;
168 colsample_bynode_ = colsample_bynode;
171 if (feature_set_tree_ ==
nullptr) {
172 feature_set_tree_ = std::make_shared<HostDeviceVector<bst_feature_t>>();
176 feature_set_tree_->Resize(num_col);
177 std::iota(feature_set_tree_->HostVector().begin(), feature_set_tree_->HostVector().end(), 0);
179 feature_set_tree_ = ColSample(feature_set_tree_, colsample_bytree_);
186 feature_set_tree_->Resize(0);
187 feature_set_level_.clear();
202 if (colsample_bylevel_ == 1.0f && colsample_bynode_ == 1.0f) {
203 return feature_set_tree_;
206 if (feature_set_level_.count(depth) == 0) {
208 feature_set_level_[depth] = ColSample(feature_set_tree_, colsample_bylevel_);
210 if (colsample_bynode_ == 1.0f) {
212 return feature_set_level_[depth];
215 return ColSample(feature_set_level_[depth], colsample_bynode_);
Definition host_device_vector.h:87
Handles selection of columns due to the colsample_bytree, colsample_bylevel and colsample_bynode parameters.
Definition random.h:123
ColumnSampler()
Column sampler constructor.
Definition random.h:148
std::shared_ptr< HostDeviceVector< bst_feature_t > > GetFeatureSet(int depth)
Samples a feature set.
Definition random.h:201
ColumnSampler(uint32_t seed)
Column sampler constructor.
Definition random.h:140
void Init(Context const *ctx, int64_t num_col, std::vector< float > feature_weights, float colsample_bynode, float colsample_bylevel, float colsample_bytree)
Initialise this object before use.
Definition random.h:163
void Reset()
Resets this object.
Definition random.h:185
Copyright 2014-2023, XGBoost Contributors.
A device-and-host vector abstraction layer.
Defines console logging options for XGBoost. Used to enforce unified print behavior.
Definition feature_weights.py:1
void Broadcast(void *send_receive_buffer, size_t size, int root)
Broadcast a memory region to all others from root. This function is NOT thread-safe.
Definition communicator-inl.h:129
std::mt19937 RandomEngine
Define mt19937 as default type Random Engine.
Definition random.h:33
RandomEngine GlobalRandomEngine
global random engine
Definition random.h:74
GlobalRandomEngine & GlobalRandom()
Global singleton of a random engine. This random engine is thread-local and only visible to the current thread.
Definition common.cc:23
namespace of xgboost
Definition base.h:90
constexpr bst_float kRtEps
small eps gap for minimum split decision.
Definition base.h:319
Runtime context for XGBoost.
Definition context.h:84
Copyright 2015-2023 by XGBoost Contributors.