6#include <gtest/gtest.h>
11#include <xgboost/json.h>
24#include "../../src/collective/communicator-inl.h"
25#include "../../src/common/common.h"
26#include "../../src/common/threading_utils.h"
27#include "../../src/data/array_interface.h"
28#include "filesystem.h"
31#if defined(__CUDACC__)
32#define DeclareUnifiedTest(name) GPU ## name
34#define DeclareUnifiedTest(name) name
37#if defined(__CUDACC__)
38#define GPUIDX (common::AllVisibleGPUs() == 1 ? 0 : collective::GetRank())
43#if defined(__CUDACC__)
44#define DeclareUnifiedDistributedTest(name) MGPU ## name
46#define DeclareUnifiedDistributedTest(name) name
52struct LearnerModelParam;
/**
 * \brief Relative error of `l` with respect to `r`, computed as |1 - l / r|.
 *
 * \tparam Float Floating point type; anything else is rejected at compile time.
 * \param l Value under test.
 * \param r Reference value, used as the divisor. No guard is applied for r == 0.
 * \return |1 - l / r| converted to `Float`.
 */
template <typename Float>
Float RelError(Float l, Float r) {
  static_assert(std::is_floating_point<Float>::value,
                "RelError requires a floating point type.");
  return std::abs(1.0f - l / r);
}
/**
 * \brief Declaration only; no definition is visible in this header excerpt.
 *
 * \param filename Path to query.
 * \return Presumably true when `filename` exists -- confirm against the definition.
 */
bool FileExists(const std::string& filename);
/**
 * \brief Declaration only; no definition is visible in this header excerpt.
 *
 * \param filename Path handed to the implementation; presumably the file to be
 *                 written -- confirm against the definition.
 */
void CreateSimpleTestData(const std::string& filename);
/**
 * \brief Declaration only; no definition is visible in this header excerpt.
 *
 * \param filename   Path handed to the implementation; presumably the output file.
 * \param n_entries  Requested number of entries.
 * \param zero_based Defaults to true. Presumably selects 0-based feature indices --
 *                   confirm against the definition.
 */
void CreateBigTestData(const std::string& filename, size_t n_entries, bool zero_based = true);
/**
 * \brief Declaration only; no definition is visible in this header excerpt.
 *
 * \param path Path handed to the implementation; presumably the CSV file to write.
 * \param rows Requested number of rows.
 * \param cols Requested number of columns.
 */
void CreateTestCSV(std::string const& path, size_t rows, size_t cols);
72void CheckObjFunction(std::unique_ptr<xgboost::ObjFunction>
const& obj,
73 std::vector<xgboost::bst_float> preds,
74 std::vector<xgboost::bst_float> labels,
75 std::vector<xgboost::bst_float> weights,
76 std::vector<xgboost::bst_float> out_grad,
77 std::vector<xgboost::bst_float> out_hess);
83xgboost::Json CheckConfigReload(std::unique_ptr<T>
const& configurable,
84 std::string name =
"") {
89void CheckRankingObjFunction(std::unique_ptr<xgboost::ObjFunction>
const& obj,
90 std::vector<xgboost::bst_float> preds,
91 std::vector<xgboost::bst_float> labels,
92 std::vector<xgboost::bst_float> weights,
93 std::vector<xgboost::bst_uint> groups,
94 std::vector<xgboost::bst_float> out_grad,
95 std::vector<xgboost::bst_float> out_hess);
100 std::vector<xgboost::bst_float> labels,
101 std::vector<xgboost::bst_float> weights = std::vector<xgboost::bst_float>(),
102 std::vector<xgboost::bst_uint> groups = std::vector<xgboost::bst_uint>(),
103 xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);
108 std::vector<xgboost::bst_float> weights = {},
109 std::vector<xgboost::bst_uint> groups = {},
110 xgboost::DataSplitMode data_split_Mode = xgboost::DataSplitMode::kRow);
114float GetBaseScore(Json
const &config);
126 using StateType = uint64_t;
127 static StateType
constexpr kDefaultInit = 3;
128 static StateType
constexpr kDefaultAlpha = 61;
129 static StateType
constexpr kMaxValue = (
static_cast<StateType
>(1) << 32) - 1;
132 StateType
const alpha_;
133 StateType
const mod_;
136 using result_type = StateType;
139 SimpleLCG() : state_{kDefaultInit}, alpha_{kDefaultAlpha}, mod_{kMaxValue} {}
143 void Seed(StateType seed) { state_ = seed % mod_; }
151 : state_{state == 0 ? kDefaultInit : state}, alpha_{kDefaultAlpha}, mod_{kMaxValue} {}
153 StateType operator()();
154 StateType Min()
const;
155 StateType Max()
const;
157 constexpr result_type
static min() {
return 0; };
158 constexpr result_type
static max() {
return kMaxValue; }
161template <
typename ResultT>
164 ResultT
const lower_;
165 ResultT
const upper_;
168 template <
size_t Bits,
typename GeneratorT>
169 ResultT GenerateCanonical(GeneratorT* rng)
const {
170 static_assert(std::is_floating_point<ResultT>::value,
171 "Result type must be floating point.");
172 long double const r = (
static_cast<long double>(rng->Max())
173 -
static_cast<long double>(rng->Min())) + 1.0L;
174 auto const log2r =
static_cast<size_t>(std::log(r) / std::log(2.0L));
175 size_t m = std::max<size_t>(1UL, (Bits + log2r - 1UL) / log2r);
176 ResultT sum_value = 0, r_k = 1;
178 for (
size_t k = m; k != 0; --k) {
179 sum_value +=
static_cast<ResultT
>((*rng)() - rng->Min()) * r_k;
180 r_k *=
static_cast<ResultT
>(r);
183 ResultT res = sum_value / r_k;
189 lower_{l}, upper_{u} {}
191 template <
typename GeneratorT>
192 ResultT operator()(GeneratorT* rng)
const {
193 ResultT tmp = GenerateCanonical<std::numeric_limits<ResultT>::digits,
195 auto ret = (tmp * (upper_ - lower_)) + lower_;
197 return std::max(ret, lower_);
204 array_interface[
"data"] = std::vector<Json>(2);
205 if (storage->DeviceCanRead()) {
206 array_interface[
"data"][0] = Integer{
reinterpret_cast<int64_t
>(storage->ConstDevicePointer())};
207 array_interface[
"stream"] =
nullptr;
209 array_interface[
"data"][0] = Integer{
reinterpret_cast<int64_t
>(storage->ConstHostPointer())};
211 array_interface[
"data"][1] = Boolean(
false);
213 array_interface[
"shape"] = std::vector<Json>(2);
214 array_interface[
"shape"][0] = rows;
215 array_interface[
"shape"][1] = cols;
217 char t = linalg::detail::ArrayInterfaceHandler::TypeChar<T>();
218 array_interface[
"typestr"] = String(std::string{
"<"} + t + std::to_string(
sizeof(T)));
219 array_interface[
"version"] = 3;
220 return array_interface;
234 std::int32_t device_{Context::kCpuId};
235 std::size_t n_batches_{0};
236 std::uint64_t seed_{0};
240 std::vector<FeatureType> ft_;
245 void GenerateLabels(std::shared_ptr<DMatrix> p_fmat)
const;
249 : rows_{rows}, cols_{cols}, sparsity_{sparsity}, lcg_{seed_} {}
264 n_batches_ = n_batches;
277 CHECK_EQ(ft.size(), cols_);
278 ft_.resize(ft.size());
279 std::copy(ft.cbegin(), ft.cend(), ft_.begin());
287 n_targets_ = n_targets;
313 [[nodiscard]] std::shared_ptr<DMatrix> GenerateDMatrix(
bool with_label =
false,
314 bool float_label =
true,
315 size_t classes = 1)
const;
317 [[nodiscard]] std::shared_ptr<DMatrix> GenerateSparsePageDMatrix(std::string prefix,
318 bool with_label)
const;
320#if defined(XGBOOST_USE_CUDA)
321 std::shared_ptr<DMatrix> GenerateDeviceDMatrix(
bool with_label);
323 std::shared_ptr<DMatrix> GenerateQuantileDMatrix(
bool with_label);
327inline std::shared_ptr<DMatrix> EmptyDMatrix() {
331inline std::vector<float> GenerateRandomCategoricalSingleColumn(
int n,
size_t num_categories) {
332 std::vector<float> x(n);
334 std::uniform_int_distribution<size_t> dist(0, num_categories - 1);
335 std::generate(x.begin(), x.end(), [&]() { return dist(rng); });
337 for (
size_t i = 0; i < num_categories; i++) {
338 x[i] =
static_cast<decltype(x)::value_type
>(i);
343std::shared_ptr<DMatrix> GetDMatrixFromData(
const std::vector<float>& x, std::size_t num_rows,
357 size_t n_batches, std::string prefix =
"cache");
382 size_t n_rows,
size_t n_cols,
size_t page_size,
bool deterministic,
385std::unique_ptr<GradientBooster> CreateTrainedGBM(std::string name, Args kwargs,
size_t kRows,
387 LearnerModelParam
const* learner_model_param,
388 Context
const* generic_param);
390inline std::unique_ptr<HostDeviceVector<GradientPair>> GenerateGradients(
392 auto p_gradients = std::make_unique<HostDeviceVector<GradientPair>>(rows * n_targets);
393 auto& h_gradients = p_gradients->HostVector();
398 for (std::size_t i = 0; i < rows * n_targets; ++i) {
399 auto grad = dist(&gen);
400 auto hess = dist(&gen);
411 if (device == Context::kCpuId) {
417inline HostDeviceVector<GradientPair> GenerateRandomGradients(
const size_t n_rows,
418 float lower= 0.0f,
float upper = 1.0f) {
421 std::vector<GradientPair> h_gpair(n_rows);
422 for (
auto &gpair : h_gpair) {
427 HostDeviceVector<GradientPair> gpair(h_gpair);
437 DMatrixHandle proxy_;
438 std::unique_ptr<RandomDataGenerator> rng_;
440 std::vector<std::string> batches_;
441 std::string interface_;
/** \brief Compile-time row count constant: 1024. */
static constexpr size_t Rows() { return 1024; }
/** \brief Compile-time batch count constant: 100. */
static constexpr size_t Batches() { return 100; }
/** \brief Compile-time column count constant: 13. */
static constexpr size_t Cols() { return 13; }
452 [[nodiscard]] std::string AsArray()
const {
return interface_; }
454 virtual int Next() = 0;
455 virtual void Reset() { iter_ = 0; }
456 [[nodiscard]] std::size_t Iter()
const {
return iter_; }
457 auto Proxy() ->
decltype(proxy_) {
return proxy_; }
459 explicit ArrayIterForTest(
float sparsity,
size_t rows,
size_t cols,
size_t batches);
464 std::size_t n_samples,
bst_feature_t n_features, std::size_t n_batches);
471 size_t batches = Batches());
479 size_t batches = Batches());
482 std::size_t n_batches)
488void DMatrixToCSR(
DMatrix *dmat, std::vector<float> *p_data,
489 std::vector<size_t> *p_row_ptr,
490 std::vector<bst_feature_t> *p_cids);
/** \brief Opaque handle type for a data iterator; an alias of `void*`. */
using DataIterHandle = void*;
494inline void Reset(DataIterHandle self) {
499 return static_cast<ArrayIterForTest*
>(self)->Next();
503using RMMAllocatorPtr = std::unique_ptr<RMMAllocator, void(*)(RMMAllocator*)>;
504RMMAllocatorPtr SetUpRMMResourceForCppTests(
int argc,
char** argv);
509inline LearnerModelParam MakeMP(
bst_feature_t n_features,
float base_score, uint32_t n_groups,
510 int32_t device = Context::kCpuId) {
512 LearnerModelParam mparam(n_features, linalg::Tensor<float, 1>{{base_score}, shape, device},
513 n_groups, 1, MultiStrategy::kOneOutputPerTree);
517inline std::int32_t AllThreadsForTest() {
return Context{}.Threads(); }
519template <
bool use_nccl =
false,
typename Function,
typename... Args>
520void RunWithInMemoryCommunicator(int32_t world_size, Function&& function, Args&&... args) {
521 auto run = [&](
auto rank) {
522 Json config{JsonObject()};
523 if constexpr (use_nccl) {
524 config[
"xgboost_communicator"] = String(
"in-memory-nccl");
526 config[
"xgboost_communicator"] = String(
"in-memory");
528 config[
"in_memory_world_size"] = world_size;
529 config[
"in_memory_rank"] = rank;
532 std::forward<Function>(function)(std::forward<Args>(args)...);
537 common::ParallelFor(world_size, world_size, run);
539 std::vector<std::thread> threads;
540 for (
auto rank = 0; rank < world_size; rank++) {
541 threads.emplace_back(run, rank);
543 for (
auto& thread : threads) {
552 bool use_nccl_{
false};
554 void SetUp()
override {
555 auto const n_gpus = common::AllVisibleGPUs();
563 world_size_ = n_gpus;
568 template <
typename Function,
typename... Args>
569 void DoTest(Function&& function, Args&&... args) {
571 RunWithInMemoryCommunicator<true>(world_size_, function, args...);
573 RunWithInMemoryCommunicator<false>(world_size_, function, args...);
578class DeclareUnifiedDistributedTest(MetricTest) :
public BaseMGPUTest{};
Manager class for temporary directories. Whenever a new TemporaryDirectory object is constructed,...
Definition filesystem.h:54
Internal data structure used by XGBoost during training.
Definition data.h:509
Definition host_device_vector.h:87
Data structure representing JSON format.
Definition json.h:357
interface of evaluation metric used to evaluate model performance. This has nothing to do with traini...
Definition metric.h:29
std::pair< std::vector< std::string >, std::string > GenerateArrayInterfaceBatch(HostDeviceVector< float > *storage, size_t batches) const
Generate batches of array interface stored in consecutive memory.
Definition helpers.cc:310
Linear congruential generator.
Definition helpers.h:124
SimpleLCG(StateType state)
Initialize SimpleLCG.
Definition helpers.h:150
span class implementation, based on ISO C++20 span<T>. The interface should be the same.
Definition span.h:424
A tensor storage.
Definition linalg.h:742
Copyright 2014-2023, XGBoost Contributors.
void * DMatrixHandle
handle to DMatrix
Definition c_api.h:49
void * DataIterHandle
handle to an external data iterator
Definition c_api.h:334
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2015-2023 by XGBoost Contributors.
Copyright 2021-2023 by XGBoost Contributors.
Defines the abstract interface for different components in XGBoost.
void Init(Json const &config)
Initialize the collective communicator.
Definition communicator-inl.h:60
void Finalize()
Finalize the collective communicator.
Definition communicator-inl.h:69
namespace of xgboost
Definition base.h:90
Context MakeCUDACtx(std::int32_t device)
Make a context that uses CUDA if device >= 0.
Definition helpers.h:410
uint32_t bst_feature_t
Type for data column (feature) index.
Definition base.h:101
std::uint32_t bst_target_t
Type for indexing into output targets.
Definition base.h:118
std::size_t bst_row_t
Type for data row index.
Definition base.h:110
int32_t bst_cat_t
Categorical value type.
Definition base.h:99
detail::GradientPairInternal< float > GradientPair
gradient statistics pair usually needed in gradient boosting
Definition base.h:256
std::unique_ptr< DMatrix > CreateSparsePageDMatrix(bst_row_t n_samples, bst_feature_t n_features, size_t n_batches, std::string prefix)
Create Sparse Page using data iterator.
Definition helpers.cc:514
int32_t bst_bin_t
Type for histogram bin index.
Definition base.h:103
float bst_float
float type, used for storing statistics
Definition base.h:97
std::unique_ptr< DMatrix > CreateSparsePageDMatrixWithRC(size_t n_rows, size_t n_cols, size_t page_size, bool deterministic, const dmlc::TemporaryDirectory &tempdir)
Deprecated, stop using it.
Definition helpers.cc:567
Runtime context for XGBoost.
Definition context.h:84
Context MakeCUDA(bst_d_ordinal_t ordinal=0) const
Make a CUDA context based on the current context.
Definition context.h:160