7#ifndef XGBOOST_DATA_SPARSE_PAGE_DMATRIX_H_
8#define XGBOOST_DATA_SPARSE_PAGE_DMATRIX_H_
17#include "ellpack_page_source.h"
18#include "gradient_index_page_source.h"
60 std::map<std::string, std::shared_ptr<Cache>> cache_info_;
69 std::string cache_prefix_;
70 uint32_t n_batches_{0};
72 void InitializeSparsePage(
Context const *ctx);
79 std::string cache_prefix);
83 sparse_page_source_.reset();
84 ellpack_page_source_.reset();
85 column_source_.reset();
86 sorted_column_source_.reset();
87 ghist_index_source_.reset();
89 for (
auto const &kv : cache_info_) {
91 auto n = kv.second->ShardName();
92 TryDeleteCacheFile(n);
97 const MetaInfo &Info()
const override;
98 Context const *Ctx()
const override {
return &fmat_ctx_; }
100 bool SingleColBlock()
const override {
return false; }
102 LOG(FATAL) <<
"Slicing DMatrix is not supported for external memory.";
105 DMatrix *SliceCol(
int,
int)
override {
106 LOG(FATAL) <<
"Slicing DMatrix columns is not supported for external memory.";
117 LOG(FATAL) <<
"Can not obtain a single CSR page for external memory DMatrix";
122 std::shared_ptr<SparsePageSource> sparse_page_source_;
123 std::shared_ptr<EllpackPageSource> ellpack_page_source_;
124 std::shared_ptr<CSCPageSource> column_source_;
125 std::shared_ptr<SortedCSCPageSource> sorted_column_source_;
126 std::shared_ptr<GradientIndexPageSource> ghist_index_source_;
128 bool EllpackExists()
const override {
return static_cast<bool>(ellpack_page_source_); }
129 bool GHistIndexExists()
const override {
return static_cast<bool>(ghist_index_source_); }
130 bool SparsePageExists()
const override {
return static_cast<bool>(sparse_page_source_); }
134 std::stringstream ss;
136 return prefix +
"-" + ss.str();
139inline std::string MakeCache(SparsePageDMatrix *ptr, std::string format, std::string prefix,
140 std::map<std::string, std::shared_ptr<Cache>> *out) {
141 auto &cache_info = *out;
142 auto name = MakeId(prefix, ptr);
143 auto id = name + format;
144 auto it = cache_info.find(
id);
145 if (it == cache_info.cend()) {
146 cache_info[id].reset(
new Cache{
false, name, format});
147 LOG(INFO) <<
"Make cache:" << cache_info[id]->ShardName() << std::endl;
Span class implementation, based on the ISO C++20 span<T>. The interface should be the same.
Definition span.h:424
DMatrix used for external memory.
Definition sparse_page_dmatrix.h:57
XGB_EXTERN_C typedef int XGDMatrixCallbackNext(DataIterHandle iter)
Callback function prototype for getting next batch of data.
XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle)
Callback function prototype for resetting external iterator.
Copyright 2015-2023 by XGBoost Contributors.
Defines console logging options for XGBoost. Used to enforce unified print behavior.
Copyright 2019-2023, XGBoost Contributors.
Definition data.py:1
Copyright 2014-2023, XGBoost Contributors.
Parameters for constructing histogram index batches.
Definition data.h:244
Runtime context for XGBoost.
Definition context.h:84