4#ifndef XGBOOST_DATA_FILE_ITERATOR_H_
5#define XGBOOST_DATA_FILE_ITERATOR_H_
20[[nodiscard]] std::string ValidateFileFormat(std::string
const& uri);
39 std::unique_ptr<dmlc::Parser<uint32_t>> parser_;
48 FileIterator(std::string uri,
unsigned part_index,
unsigned num_parts)
49 : uri_{ValidateFileFormat(std::move(uri))}, part_idx_{part_index}, n_parts_{num_parts} {
58 if (parser_->Next()) {
59 row_block_ = parser_->Value();
62 indptr_ = ArrayInterfaceStr(MakeVec(row_block_.
offset, row_block_.
size + 1));
63 values_ = ArrayInterfaceStr(MakeVec(row_block_.
value, row_block_.
offset[row_block_.
size]));
64 indices_ = ArrayInterfaceStr(MakeVec(row_block_.
index, row_block_.
offset[row_block_.
size]));
66 size_t n_columns = *std::max_element(row_block_.
index,
73 values_.c_str(), n_columns);
75 if (row_block_.
label) {
92 auto Proxy() ->
decltype(proxy_) {
return proxy_; }
100inline void Reset(DataIterHandle self) {
104inline int Next(DataIterHandle self) {
105 return static_cast<FileIterator*
>(self)->Next();
parser interface that parses input data used to load dmlc data format into your own data format Diffe...
Definition data.h:293
An iterator for implementing external memory support with file inputs.
Definition file_iterator.h:27
defines common input data structure, and interface for handling the input data
XGB_DLL int XGDMatrixSetDenseInfo(DMatrixHandle handle, const char *field, void const *data, bst_ulong size, int type)
Set meta info from dense matrix. Valid field names are:
Definition c_api.cc:672
XGB_DLL int XGDMatrixFree(DMatrixHandle handle)
free space in data matrix
Definition c_api.cc:585
XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle *out)
Second set of callback functions, used by constructing Quantile DMatrix or external memory DMatrix us...
Definition c_api.cc:359
XGB_DLL int XGProxyDMatrixSetDataCSR(DMatrixHandle handle, char const *indptr, char const *indices, char const *data, bst_ulong ncol)
Set data on a DMatrix proxy.
Definition c_api.cc:406
defines console logging options for xgboost. Use to enforce unified print behavior.
Copyright 2021-2023 by XGBoost Contributors.
Copyright 2019-2023, XGBoost Contributors.
Definition data.py:1
auto MakeVec(T *ptr, size_t s, int32_t device=-1)
Create a vector view from contiguous memory.
Definition linalg.h:649
a block of data, containing several rows in sparse matrix. This is useful for (streaming-style) algor...
Definition data.h:175
const DType * label
array[size] label of each instance
Definition data.h:181
size_t size
batch size
Definition data.h:177
const real_t * weight
With weight: array[size] weight of each instance, otherwise nullptr.
Definition data.h:183
const IndexType * index
feature index
Definition data.h:189
const DType * value
feature value, can be NULL, indicating all values are 1
Definition data.h:191
const uint64_t * qid
With qid: array[size] session id of each instance, otherwise nullptr.
Definition data.h:185
const size_t * offset
array[size+1], row pointers to the beginning of each row
Definition data.h:179
Copyright 2015~2023 by XGBoost Contributors.