55template<
typename DType>
63 virtual bool Next(
void) = 0;
65 virtual const DType &
Value(
void)
const = 0;
73template<
typename IndexType,
typename DType = real_t>
117 return value == NULL ? DType(1.0f) :
value[i];
137 return qid == NULL ? 0 : *
qid;
148 V sum =
static_cast<V
>(0);
150 for (
size_t i = 0; i <
length; ++i) {
151 CHECK(
index[i] < size) <<
"feature index exceed bound";
155 for (
size_t i = 0; i <
length; ++i) {
156 CHECK(
index[i] < size) <<
"feature index exceed bound";
174template<
typename IndexType,
typename DType = real_t>
200 size_t cost =
size * (
sizeof(size_t) +
sizeof(DType));
202 if (
qid != NULL) cost +=
size *
sizeof(size_t);
204 if (
field != NULL) cost += ndata *
sizeof(IndexType);
205 if (
index != NULL) cost += ndata *
sizeof(IndexType);
206 if (
value != NULL) cost += ndata *
sizeof(DType);
216 CHECK(begin <= end && end <=
size);
218 ret.size = end - begin;
219 ret.label =
label + begin;
221 ret.weight =
weight + begin;
226 ret.qid =
qid + begin;
230 ret.offset =
offset + begin;
253template<
typename IndexType,
typename DType = real_t>
292template <
typename IndexType,
typename DType = real_t>
316 (
const std::string& path,
317 const std::map<std::string, std::string>& args,
327template<
typename IndexType,
typename DType = real_t>
330 typename Parser<IndexType, DType>::Factory> {};
358#define DMLC_REGISTER_DATA_PARSER(IndexType, DataType, TypeName, FactoryFunction) \
359 DMLC_REGISTRY_REGISTER(ParserFactoryReg<IndexType __DMLC_COMMA DataType>, \
360 ParserFactoryReg ## _ ## IndexType ## _ ## DataType, TypeName) \
361 .set_body(FactoryFunction)
365template<
typename IndexType,
typename DType>
366inline Row<IndexType, DType>
370 inst.label = label + rowid;
371 if (weight != NULL) {
372 inst.weight = weight + rowid;
377 inst.qid = qid + rowid;
381 inst.length = offset[rowid + 1] - offset[rowid];
383 inst.field = field + offset[rowid];
387 inst.index = index + offset[rowid];
391 inst.value = value + offset[rowid];
data iterator interface; this is not a C++ style iterator, but nice for data pulling :) This interface ...
Definition data.h:56
virtual bool Next(void)=0
move to next item
virtual void BeforeFirst(void)=0
set before first of the item
virtual const DType & Value(void) const =0
get current data
virtual ~DataIter(void) DMLC_THROW_EXCEPTION
destructor
Definition data.h:59
Common base class for function registry.
Definition registry.h:151
parser interface that parses input data used to load dmlc data format into your own data format Diffe...
Definition data.h:293
static Parser< IndexType, DType > * Create(const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
create a new instance of parser based on the "type"
virtual size_t BytesRead(void) const =0
Data structure that holds the data Row block iterator interface that gets RowBlocks Difference betwee...
Definition data.h:254
static RowBlockIter< IndexType, DType > * Create(const char *uri, unsigned part_index, unsigned num_parts, const char *type)
create a new instance of iterator that returns rowbatch by default, a in-memory based iterator will b...
virtual size_t NumCol() const =0
one row of training instance
Definition data.h:74
IndexType get_field(size_t i) const
Definition data.h:101
real_t get_weight() const
Definition data.h:129
const IndexType * index
index of each instance
Definition data.h:91
DType get_value(size_t i) const
Definition data.h:116
const DType * label
label of the instance
Definition data.h:77
const real_t * weight
weight of the instance
Definition data.h:79
const uint64_t * qid
session-id of the instance
Definition data.h:81
size_t length
length of the sparse vector
Definition data.h:83
const DType * value
array value of each instance, this can be NULL indicating every value is set to be 1
Definition data.h:96
uint64_t get_qid() const
Definition data.h:136
IndexType get_index(size_t i) const
Definition data.h:108
V SDot(const V *weight, size_t size) const
helper function to compute dot product of current
Definition data.h:147
const IndexType * field
field of each instance
Definition data.h:87
DType get_label() const
Definition data.h:122
defines console logging options for xgboost. Use to enforce unified print behavior.
namespace for dmlc
Definition array_view.h:12
unsigned index_t
this defines the unsigned integer type that can normally be used to store feature index
Definition data.h:32
float real_t
this defines the floating-point type that will be used to store feature values
Definition data.h:26
Macros common to all headers.
Registry utility that helps to build registry singletons.
Copyright 2014-2023, XGBoost Contributors.
registry entry of parser factory
Definition data.h:330
a block of data, containing several rows in sparse matrix. This is useful for (streaming-style) algor...
Definition data.h:175
const DType * label
array[size] label of each instance
Definition data.h:181
size_t size
batch size
Definition data.h:177
const real_t * weight
With weight: array[size] weight of each instance, otherwise nullptr.
Definition data.h:183
const IndexType * index
feature index
Definition data.h:189
const DType * value
feature value, can be NULL, indicating all values are 1
Definition data.h:191
const uint64_t * qid
With qid: array[size] session id of each instance, otherwise nullptr.
Definition data.h:185
const IndexType * field
field id
Definition data.h:187
Row< IndexType, DType > operator[](size_t rowid) const
get specific rows in the batch
Definition data.h:367
RowBlock Slice(size_t begin, size_t end) const
slice a RowBlock to get rows in [begin, end)
Definition data.h:215
const size_t * offset
array[size+1], row pointer to beginning of each rows
Definition data.h:179
size_t MemCostBytes(void) const
Definition data.h:199