8#ifndef DMLC_DATA_ROW_BLOCK_H_
9#define DMLC_DATA_ROW_BLOCK_H_
26template<
typename IndexType,
typename DType = real_t>
35 std::vector<uint64_t>
qid;
71 inline size_t Size(
void)
const {
76 return offset.size() *
sizeof(size_t) +
79 qid.size() *
sizeof(size_t) +
80 field.size() *
sizeof(IndexType) +
81 index.size() *
sizeof(IndexType) +
82 value.size() *
sizeof(DType);
91 label.push_back(row.get_label());
92 weight.push_back(row.get_weight());
93 qid.push_back(row.get_qid());
94 if (row.field != NULL) {
95 for (
size_t i = 0; i < row.length; ++i) {
96 CHECK_LE(row.field[i], std::numeric_limits<IndexType>::max())
97 <<
"field exceed numeric bound of current type";
98 IndexType field_id =
static_cast<IndexType
>(row.field[i]);
99 field.push_back(field_id);
103 for (
size_t i = 0; i < row.length; ++i) {
104 CHECK_LE(row.index[i], std::numeric_limits<IndexType>::max())
105 <<
"index exceed numeric bound of current type";
106 IndexType findex =
static_cast<IndexType
>(row.index[i]);
107 index.push_back(findex);
110 if (row.value != NULL) {
111 for (
size_t i = 0; i < row.length; ++i) {
112 value.push_back(row.value[i]);
124 size_t size =
label.size();
127 batch.
size *
sizeof(DType));
128 if (batch.
weight != NULL) {
131 if (batch.
qid != NULL) {
135 if (batch.
field != NULL) {
138 for (
size_t i = 0; i < ndata; ++i) {
139 CHECK_LE(batch.
field[i], std::numeric_limits<IndexType>::max())
140 <<
"field exceed numeric bound of current type";
141 IndexType field_id =
static_cast<IndexType
>(batch.
field[i]);
148 for (
size_t i = 0; i < ndata; ++i) {
149 CHECK_LE(batch.
index[i], std::numeric_limits<IndexType>::max())
150 <<
"index exceed numeric bound of current type";
151 IndexType findex =
static_cast<IndexType
>(batch.
index[i]);
155 if (batch.
value != NULL) {
158 ndata *
sizeof(DType));
160 size_t shift =
offset[size];
163 for (
size_t i = 0; i < batch.
size; ++i) {
164 ohead[i] = shift + batch.
offset[i + 1] - batch.
offset[0];
169template<
typename IndexType,
typename DType>
174 CHECK_EQ(label.size() + 1, offset.size());
176 CHECK_EQ(offset.back(), index.size());
177 CHECK(offset.back() == value.size() || value.size() == 0);
179 data.size = offset.size() - 1;
189template<
typename IndexType,
typename DType>
199 fo->Write(&max_field,
sizeof(IndexType));
200 fo->Write(&max_index,
sizeof(IndexType));
202template<
typename IndexType,
typename DType>
205 if (!fi->Read(&offset))
return false;
206 CHECK(fi->Read(&label)) <<
"Bad RowBlock format";
207 CHECK(fi->Read(&weight)) <<
"Bad RowBlock format";
208 CHECK(fi->Read(&qid)) <<
"Bad RowBlock format";
209 CHECK(fi->Read(&field)) <<
"Bad RowBlock format";
210 CHECK(fi->Read(&index)) <<
"Bad RowBlock format";
211 CHECK(fi->Read(&value)) <<
"Bad RowBlock format";
212 CHECK(fi->Read(&max_field,
sizeof(IndexType))) <<
"Bad RowBlock format";
213 CHECK(fi->Read(&max_index,
sizeof(IndexType))) <<
"Bad RowBlock format";
one row of a training instance
Definition data.h:74
interface of stream I/O for serialization
Definition io.h:30
defines common input data structure, and interface for handling the input data
defines serializable interface of dmlc
defines the logging macros of dmlc; allows use of GLOG, falling back to the internal implementation when GLOG is disabled
namespace for dmlc
Definition array_view.h:12
float real_t
this defines the floating-point type that will be used to store feature values
Definition data.h:26
T * BeginPtr(std::vector< T > &vec)
safely get the beginning address of a vector
Definition base.h:284
a block of data, containing several rows in a sparse matrix. This is useful for (streaming-style) algor...
Definition data.h:175
const DType * label
array[size] label of each instance
Definition data.h:181
size_t size
batch size
Definition data.h:177
const real_t * weight
With weight: array[size] weight of each instance, otherwise nullptr.
Definition data.h:183
const IndexType * index
feature index
Definition data.h:189
const DType * value
feature value, can be NULL, indicating all values are 1
Definition data.h:191
const uint64_t * qid
With qid: array[size] session id of each instance, otherwise nullptr.
Definition data.h:185
const IndexType * field
field id
Definition data.h:187
const size_t * offset
array[size+1], row pointer to the beginning of each row
Definition data.h:179
dynamic data structure that holds a row block of data
Definition row_block.h:27
void Save(Stream *fo) const
write the row block to a binary stream
Definition row_block.h:191
std::vector< IndexType > index
feature index
Definition row_block.h:39
IndexType max_index
maximum value of index
Definition row_block.h:45
size_t Size(void) const
size of the data
Definition row_block.h:71
std::vector< size_t > offset
array[size+1], row pointer to the beginning of each row
Definition row_block.h:29
RowBlock< IndexType, DType > GetBlock(void) const
convert to a row block
Definition row_block.h:171
std::vector< IndexType > field
field index
Definition row_block.h:37
std::vector< uint64_t > qid
array[size] session-id of each instance
Definition row_block.h:35
void Push(Row< I, DType > row)
push the row into container
Definition row_block.h:90
bool Load(Stream *fi)
load row block from a binary stream
Definition row_block.h:204
std::vector< DType > value
feature value
Definition row_block.h:41
void Push(RowBlock< I, DType > batch)
push the row block into container
Definition row_block.h:123
void Clear(void)
clear the container
Definition row_block.h:64
IndexType max_field
maximum value of field
Definition row_block.h:43
size_t MemCostBytes(void) const
Definition row_block.h:75
std::vector< real_t > weight
array[size] weight of each instance
Definition row_block.h:33
std::vector< DType > label
array[size] label of each instance
Definition row_block.h:31