7#ifndef DMLC_DATA_PARSER_H_
8#define DMLC_DATA_PARSER_H_
19template <
typename IndexType,
typename DType>
23template <
typename IndexType,
typename DType = real_t>
41 data_end_ =
static_cast<IndexType
>(
data_.size());
63 std::vector<RowBlockContainer<IndexType, DType> >
data_;
68#if DMLC_ENABLE_STD_THREAD
70template <
typename IndexType,
typename DType = real_t>
74 : base_(base), tmp_(NULL) {
75 iter_.set_max_capacity(8);
78 *dptr =
new std::vector<RowBlockContainer<IndexType, DType> >();
83 virtual ~ThreadedParser(
void) {
89 virtual void BeforeFirst() {
93 using ParserImpl<IndexType, DType>::data_ptr_;
94 using ParserImpl<IndexType, DType>::data_end_;
95 virtual bool Next(
void) {
97 while (data_ptr_ < data_end_) {
99 if ((*tmp_)[data_ptr_ - 1].Size() != 0) {
100 this->block_ = (*tmp_)[data_ptr_ - 1].GetBlock();
104 if (tmp_ != NULL) iter_.Recycle(&tmp_);
105 if (!iter_.Next(&tmp_))
break;
106 data_ptr_ = 0; data_end_ = tmp_->size();
110 virtual size_t BytesRead()
const {
111 return base_->BytesRead();
115 virtual bool ParseNext(std::vector<RowBlockContainer<IndexType, DType>> * ) {
116 LOG(FATAL) <<
"cannot call ParseNext";
122 Parser<IndexType, DType> *base_;
124 ThreadedIter<std::vector<RowBlockContainer<IndexType, DType> > > iter_;
126 std::vector<RowBlockContainer<IndexType, DType> > *tmp_;
virtual void BeforeFirst(void)=0
set the position to before the first item
parser interface that parses input data used to load dmlc data format into your own data format Diffe...
Definition data.h:293
base class for parser to parse data
Definition parser.h:24
virtual bool ParseNext(std::vector< RowBlockContainer< IndexType, DType > > *data)=0
read in next several blocks of data
virtual size_t BytesRead(void) const =0
std::vector< RowBlockContainer< IndexType, DType > > data_
internal data
Definition parser.h:63
RowBlock< IndexType, DType > block_
internal row block
Definition parser.h:65
IndexType data_ptr_
pointer to begin and end of data
Definition parser.h:61
virtual bool Next(void)
implement next
Definition parser.h:30
virtual const RowBlock< IndexType, DType > & Value(void) const
get current data
Definition parser.h:45
declare thread class
Definition parser.h:20
defines configuration macros
defines logging macros of dmlc; allows use of GLOG, falling back to the internal implementation when disabled
namespace for dmlc
Definition array_view.h:12
additional data structure to support RowBlock data structure
a block of data, containing several rows in sparse matrix. This is useful for (streaming-style) algor...
Definition data.h:175
dynamic data structure that holds a row block of data
Definition row_block.h:27
thread backed iterator that can be used to implement general thread-based pipeline such as prefetch a...