Medial Code Documentation
Loading...
Searching...
No Matches
parser.h
1
7#ifndef DMLC_DATA_PARSER_H_
8#define DMLC_DATA_PARSER_H_
9
10#include <dmlc/base.h>
11#include <dmlc/logging.h>
12#include <dmlc/threadediter.h>
13#include <vector>
14#include "./row_block.h"
15
16namespace dmlc {
17namespace data {
/*! \brief declare thread class */
template <typename IndexType, typename DType>
class ThreadedParser;
23template <typename IndexType, typename DType = real_t>
24class ParserImpl : public Parser<IndexType, DType> {
25 public:
26 ParserImpl() : data_ptr_(0), data_end_(0) {}
27 // virtual destructor
28 virtual ~ParserImpl() {}
30 virtual bool Next(void) {
31 while (true) {
32 while (data_ptr_ < data_end_) {
33 data_ptr_ += 1;
34 if (data_[data_ptr_ - 1].Size() != 0) {
35 block_ = data_[data_ptr_ - 1].GetBlock();
36 return true;
37 }
38 }
39 if (!ParseNext(&data_)) break;
40 data_ptr_ = 0;
41 data_end_ = static_cast<IndexType>(data_.size());
42 }
43 return false;
44 }
45 virtual const RowBlock<IndexType, DType> &Value(void) const {
46 return block_;
47 }
49 virtual size_t BytesRead(void) const = 0;
50
51 protected:
52 // allow ThreadedParser to see ParseNext
53 friend class ThreadedParser<IndexType, DType>;
59 virtual bool ParseNext(std::vector<RowBlockContainer<IndexType, DType> > *data) = 0;
61 IndexType data_ptr_, data_end_;
63 std::vector<RowBlockContainer<IndexType, DType> > data_;
66};
67
68#if DMLC_ENABLE_STD_THREAD
69
70template <typename IndexType, typename DType = real_t>
71class ThreadedParser : public ParserImpl<IndexType, DType> {
72 public:
74 : base_(base), tmp_(NULL) {
75 iter_.set_max_capacity(8);
76 iter_.Init([base](std::vector<RowBlockContainer<IndexType, DType> > **dptr) {
77 if (*dptr == NULL) {
78 *dptr = new std::vector<RowBlockContainer<IndexType, DType> >();
79 }
80 return base->ParseNext(*dptr);
81 }, [base]() {base->BeforeFirst();});
82 }
83 virtual ~ThreadedParser(void) {
84 // stop things before base is deleted
85 iter_.Destroy();
86 delete base_;
87 delete tmp_;
88 }
89 virtual void BeforeFirst() {
90 iter_.BeforeFirst();
91 }
93 using ParserImpl<IndexType, DType>::data_ptr_;
94 using ParserImpl<IndexType, DType>::data_end_;
95 virtual bool Next(void) {
96 while (true) {
97 while (data_ptr_ < data_end_) {
98 data_ptr_ += 1;
99 if ((*tmp_)[data_ptr_ - 1].Size() != 0) {
100 this->block_ = (*tmp_)[data_ptr_ - 1].GetBlock();
101 return true;
102 }
103 }
104 if (tmp_ != NULL) iter_.Recycle(&tmp_);
105 if (!iter_.Next(&tmp_)) break;
106 data_ptr_ = 0; data_end_ = tmp_->size();
107 }
108 return false;
109 }
110 virtual size_t BytesRead() const {
111 return base_->BytesRead();
112 }
113
114 protected:
115 virtual bool ParseNext(std::vector<RowBlockContainer<IndexType, DType>> * /*data*/) {
116 LOG(FATAL) << "cannot call ParseNext";
117 return false;
118 }
119
120 private:
122 Parser<IndexType, DType> *base_;
124 ThreadedIter<std::vector<RowBlockContainer<IndexType, DType> > > iter_;
126 std::vector<RowBlockContainer<IndexType, DType> > *tmp_;
127};
128#endif // DMLC_ENABLE_STD_THREAD
129} // namespace data
130} // namespace dmlc
131#endif // DMLC_DATA_PARSER_H_
virtual void BeforeFirst(void)=0
set before first of the item
parser interface that parses input data; used to load dmlc data format into your own data format. Diffe...
Definition data.h:293
base class for parser to parse data
Definition parser.h:24
virtual bool ParseNext(std::vector< RowBlockContainer< IndexType, DType > > *data)=0
read in next several blocks of data
virtual size_t BytesRead(void) const =0
std::vector< RowBlockContainer< IndexType, DType > > data_
internal data
Definition parser.h:63
RowBlock< IndexType, DType > block_
internal row block
Definition parser.h:65
IndexType data_ptr_
pointer to begin and end of data
Definition parser.h:61
virtual bool Next(void)
implement next
Definition parser.h:30
virtual const RowBlock< IndexType, DType > & Value(void) const
get current data
Definition parser.h:45
declare thread class
Definition parser.h:20
defines configuration macros
defines logging macros of dmlc; allows use of GLOG, falling back to the internal implementation when disabled
namespace for dmlc
Definition array_view.h:12
additional data structure to support RowBlock data structure
a block of data, containing several rows in sparse matrix. This is useful for (streaming-style) algor...
Definition data.h:175
dynamic data structure that holds a row block of data
Definition row_block.h:27
thread backed iterator that can be used to implement general thread-based pipeline such as prefetch a...