Medial Code Documentation
Loading...
Searching...
No Matches
single_threaded_input_split.h
1// Copyright by contributors
2#ifndef DMLC_IO_SINGLE_THREADED_INPUT_SPLIT_H_
3#define DMLC_IO_SINGLE_THREADED_INPUT_SPLIT_H_
4
5#include <dmlc/threadediter.h>
6#include <dmlc/base.h>
7#include <algorithm>
9
10namespace dmlc {
11namespace io {
19 public:
21 const size_t batch_size)
22 : buffer_size_(InputSplitBase::kBufferSize), batch_size_(batch_size),
23 base_(base), tmp_chunk_(NULL) {}
24 bool NextProducer(InputSplitBase::Chunk **dptr) {
25 if (*dptr == NULL) {
26 *dptr = new InputSplitBase::Chunk(buffer_size_);
27 }
28 return base_->NextBatchEx(*dptr, batch_size_);
29 }
30 void BeforeFirstProducer() { base_->BeforeFirst(); }
31 virtual ~SingleThreadedInputSplit(void) {
32 delete tmp_chunk_;
33 delete base_;
34 }
35 virtual void BeforeFirst() {
36 BeforeFirstProducer();
37 if (tmp_chunk_ != NULL) {
38 tmp_chunk_ = NULL;
39 }
40 }
41 virtual void HintChunkSize(size_t chunk_size) {
42 buffer_size_ = std::max(chunk_size / sizeof(uint32_t), buffer_size_);
43 }
44
45 virtual bool NextRecord(Blob *out_rec) {
46 if (tmp_chunk_ == NULL) {
47 if (!NextProducer(&tmp_chunk_))
48 return false;
49 }
50 while (!base_->ExtractNextRecord(out_rec, tmp_chunk_)) {
51 tmp_chunk_ = NULL;
52 if (!NextProducer(&tmp_chunk_))
53 return false;
54 }
55 return true;
56 }
57
58 virtual bool NextChunk(Blob *out_chunk) {
59 if (tmp_chunk_ == NULL) {
60 if (!NextProducer(&tmp_chunk_))
61 return false;
62 }
63 while (!base_->ExtractNextChunk(out_chunk, tmp_chunk_)) {
64 tmp_chunk_ = NULL;
65 if (!NextProducer(&tmp_chunk_))
66 return false;
67 }
68 return true;
69 }
70
71 virtual size_t GetTotalSize(void) { return base_->GetTotalSize(); }
72
73 virtual void ResetPartition(unsigned part_index, unsigned num_parts) {
74 base_->ResetPartition(part_index, num_parts);
75 this->BeforeFirst();
76 }
77
78 private:
79 size_t buffer_size_;
80 size_t batch_size_;
81 InputSplitBase *base_;
82 InputSplitBase::Chunk *tmp_chunk_;
83};
84} // namespace io
85} // namespace dmlc
86
87#endif // DMLC_IO_SINGLE_THREADED_INPUT_SPLIT_H_
Input split that allows reading of records from a split of the data, an independent part that covers a...
Definition io.h:155
class to construct input split from multiple files
Definition input_split_base.h:21
virtual bool NextBatchEx(Chunk *chunk, size_t)
fill the given chunk with new batch of data without using internal temporary chunk
Definition input_split_base.h:115
virtual size_t GetTotalSize(void)
get the total size of the InputSplit
Definition input_split_base.h:48
bool ExtractNextChunk(Blob *out_rchunk, Chunk *chunk)
extract next chunk from the chunk
Definition input_split_base.cc:300
virtual void BeforeFirst(void)
reset the position of InputSplit to beginning
Definition input_split_base.cc:66
virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk)=0
extract next record from the chunk
virtual void ResetPartition(unsigned rank, unsigned nsplit)
reset the Input split to a certain part id, The InputSplit will be pointed to the head of the new spe...
Definition input_split_base.cc:30
Provides a single-threaded input split. Useful for debugging purposes. Be cautious of use for producti...
Definition single_threaded_input_split.h:18
virtual void ResetPartition(unsigned part_index, unsigned num_parts)
reset the Input split to a certain part id, The InputSplit will be pointed to the head of the new spe...
Definition single_threaded_input_split.h:73
virtual bool NextRecord(Blob *out_rec)
get the next record, the returning value is valid until next call to NextRecord, NextChunk or NextBat...
Definition single_threaded_input_split.h:45
virtual size_t GetTotalSize(void)
get the total size of the InputSplit
Definition single_threaded_input_split.h:71
virtual void BeforeFirst()
reset the position of InputSplit to beginning
Definition single_threaded_input_split.h:35
virtual bool NextChunk(Blob *out_chunk)
get a chunk of memory that can contain multiple records, the caller needs to parse the content of the...
Definition single_threaded_input_split.h:58
virtual void HintChunkSize(size_t chunk_size)
hint the inputsplit how large the chunk size it should return when implementing NextChunk this is a h...
Definition single_threaded_input_split.h:41
defines configuration macros
base class to construct input split from multiple files
namespace for dmlc
Definition array_view.h:12
a blob of memory region
Definition io.h:158
helper struct to hold chunk data with internal pointer to move along the record
Definition input_split_base.h:27
thread backed iterator that can be used to implement general thread-based pipeline such as prefetch a...