Medial Code Documentation
Loading...
Searching...
No Matches
recordio.h
Go to the documentation of this file.
1
8#ifndef DMLC_RECORDIO_H_
9#define DMLC_RECORDIO_H_
10#include <cstring>
11#include <string>
12#include "./io.h"
13#include "./logging.h"
14
15namespace dmlc {
39 public:
45 static const uint32_t kMagic = 0xced7230a;
// Encode the lrecord: pack a flag and a length into one 32-bit word.
// cflag is shifted left by 29 bits and length is OR-ed into the remaining
// low bits. Neither argument is masked here, so a length of 2^29 or more
// would overlap the flag bits; callers must keep length below that.
52 inline static uint32_t EncodeLRec(uint32_t cflag, uint32_t length) {
53 return (cflag << 29U) | length;
54 }
// Decode the flag part of an lrecord: returns the top 3 bits (bits 29..31)
// of rec as a value in the range 0..7.
60 inline static uint32_t DecodeFlag(uint32_t rec) {
61 return (rec >> 29U) & 7U;
62 }
// Decode the length part of an lrecord: returns the low 29 bits of rec,
// i.e. rec with the 3 flag bits cleared.
68 inline static uint32_t DecodeLength(uint32_t rec) {
69 return rec & ((1U << 29U) - 1U);
70 }
// Constructor. Stores the output stream pointer and resets the exception
// counter to zero.
// seek_stream_ is the same pointer viewed as a SeekStream; dynamic_cast
// yields a null pointer when the stream is not a SeekStream, which Tell()
// checks for before use.
// NOTE(review): ownership of `stream` is not visible here; presumably the
// caller keeps it alive for the writer's lifetime - confirm.
75 explicit RecordIOWriter(Stream *stream)
76 : stream_(stream), seek_stream_(dynamic_cast<SeekStream*>(stream)),
77 except_counter_(0) {
// Runtime check that uint32_t is exactly 4 bytes wide.
78 CHECK(sizeof(uint32_t) == 4) << "uint32_t needs to be 4 bytes";
79 }
// Write a record of `size` bytes starting at `buf` to the stream.
// Declaration only; the definition lives in recordio.cc.
85 void WriteRecord(const void *buf, size_t size);
// Write a record to the stream from a std::string. Convenience overload
// that forwards the string's buffer and length to the pointer/size overload.
90 inline void WriteRecord(const std::string &data) {
91 this->WriteRecord(data.c_str(), data.length());
92 }
// Returns the current value of except_counter_, which the constructor
// initialises to 0.
// NOTE(review): what increments the counter is decided by the WriteRecord
// implementation, which is not visible in this header - confirm there.
97 inline size_t except_counter(void) const {
98 return except_counter_;
99 }
100
// Tell the current position of the underlying stream.
// Only valid when the stream passed to the constructor is a SeekStream;
// the CHECK guards against a null seek_stream_ before it is dereferenced.
102 inline size_t Tell(void) {
103 CHECK(seek_stream_ != NULL) << "The input stream is not seekable";
104 return seek_stream_->Tell();
105 }
106
107 private:
109 Stream *stream_;
111 SeekStream *seek_stream_;
113 size_t except_counter_;
114};
120 public:
// Constructor. Stores the input stream pointer and clears the
// end-of-stream flag.
// seek_stream_ is the same pointer viewed as a SeekStream; dynamic_cast
// yields a null pointer when the stream is not a SeekStream, which Seek()
// and Tell() check for before use.
// NOTE(review): ownership of `stream` is not visible here; presumably the
// caller keeps it alive for the reader's lifetime - confirm.
125 explicit RecordIOReader(Stream *stream)
126 : stream_(stream), seek_stream_(dynamic_cast<SeekStream*>(stream)),
127 end_of_stream_(false) {
// Runtime check that uint32_t is exactly 4 bytes wide.
128 CHECK(sizeof(uint32_t) == 4) << "uint32_t needs to be 4 bytes";
129 }
// Read the next complete record from the stream into *out_rec.
// Declaration only; the definition lives in recordio.cc.
// NOTE(review): the bool presumably reports whether a record was read -
// confirm against the implementation.
135 bool NextRecord(std::string *out_rec);
136
// Seek to position `pos` of the input stream.
// Only valid when the stream passed to the constructor is a SeekStream;
// the CHECK guards against a null seek_stream_ before it is dereferenced.
138 inline void Seek(size_t pos) {
139 CHECK(seek_stream_ != NULL) << "The input stream is not seekable";
140 seek_stream_->Seek(pos);
141 }
142
// Tell the current position of the input stream.
// Only valid when the stream passed to the constructor is a SeekStream;
// the CHECK guards against a null seek_stream_ before it is dereferenced.
144 inline size_t Tell(void) {
145 CHECK(seek_stream_ != NULL) << "The input stream is not seekable";
146 return seek_stream_->Tell();
147 }
148
149 private:
151 Stream *stream_;
152 SeekStream *seek_stream_;
154 bool end_of_stream_;
155};
156
167 public:
175 unsigned part_index = 0,
176 unsigned num_parts = 1);
186 bool NextRecord(InputSplit::Blob *out_rec);
187
188 private:
190 std::string temp_;
192 char *pbegin_, *pend_;
193};
194
195} // namespace dmlc
196#endif // DMLC_RECORDIO_H_
reader of binary recordio from Blob returned by InputSplit This class divides the blob into several i...
Definition recordio.h:166
bool NextRecord(InputSplit::Blob *out_rec)
read next complete record from stream the blob contains the memory content NOTE: this function is not...
Definition recordio.cc:114
reader of binary recordio that reads records from a stream
Definition recordio.h:119
RecordIOReader(Stream *stream)
constructor
Definition recordio.h:125
bool NextRecord(std::string *out_rec)
read next complete record from stream
Definition recordio.cc:53
void Seek(size_t pos)
seek to certain position of the input stream
Definition recordio.h:138
size_t Tell(void)
tell the current position of the input stream
Definition recordio.h:144
writer of binary recordio. Binary format for recordio — recordio format: magic, lrecord, data, pad
Definition recordio.h:38
static const uint32_t kMagic
magic number of recordio. Note: ((kMagic >> 29U) & 7) > 3; this ensures lrec will not be kMagic
Definition recordio.h:45
static uint32_t DecodeFlag(uint32_t rec)
decode the flag part of lrecord
Definition recordio.h:60
size_t except_counter(void) const
Definition recordio.h:97
static uint32_t DecodeLength(uint32_t rec)
decode the length part of lrecord
Definition recordio.h:68
void WriteRecord(const std::string &data)
write record to the stream
Definition recordio.h:90
size_t Tell(void)
tell the current position of the input stream
Definition recordio.h:102
static uint32_t EncodeLRec(uint32_t cflag, uint32_t length)
encode the lrecord
Definition recordio.h:52
RecordIOWriter(Stream *stream)
constructor
Definition recordio.h:75
void WriteRecord(const void *buf, size_t size)
write record to the stream
Definition recordio.cc:11
interface of an i/o stream that supports seek
Definition io.h:109
virtual void Seek(size_t pos)=0
seek to certain position of the file
virtual size_t Tell(void)=0
tell the position of the stream
interface of stream I/O for serialization
Definition io.h:30
defines console logging options for xgboost. Use to enforce unified print behavior.
namespace for dmlc
Definition array_view.h:12
Copyright 2014-2023, XGBoost Contributors.
a blob of memory region
Definition io.h:158