Medial Code Documentation
Loading...
Searching...
No Matches
recordio_split.h
Go to the documentation of this file.
1
7#ifndef DMLC_IO_RECORDIO_SPLIT_H_
8#define DMLC_IO_RECORDIO_SPLIT_H_
9
10#include <dmlc/io.h>
11#include <dmlc/recordio.h>
12#include <vector>
13#include <cstdio>
14#include <string>
15#include <cstring>
16#include "./input_split_base.h"
17
18namespace dmlc {
19namespace io {
22 public:
23 RecordIOSplitter(FileSystem *fs,  // constructor: initializes the base, then selects the partition to read
24 const char *uri,
25 unsigned rank,
26 unsigned nsplit,
27 const bool recurse_directories) {
28 this->Init(fs, uri, 4, recurse_directories);  // 4 is passed as Init's align_bytes argument
29 this->ResetPartition(rank, nsplit);  // reset the input split to the part given by (rank, nsplit)
30 }
31
32 bool IsTextParser(void) {  // query whether this object is a text parser
33 return false;  // this splitter always reports that it is not a text parser
34 }
35 virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk);  // extract the next record from the chunk into out_rec
36
37 protected:
38 virtual size_t SeekRecordBegin(Stream *fi);  // seek to the beginning of the first record at the current file pointer
39 virtual const char*
40 FindLastRecordBegin(const char *begin, const char *end);  // find the last occurrence of a record header in [begin, end) -- NOTE(review): range bounds assumed half-open, confirm
41};
42} // namespace io
43} // namespace dmlc
44#endif // DMLC_IO_RECORDIO_SPLIT_H_
interface of stream I/O for serialization
Definition io.h:30
class to construct input split from multiple files
Definition input_split_base.h:21
void Init(FileSystem *fs, const char *uri, size_t align_bytes, const bool recurse_directories=false)
initialize the base before doing anything
Definition input_split_base.cc:13
virtual void ResetPartition(unsigned rank, unsigned nsplit)
reset the Input split to a certain part id, The InputSplit will be pointed to the head of the new spe...
Definition input_split_base.cc:30
class that splits the files by RecordIO record
Definition recordio_split.h:21
virtual size_t SeekRecordBegin(Stream *fi)
seek to the beginning of the first record in current file pointer
Definition recordio_split.cc:9
virtual const char * FindLastRecordBegin(const char *begin, const char *end)
find the last occurrence of the record header
Definition recordio_split.cc:26
bool IsTextParser(void)
query whether this object is a text parser
Definition recordio_split.h:32
virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk)
extract next record from the chunk
Definition recordio_split.cc:44
defines serializable interface of dmlc
base class to construct input split from multiple files
namespace for dmlc
Definition array_view.h:12
recordio that is able to pack binary data into a splittable format, useful to exchange data in binary...