Medial Code Documentation
Loading...
Searching...
No Matches
line_split.cc
1// Copyright by Contributors
2#include <dmlc/io.h>
3#include <dmlc/logging.h>
4#include <algorithm>
5#include "./line_split.h"
6
7namespace dmlc {
8namespace io {
// Body of the record-seek routine: reads `fi` one byte at a time and returns
// `nstep`, the number of bytes read up to and including the first run of
// end-of-line characters ('\n' or '\r').
// NOTE(review): the signature line (listing line 9) is absent from this
// extract; presumably `size_t LineSplitter::SeekRecordBegin(Stream *fi)` —
// confirm against the original file.
10 char c = '\0';
11 size_t nstep = 0;
12 // search until the first end-of-line character; every byte read is counted
13 while (true) {
// Read() returning 0 means no byte was obtained (presumably end of stream),
// so return what has been counted so far.
14 if (fi->Read(&c, sizeof(c)) == 0) return nstep;
15 nstep += 1;
16 if (c == '\n' || c == '\r') break;
17 }
18 // keep reading until the first character that is not an end-of-line
19 while (true) {
20 if (fi->Read(&c, sizeof(c)) == 0) return nstep;
21 if (c != '\n' && c != '\r') break;
22 // only further end-of-line characters are counted; the first
// non-end-of-line character has been read from `fi` but is not added to nstep
23 nstep += 1;
24 }
25 return nstep;
26}
// Scans [begin, end) backwards for the last end-of-line character and returns
// a pointer to the byte right after it, i.e. where the last line starts.
//   begin: first byte of the range
//   end:   one past the last byte of the range; must differ from begin
// Returns `begin` when no '\n' or '\r' is found in (begin, end).
27const char* LineSplitter::FindLastRecordBegin(const char *begin,
28 const char *end) {
// an empty range is rejected: the loop below dereferences end - 1
29 CHECK(begin != end);
// the loop stops before reaching `begin`, so the byte at `begin` itself is
// never inspected
30 for (const char *p = end - 1; p != begin; --p) {
31 if (*p == '\n' || *p == '\r') return p + 1;
32 }
33 return begin;
34}
35
// Body of the record-extraction routine: cuts the next line off the front of
// `chunk`, describes it in `out_rec`, and advances `chunk->begin` past it.
// Returns false when the chunk is empty, true otherwise.
// NOTE(review): the signature line (listing line 36) is absent from this
// extract; presumably
// `bool LineSplitter::ExtractNextRecord(Blob *out_rec, Chunk *chunk)` —
// confirm against the original file.
37 if (chunk->begin == chunk->end) return false;
38 char *p;
// advance to the first end-of-line character (or to chunk->end if none)
39 for (p = chunk->begin; p != chunk->end; ++p) {
40 if (*p == '\n' || *p == '\r') break;
41 }
// skip the whole run of end-of-line characters; p ends at the first byte of
// the following line, or at chunk->end
42 for (; p != chunk->end; ++p) {
43 if (*p != '\n' && *p != '\r') break;
44 }
45 // set the string end sign for safety
46 if (p == chunk->end) {
// writes at chunk->end itself; assumes the buffer has one writable byte
// past the chunk — TODO confirm against the Chunk allocation
47 *p = '\0';
48 } else {
// p - 1 is the last end-of-line character of the run skipped above, so
// overwriting it does not touch record data
49 *(p - 1) = '\0';
50 }
51 out_rec->dptr = chunk->begin;
// size spans up to p, so it includes the trailing end-of-line characters
52 out_rec->size = p - chunk->begin;
53 chunk->begin = p;
54 return true;
55}
56
57} // namespace io
58} // namespace dmlc
interface of stream I/O for serialization
Definition io.h:30
virtual size_t SeekRecordBegin(Stream *fi)
seek to the beginning of the first record in current file pointer
Definition line_split.cc:9
virtual const char * FindLastRecordBegin(const char *begin, const char *end)
find the last occurrence of record header
Definition line_split.cc:27
virtual bool ExtractNextRecord(Blob *out_rec, Chunk *chunk)
extract next record from the chunk
Definition line_split.cc:36
defines serializable interface of dmlc
defines logging macros of dmlc; allows use of GLOG, falling back to the internal implementation when disabled
base class implementation of input splitter
namespace for dmlc
Definition array_view.h:12
a blob of memory region
Definition io.h:158
size_t size
size of the memory region
Definition io.h:162
void * dptr
points to start of the memory region
Definition io.h:160
helper struct to hold chunk data with internal pointer to move along the record
Definition input_split_base.h:27