Medial Code Documentation
Loading...
Searching...
No Matches
Public Member Functions | Protected Member Functions | Static Protected Member Functions
dmlc::data::TextParserBase< IndexType, DType > Class Template Reference [abstract]

Text parser that parses the input lines and returns rows in input data. More...

#include <text_parser.h>

Inheritance diagram for dmlc::data::TextParserBase< IndexType, DType >:
dmlc::data::ParserImpl< IndexType, DType > dmlc::Parser< IndexType, DType > dmlc::DataIter< DType > dmlc::data::CSVParser< IndexType, real_t > dmlc::data::LibFMParser< IndexType, real_t > dmlc::data::LibSVMParser< IndexType, real_t > dmlc::data::CSVParser< IndexType, DType > dmlc::data::LibFMParser< IndexType, DType > dmlc::data::LibSVMParser< IndexType, DType > parser_test::CSVParserTest< IndexType, DType > parser_test::LibFMParserTest< IndexType, DType > parser_test::LibSVMParserTest< IndexType, DType >

Public Member Functions

 TextParserBase (InputSplit *source, int nthread)
 
virtual void BeforeFirst (void)
 set the position to before the first item
 
virtual size_t BytesRead (void) const
 
virtual bool ParseNext (std::vector< RowBlockContainer< IndexType, DType > > *data)
 read in next several blocks of data
 
- Public Member Functions inherited from dmlc::data::ParserImpl< IndexType, DType >
virtual bool Next (void)
 implement next
 
virtual const RowBlock< IndexType, DType > & Value (void) const
 get current data
 
- Public Member Functions inherited from dmlc::Parser< IndexType, DType >
Parser< uint32_t, real_t > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 
Parser< uint64_t, real_t > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 
Parser< uint32_t, int32_t > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 
Parser< uint64_t, int32_t > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 
Parser< uint32_t, int64_t > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 
Parser< uint64_t, int64_t > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 
- Public Member Functions inherited from dmlc::DataIter< DType >
virtual ~DataIter (void) DMLC_THROW_EXCEPTION
 destructor
 

Protected Member Functions

virtual void ParseBlock (const char *begin, const char *end, RowBlockContainer< IndexType, DType > *out)=0
 parse data into out
 
bool FillData (std::vector< RowBlockContainer< IndexType, DType > > *data)
 read in next several blocks of data
 

Static Protected Member Functions

static const char * BackFindEndLine (const char *bptr, const char *begin)
 start from bptr, go backward and find the first end of line
 
static void IgnoreUTF8BOM (const char **begin, const char **end)
 Ignore UTF-8 BOM if present.
 

Additional Inherited Members

- Public Types inherited from dmlc::Parser< IndexType, DType >
typedef Parser< IndexType, DType > *(* Factory) (const std::string &path, const std::map< std::string, std::string > &args, unsigned part_index, unsigned num_parts)
 Factory type of the parser.
 
- Static Public Member Functions inherited from dmlc::Parser< IndexType, DType >
static Parser< IndexType, DType > * Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type)
 create a new instance of parser based on the "type"
 
- Protected Attributes inherited from dmlc::data::ParserImpl< IndexType, DType >
IndexType data_ptr_
 pointer to begin and end of data
 
IndexType data_end_
 
std::vector< RowBlockContainer< IndexType, DType > > data_
 internal data
 
RowBlock< IndexType, DType > block_
 internal row block
 

Detailed Description

template<typename IndexType, typename DType = real_t>
class dmlc::data::TextParserBase< IndexType, DType >

Text parser that parses the input lines and returns rows in input data.

Member Function Documentation

◆ BackFindEndLine()

template<typename IndexType , typename DType = real_t>
static const char * dmlc::data::TextParserBase< IndexType, DType >::BackFindEndLine ( const char *  bptr,
const char *  begin 
)
[inline, static, protected]

Start from bptr, go backward and find the first end of line.

Parameters
bptr: end position from which to search backward
begin: the beginning position of the buffer
Returns
position of the first end of line found going backward; returns begin if none is found

◆ BeforeFirst()

template<typename IndexType , typename DType = real_t>
virtual void dmlc::data::TextParserBase< IndexType, DType >::BeforeFirst ( void  )
[inline, virtual]

Set the position to before the first item.

Implements dmlc::DataIter< DType >.

◆ BytesRead()

template<typename IndexType , typename DType = real_t>
virtual size_t dmlc::data::TextParserBase< IndexType, DType >::BytesRead ( void  ) const
[inline, virtual]
Returns
number of bytes read so far

Implements dmlc::data::ParserImpl< IndexType, DType >.

◆ FillData()

template<typename IndexType , typename DType >
bool dmlc::data::TextParserBase< IndexType, DType >::FillData ( std::vector< RowBlockContainer< IndexType, DType > > *  data)
[inline, protected]

read in next several blocks of data

Parameters
data: vector of data to be returned
Returns
true if the data is loaded, false if the end is reached

◆ IgnoreUTF8BOM()

template<typename IndexType , typename DType = real_t>
static void dmlc::data::TextParserBase< IndexType, DType >::IgnoreUTF8BOM ( const char **  begin,
const char **  end 
)
[inline, static, protected]

Ignore UTF-8 BOM if present.

Parameters
begin: reference to the begin pointer
end: reference to the end pointer

◆ ParseBlock()

template<typename IndexType , typename DType = real_t>
virtual void dmlc::data::TextParserBase< IndexType, DType >::ParseBlock ( const char *  begin,
const char *  end,
RowBlockContainer< IndexType, DType > *  out 
)
[protected, pure virtual]

◆ ParseNext()

template<typename IndexType , typename DType = real_t>
virtual bool dmlc::data::TextParserBase< IndexType, DType >::ParseNext ( std::vector< RowBlockContainer< IndexType, DType > > *  data)
[inline, virtual]

read in next several blocks of data

Parameters
data: vector of data to be returned
Returns
true if the data is loaded, false if the end is reached

Implements dmlc::data::ParserImpl< IndexType, DType >.


The documentation for this class was generated from the following file: