Medial Code Documentation
|
CSVParser, parses a dense csv format. All columns are treated as real dense data. Label will be empty if the label column is not specified. More...
#include <csv_parser.h>
Public Member Functions | |
CSVParser (InputSplit *source, const std::map< std::string, std::string > &args, int nthread) | |
![]() | |
TextParserBase (InputSplit *source, int nthread) | |
virtual void | BeforeFirst (void) |
set the position to before the first item | |
virtual size_t | BytesRead (void) const |
virtual bool | ParseNext (std::vector< RowBlockContainer< IndexType, DType > > *data) |
read in next several blocks of data | |
![]() | |
virtual bool | Next (void) |
implement next | |
virtual const RowBlock< IndexType, DType > & | Value (void) const |
get current data | |
![]() | |
Parser< uint32_t, real_t > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
Parser< uint64_t, real_t > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
Parser< uint32_t, int32_t > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
Parser< uint64_t, int32_t > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
Parser< uint32_t, int64_t > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
Parser< uint64_t, int64_t > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
![]() | |
virtual | ~DataIter (void) DMLC_THROW_EXCEPTION |
destructor | |
Protected Member Functions | |
virtual void | ParseBlock (const char *begin, const char *end, RowBlockContainer< IndexType, DType > *out) |
parse data into out | |
![]() | |
bool | FillData (std::vector< RowBlockContainer< IndexType, DType > > *data) |
read in next several blocks of data | |
Additional Inherited Members | |
![]() | |
typedef Parser< IndexType, DType > *(* | Factory) (const std::string &path, const std::map< std::string, std::string > &args, unsigned part_index, unsigned num_parts) |
Factory type of the parser. | |
![]() | |
static Parser< IndexType, DType > * | Create (const char *uri_, unsigned part_index, unsigned num_parts, const char *type) |
create a new instance of parser based on the "type" | |
![]() | |
static const char * | BackFindEndLine (const char *bptr, const char *begin) |
start from bptr, go backward and find the first end of line | |
static void | IgnoreUTF8BOM (const char **begin, const char **end) |
Ignore UTF-8 BOM if present. | |
![]() | |
IndexType | data_ptr_ |
pointer to begin and end of data | |
IndexType | data_end_ |
std::vector< RowBlockContainer< IndexType, DType > > | data_ |
internal data | |
RowBlock< IndexType, DType > | block_ |
internal row block | |
CSVParser, parses a dense csv format. All columns are treated as real dense data. Label will be empty if the label column is not specified.
This should be extended in the future to accept arguments of column types.
|
protected virtual |
parse data into out
begin | beginning of buffer |
end | end of buffer |
Implements dmlc::data::TextParserBase< IndexType, DType >.