Medial Code Documentation
Loading...
Searching...
No Matches
parser.hpp
1#ifndef LIGHTGBM_IO_PARSER_HPP_
2#define LIGHTGBM_IO_PARSER_HPP_
3#include <LightGBM/dataset.h>
4
5#include <LightGBM/utils/common.h>
6#include <LightGBM/utils/log.h>
7
8
9#include <unordered_map>
10#include <vector>
11#include <utility>
12
13namespace LightGBM {
14
15class CSVParser: public Parser {
16public:
17 explicit CSVParser(int label_idx, int total_columns)
18 :label_idx_(label_idx), total_columns_(total_columns) {
19 }
20 inline void ParseOneLine(const char* str,
21 std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
22 int idx = 0;
23 double val = 0.0f;
24 int bias = 0;
25 *out_label = 0.0f;
26 while (*str != '\0') {
27 str = Common::Atof(str, &val);
28 if (idx == label_idx_) {
29 *out_label = val;
30 bias = -1;
31 }
32 else if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
33 out_features->emplace_back(idx + bias, val);
34 }
35 ++idx;
36 if (*str == ',') {
37 ++str;
38 } else if (*str != '\0') {
39 Log::Fatal("Input format error when parsing as CSV");
40 }
41 }
42 }
43
44 inline int TotalColumns() const override {
45 return total_columns_;
46 }
47
48private:
49 int label_idx_ = 0;
50 int total_columns_ = -1;
51};
52
53class TSVParser: public Parser {
54public:
55 explicit TSVParser(int label_idx, int total_columns)
56 :label_idx_(label_idx), total_columns_(total_columns) {
57 }
58 inline void ParseOneLine(const char* str,
59 std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
60 int idx = 0;
61 double val = 0.0f;
62 int bias = 0;
63 while (*str != '\0') {
64 str = Common::Atof(str, &val);
65 if (idx == label_idx_) {
66 *out_label = val;
67 bias = -1;
68 } else if (std::fabs(val) > kZeroThreshold || std::isnan(val)) {
69 out_features->emplace_back(idx + bias, val);
70 }
71 ++idx;
72 if (*str == '\t') {
73 ++str;
74 } else if (*str != '\0') {
75 Log::Fatal("Input format error when parsing as TSV");
76 }
77 }
78 }
79
80 inline int TotalColumns() const override {
81 return total_columns_;
82 }
83
84private:
85 int label_idx_ = 0;
86 int total_columns_ = -1;
87};
88
89class LibSVMParser: public Parser {
90public:
91 explicit LibSVMParser(int label_idx)
92 :label_idx_(label_idx) {
93 if (label_idx > 0) {
94 Log::Fatal("Label should be the first column in a LibSVM file");
95 }
96 }
97 inline void ParseOneLine(const char* str,
98 std::vector<std::pair<int, double>>* out_features, double* out_label) const override {
99 int idx = 0;
100 double val = 0.0f;
101 if (label_idx_ == 0) {
102 str = Common::Atof(str, &val);
103 *out_label = val;
104 str = Common::SkipSpaceAndTab(str);
105 }
106 while (*str != '\0') {
107 str = Common::Atoi(str, &idx);
108 str = Common::SkipSpaceAndTab(str);
109 if (*str == ':') {
110 ++str;
111 str = Common::Atof(str, &val);
112 out_features->emplace_back(idx, val);
113 } else {
114 Log::Fatal("Input format error when parsing as LibSVM");
115 }
116 str = Common::SkipSpaceAndTab(str);
117 }
118 }
119
120 inline int TotalColumns() const override {
121 return -1;
122 }
123
124private:
125 int label_idx_ = 0;
126};
127
128} // namespace LightGBM
129#endif // LIGHTGBM_IO_PARSER_HPP_
Definition parser.hpp:15
void ParseOneLine(const char *str, std::vector< std::pair< int, double > > *out_features, double *out_label) const override
Parse one line with label.
Definition parser.hpp:20
Definition parser.hpp:89
void ParseOneLine(const char *str, std::vector< std::pair< int, double > > *out_features, double *out_label) const override
Parse one line with label.
Definition parser.hpp:97
Interface for Parser.
Definition dataset.h:249
Definition parser.hpp:53
void ParseOneLine(const char *str, std::vector< std::pair< int, double > > *out_features, double *out_label) const override
Parse one line with label.
Definition parser.hpp:58
desc and descl2 fields must be written in reStructuredText format
Definition application.h:10