Medial Code Documentation
Loading...
Searching...
No Matches
strtonum.h
Go to the documentation of this file.
1
6#ifndef DMLC_STRTONUM_H_
7#define DMLC_STRTONUM_H_
8
#if DMLC_USE_CXX11
#include <type_traits>
#endif

#include <cerrno>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>
#include <string>
#include "./base.h"
#include "./logging.h"
18
19namespace dmlc {
/*!
 * \brief Inline implementation of isspace(). Tests whether the given character
 *        is a whitespace letter.
 * \param c Character to test
 * \return true if c is one of ' ', '\\t', '\\r', '\\n' or '\\f'; false otherwise.
 *         No other character (vertical tab included) is matched.
 */
inline bool isspace(char c) {
  return (c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f');
}
29
/*!
 * \brief Inline implementation of isblank(). Tests whether the given character
 *        is a space or tab character.
 * \param c Character to test
 * \return true if c is ' ' or '\\t'; false otherwise
 */
inline bool isblank(char c) {
  return (c == ' ' || c == '\t');
}
39
/*!
 * \brief Inline implementation of isdigit(). Tests whether the given character
 *        is a decimal digit.
 * \param c Character to test
 * \return true if c lies in the range '0'..'9'; false otherwise
 */
inline bool isdigit(char c) {
  return (c >= '0' && c <= '9');
}
49
/*!
 * \brief Inline implementation of isalpha(). Tests whether the given character
 *        is an alphabet letter.
 * \param c Character to test
 * \return true if c lies in 'a'..'z' or 'A'..'Z'; false otherwise
 */
inline bool isalpha(char c) {
  // The range comparisons below are only meaningful when the letters are
  // laid out contiguously, which the assertion verifies for 'A'..'Z'.
  static_assert(
    static_cast<int>('A') == 65 && static_cast<int>('Z' - 'A') == 25,
    "Only system with ASCII character set is supported");
  return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
62
/*!
 * \brief Tests whether the given character is a valid letter in the string
 *        representation of a floating-point value, i.e. a decimal digit,
 *        a sign ('+' / '-'), a decimal point or an exponent marker ('e' / 'E').
 * \param c Character to test
 * \return true if c is one of the characters listed above; false otherwise
 */
inline bool isdigitchars(char c) {
  return (c >= '0' && c <= '9')
    || c == '+' || c == '-'
    || c == '.'
    || c == 'e' || c == 'E';
}
76
/*!
 * \brief Maximum number of decimal digits dmlc::strtof() / dmlc::strtod() will
 *        process. Trailing digits will be ignored.
 */
const int kStrtofMaxDigits = 19;
82
98template <typename FloatType, bool CheckRange = false>
99inline FloatType ParseFloat(const char* nptr, char** endptr) {
100#if DMLC_USE_CXX11
101 static_assert(std::is_same<FloatType, double>::value
102 || std::is_same<FloatType, float>::value,
103 "ParseFloat is defined only for 'float' and 'double' types");
104 constexpr unsigned kMaxExponent
105 = (std::is_same<FloatType, double>::value ? 308U : 38U);
106 constexpr FloatType kMaxSignificandForMaxExponent
107 = static_cast<FloatType>(std::is_same<FloatType, double>::value
108 ? 1.79769313486231570 : 3.402823466);
109 // If a floating-point value has kMaxExponent, what is
110 // the largest possible significand value?
111 constexpr FloatType kMaxSignificandForNegMaxExponent
112 = static_cast<FloatType>(std::is_same<FloatType, double>::value
113 ? 2.22507385850720139 : 1.175494351);
114 // If a floating-point value has -kMaxExponent, what is
115 // the largest possible significand value?
116#else
117 const unsigned kMaxExponent
118 = (sizeof(FloatType) == sizeof(double) ? 308U : 38U);
119 const FloatType kMaxSignificandForMaxExponent
120 = static_cast<FloatType>(sizeof(FloatType) == sizeof(double)
121 ? 1.79769313486231570 : 3.402823466);
122 const FloatType kMaxSignificandForNegMaxExponent
123 = static_cast<FloatType>(sizeof(FloatType) == sizeof(double)
124 ? 2.22507385850720139 : 1.175494351);
125#endif
126
127 const char *p = nptr;
128 // Skip leading white space, if any. Not necessary
129 while (isspace(*p) ) ++p;
130
131 // Get sign, if any.
132 bool sign = true;
133 if (*p == '-') {
134 sign = false; ++p;
135 } else if (*p == '+') {
136 ++p;
137 }
138
139 // Handle INF and NAN
140 {
141 int i = 0;
142 // case-insensitive match for INF and INFINITY
143 while (i < 8 && static_cast<char>((*p) | 32) == "infinity"[i]) {
144 ++i; ++p;
145 }
146 if (i == 3 || i == 8) {
147 if (endptr) *endptr = (char*)p; // NOLINT(*)
148 return sign ? std::numeric_limits<FloatType>::infinity()
149 : -std::numeric_limits<FloatType>::infinity();
150 } else {
151 p -= i;
152 }
153
154 // case-insensitive match for NAN
155 i = 0;
156 while (i < 3 && static_cast<char>((*p) | 32) == "nan"[i]) {
157 ++i; ++p;
158 }
159 if (i == 3) {
160 // Got NAN; check if the value is of form NAN(char_sequence)
161 if (*p == '(') {
162 ++p;
163 while (isdigit(*p) || isalpha(*p) || *p == '_') ++p;
164 CHECK_EQ(*p, ')') << "Invalid NAN literal";
165 ++p;
166 }
167 static_assert(std::numeric_limits<FloatType>::has_quiet_NaN,
168 "Only system with quiet NaN is supported");
169 if (endptr) *endptr = (char*)p; // NOLINT(*)
170 return std::numeric_limits<FloatType>::quiet_NaN();
171 } else {
172 p -= i;
173 }
174 }
175
176 // Get digits before decimal point or exponent, if any.
177 uint64_t predec; // to store digits before decimal point
178 for (predec = 0; isdigit(*p); ++p) {
179 predec = predec * 10ULL + static_cast<uint64_t>(*p - '0');
180 }
181 FloatType value = static_cast<FloatType>(predec);
182
183 // Get digits after decimal point, if any.
184 if (*p == '.') {
185 uint64_t pow10 = 1;
186 uint64_t val2 = 0;
187 int digit_cnt = 0;
188 ++p;
189 while (isdigit(*p)) {
190 if (digit_cnt < kStrtofMaxDigits) {
191 val2 = val2 * 10ULL + static_cast<uint64_t>(*p - '0');
192 pow10 *= 10ULL;
193 } // when kStrtofMaxDigits is read, ignored following digits
194 ++p;
195 ++digit_cnt;
196 }
197 value += static_cast<FloatType>(
198 static_cast<double>(val2) / static_cast<double>(pow10));
199 }
200
201 // Handle exponent, if any.
202 if ((*p == 'e') || (*p == 'E')) {
203 ++p;
204 bool frac = false;
205 FloatType scale = static_cast<FloatType>(1.0f);
206 unsigned expon;
207 // Get sign of exponent, if any.
208 if (*p == '-') {
209 frac = true;
210 ++p;
211 } else if (*p == '+') {
212 ++p;
213 }
214 // Get digits of exponent, if any.
215 for (expon = 0; isdigit(*p); ++p) {
216 expon = expon * 10U + static_cast<unsigned>(*p - '0');
217 }
218 if (expon > kMaxExponent) { // out of range, clip or raise error
219 if (CheckRange) {
220 errno = ERANGE;
221 if (endptr) *endptr = (char*)p; // NOLINT(*)
222 return std::numeric_limits<FloatType>::infinity();
223 } else {
224 expon = kMaxExponent;
225 }
226 }
227 // handle edge case where exponent is exactly kMaxExponent
228 if (expon == kMaxExponent
229 && ((!frac && value > kMaxSignificandForMaxExponent)
230 || (frac && value < kMaxSignificandForNegMaxExponent))) {
231 if (CheckRange) {
232 errno = ERANGE;
233 if (endptr) *endptr = (char*)p; // NOLINT(*)
234 return std::numeric_limits<FloatType>::infinity();
235 } else {
236 value = (frac ? kMaxSignificandForNegMaxExponent
237 : kMaxSignificandForMaxExponent);
238 }
239 }
240 // Calculate scaling factor.
241 while (expon >= 8U) { scale *= static_cast<FloatType>(1E8f); expon -= 8U; }
242 while (expon > 0U) { scale *= static_cast<FloatType>(10.0f); expon -= 1U; }
243 // Return signed and scaled floating point result.
244 value = frac ? (value / scale) : (value * scale);
245 }
246 // Consume 'f' suffix, if any
247 if (*p == 'f' || *p == 'F') {
248 ++p;
249 }
250
251 if (endptr) *endptr = (char*)p; // NOLINT(*)
252 return sign ? value : - value;
253}
254
268inline float strtof(const char* nptr, char** endptr) {
269 return ParseFloat<float>(nptr, endptr);
270}
271
286inline float strtof_check_range(const char* nptr, char** endptr) {
287 return ParseFloat<float, true>(nptr, endptr);
288}
289
303inline double strtod(const char* nptr, char** endptr) {
304 return ParseFloat<double>(nptr, endptr);
305}
306
321inline double strtod_check_range(const char* nptr, char** endptr) {
322 return ParseFloat<double, true>(nptr, endptr);
323}
324
336template <typename SignedIntType>
337inline SignedIntType ParseSignedInt(const char* nptr, char** endptr, int base) {
338#ifdef DMLC_USE_CXX11
339 static_assert(std::is_signed<SignedIntType>::value
340 && std::is_integral<SignedIntType>::value,
341 "ParseSignedInt is defined for signed integers only");
342#endif
343 CHECK(base <= 10 && base >= 2);
344 const char* p = nptr;
345 // Skip leading white space, if any. Not necessary
346 while (isspace(*p) ) ++p;
347
348 // Get sign if any
349 bool sign = true;
350 if (*p == '-') {
351 sign = false; ++p;
352 } else if (*p == '+') {
353 ++p;
354 }
355
356 SignedIntType value;
357 const SignedIntType base_val = static_cast<SignedIntType>(base);
358 for (value = 0; isdigit(*p); ++p) {
359 value = value * base_val + static_cast<SignedIntType>(*p - '0');
360 }
361
362 if (endptr) *endptr = (char*)p; // NOLINT(*)
363 return sign ? value : - value;
364}
365
377template <typename UnsignedIntType>
378inline UnsignedIntType ParseUnsignedInt(const char* nptr, char** endptr, int base) {
379#ifdef DMLC_USE_CXX11
380 static_assert(std::is_unsigned<UnsignedIntType>::value
381 && std::is_integral<UnsignedIntType>::value,
382 "ParseUnsignedInt is defined for unsigned integers only");
383#endif
384 CHECK(base <= 10 && base >= 2);
385 const char *p = nptr;
386 // Skip leading white space, if any. Not necessary
387 while (isspace(*p)) ++p;
388
389 // Get sign if any
390 bool sign = true;
391 if (*p == '-') {
392 sign = false; ++p;
393 } else if (*p == '+') {
394 ++p;
395 }
396
397 // we are parsing unsigned, so no minus sign should be found
398 CHECK_EQ(sign, true);
399
400 UnsignedIntType value;
401 const UnsignedIntType base_val = static_cast<UnsignedIntType>(base);
402 for (value = 0; isdigit(*p); ++p) {
403 value = value * base_val + static_cast<UnsignedIntType>(*p - '0');
404 }
405
406 if (endptr) *endptr = (char*)p; // NOLINT(*)
407 return value;
408}
409
422inline uint64_t strtoull(const char* nptr, char **endptr, int base) {
423 return ParseUnsignedInt<uint64_t>(nptr, endptr, base);
424}
425
434inline long atol(const char* p) { // NOLINT(*)
435 return ParseSignedInt<long>(p, 0, 10); // NOLINT(*)
436}
437
448inline float atof(const char* nptr) {
449 return strtof(nptr, 0);
450}
451
467inline float stof(const std::string& value, size_t* pos = nullptr) {
468 const char* str_source = value.c_str();
469 char* endptr;
470 const float parsed_value = dmlc::strtof_check_range(str_source, &endptr);
471 if (errno == ERANGE && parsed_value == std::numeric_limits<float>::infinity()) {
472 throw std::out_of_range("Out of range value");
473 } else if (const_cast<const char*>(endptr) == str_source) {
474 throw std::invalid_argument("No conversion could be performed");
475 }
476 if (pos) {
477 *pos = static_cast<size_t>(const_cast<const char*>(endptr) - str_source);
478 }
479 return parsed_value;
480}
481
497inline double stod(const std::string& value, size_t* pos = nullptr) {
498 const char* str_source = value.c_str();
499 char* endptr;
500 const double parsed_value = dmlc::strtod_check_range(str_source, &endptr);
501 if (errno == ERANGE && parsed_value == std::numeric_limits<double>::infinity()) {
502 throw std::out_of_range("Out of range value");
503 } else if (const_cast<const char*>(endptr) == str_source) {
504 throw std::invalid_argument("No conversion could be performed");
505 }
506 if (pos) {
507 *pos = static_cast<size_t>(const_cast<const char*>(endptr) - str_source);
508 }
509 return parsed_value;
510}
511
/*!
 * \brief Interface class that defines a single method get() to convert a
 *        string into type T. The primary template only declares get(); each
 *        supported type supplies it through an explicit specialization.
 * \tparam T Type to convert the string into
 */
template<typename T>
class Str2T {
 public:
  /*!
   * \brief Convert a string into type T
   * \param begin Beginning of the string to convert
   * \return string converted to type T
   */
  static inline T get(const char * begin);
};
528
535template<typename T>
536inline T Str2Type(const char * begin) {
537 return Str2T<T>::get(begin);
538}
539
543template<>
544class Str2T<int32_t> {
545 public:
551 static inline int32_t get(const char * begin) {
552 return ParseSignedInt<int32_t>(begin, NULL, 10);
553 }
554};
555
559template<>
560class Str2T<uint32_t> {
561 public:
567 static inline uint32_t get(const char* begin) {
568 return ParseUnsignedInt<uint32_t>(begin, NULL, 10);
569 }
570};
571
575template<>
576class Str2T<int64_t> {
577 public:
583 static inline int64_t get(const char * begin) {
584 return ParseSignedInt<int64_t>(begin, NULL, 10);
585 }
586};
587
591template<>
592class Str2T<uint64_t> {
593 public:
599 static inline uint64_t get(const char * begin) {
600 return ParseUnsignedInt<uint64_t>(begin, NULL, 10);
601 }
602};
603
607template<>
608class Str2T<float> {
609 public:
615 static inline float get(const char * begin) {
616 return atof(begin);
617 }
618};
619
623template<>
624class Str2T<double> {
625 public:
631 static inline double get(const char * begin) {
632 return strtod(begin, 0);
633 }
634};
635
647template<typename T1, typename T2>
648inline int ParsePair(const char * begin, const char * end,
649 const char ** endptr, T1 &v1, T2 &v2) { // NOLINT(*)
650 const char * p = begin;
651 while (p != end && !isdigitchars(*p)) ++p;
652 if (p == end) {
653 *endptr = end;
654 return 0;
655 }
656 const char * q = p;
657 while (q != end && isdigitchars(*q)) ++q;
658 v1 = Str2Type<T1>(p);
659 p = q;
660 while (p != end && isblank(*p)) ++p;
661 if (p == end || *p != ':') {
662 // only v1
663 *endptr = p;
664 return 1;
665 }
666 p++;
667 while (p != end && !isdigitchars(*p)) ++p;
668 q = p;
669 while (q != end && isdigitchars(*q)) ++q;
670 *endptr = q;
671 v2 = Str2Type<T2>(p);
672 return 2;
673}
674
688template<typename T1, typename T2, typename T3>
689inline int ParseTriple(const char * begin, const char * end,
690 const char ** endptr, T1 &v1, T2 &v2, T3 &v3) { // NOLINT(*)
691 const char * p = begin;
692 while (p != end && !isdigitchars(*p)) ++p;
693 if (p == end) {
694 *endptr = end;
695 return 0;
696 }
697 const char * q = p;
698 while (q != end && isdigitchars(*q)) ++q;
699 v1 = Str2Type<T1>(p);
700 p = q;
701 while (p != end && isblank(*p)) ++p;
702 if (p == end || *p != ':') {
703 // only v1
704 *endptr = p;
705 return 1;
706 }
707 p++;
708 while (p != end && !isdigitchars(*p)) ++p;
709 q = p;
710 while (q != end && isdigitchars(*q)) ++q;
711 v2 = Str2Type<T2>(p);
712 p = q;
713 while (p != end && isblank(*p)) ++p;
714 if (p == end || *p != ':') {
715 // only v1:v2
716 *endptr = p;
717 return 2;
718 }
719 p++;
720 while (p != end && !isdigitchars(*p)) ++p;
721 q = p;
722 while (q != end && isdigitchars(*q)) ++q;
723 *endptr = q;
724 v3 = Str2Type<T3>(p);
725 return 3;
726}
727} // namespace dmlc
728
729#endif // DMLC_STRTONUM_H_
static double get(const char *begin)
Convert a string into double.
Definition strtonum.h:631
static float get(const char *begin)
Convert a string into float.
Definition strtonum.h:615
static int32_t get(const char *begin)
Convert a string into signed 32-bit integer.
Definition strtonum.h:551
static int64_t get(const char *begin)
Convert a string into signed 64-bit integer.
Definition strtonum.h:583
static uint32_t get(const char *begin)
Convert a string into unsigned 32-bit integer.
Definition strtonum.h:567
static uint64_t get(const char *begin)
Convert a string into unsigned 64-bit integer.
Definition strtonum.h:599
Interface class that defines a single method get() to convert a string into type T....
Definition strtonum.h:519
static T get(const char *begin)
Convert a string into type T.
defines console logging options for xgboost. Use to enforce unified print behavior.
namespace for dmlc
Definition array_view.h:12
double strtod(const char *nptr, char **endptr)
A faster implementation of strtod(). See documentation of std::strtof() for more information....
Definition strtonum.h:303
float strtof_check_range(const char *nptr, char **endptr)
A faster implementation of strtof(). See documentation of std::strtof() for more information....
Definition strtonum.h:286
bool isblank(char c)
Inline implementation of isblank(). Tests whether the given character is a space or tab character.
Definition strtonum.h:36
float stof(const std::string &value, size_t *pos=nullptr)
A faster implementation of stof(). See documentation of std::stof() for more information....
Definition strtonum.h:467
long atol(const char *p)
A faster implementation of atol(). See documentation of std::atol() for more information....
Definition strtonum.h:434
float atof(const char *nptr)
A faster implementation of atof(). Unlike std::atof(), this function returns float type....
Definition strtonum.h:448
bool isspace(char c)
Inline implementation of isspace(). Tests whether the given character is a whitespace letter.
Definition strtonum.h:26
bool isdigit(char c)
Inline implementation of isdigit(). Tests whether the given character is a decimal digit.
Definition strtonum.h:46
double stod(const std::string &value, size_t *pos=nullptr)
A faster implementation of stod(). See documentation of std::stod() for more information....
Definition strtonum.h:497
const int kStrtofMaxDigits
Maximum number of decimal digits dmlc::strtof() / dmlc::strtod() will process. Trailing digits will b...
Definition strtonum.h:81
bool isalpha(char c)
Inline implementation of isalpha(). Tests whether the given character is an alphabet letter.
Definition strtonum.h:56
float strtof(const char *nptr, char **endptr)
A faster implementation of strtof(). See documentation of std::strtof() for more information....
Definition strtonum.h:268
T Str2Type(const char *begin)
Convenience function for converting string into type T.
Definition strtonum.h:536
bool isdigitchars(char c)
Tests whether the given character is a valid letter in the string representation of a floating-point ...
Definition strtonum.h:70
double strtod_check_range(const char *nptr, char **endptr)
A faster implementation of strtod(). See documentation of std::strtod() for more information....
Definition strtonum.h:321
SignedIntType ParseSignedInt(const char *nptr, char **endptr, int base)
A fast string-to-integer convertor, for signed integers TODO: the current version supports only base ...
Definition strtonum.h:337
int ParsePair(const char *begin, const char *end, const char **endptr, T1 &v1, T2 &v2)
Parse colon separated pair v1[:v2].
Definition strtonum.h:648
uint64_t strtoull(const char *nptr, char **endptr, int base)
A faster implementation of strtoull(). See documentation of std::strtoull() for more information....
Definition strtonum.h:422
int ParseTriple(const char *begin, const char *end, const char **endptr, T1 &v1, T2 &v2, T3 &v3)
Parse colon separated triple v1:v2[:v3].
Definition strtonum.h:689
FloatType ParseFloat(const char *nptr, char **endptr)
Common implementation for dmlc::strtof() and dmlc::strtod() TODO: the current version does not suppor...
Definition strtonum.h:99
UnsignedIntType ParseUnsignedInt(const char *nptr, char **endptr, int base)
A fast string-to-integer convertor, for unsigned integers TODO: the current version supports only bas...
Definition strtonum.h:378
Macros common to all headers.