// Source: Pastebin paste "Untitled", posted by a guest on Sep 2nd, 2020 (C++, 15.47 KB).
// FUNCTION from_chars (STRING TO FLOATING-POINT)

// C11 6.4.2.1 "General"
// digit: one of
//     0 1 2 3 4 5 6 7 8 9

// C11 6.4.4.1 "Integer constants"
// hexadecimal-digit: one of
//     0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F

// C11 6.4.4.2 "Floating constants" (without floating-suffix, hexadecimal-prefix)
// amended by C11 7.22.1.3 "The strtod, strtof, and strtold functions" making exponents optional
// LWG-3080: "the sign '+' may only appear in the exponent part"

// digit-sequence:
//     digit
//     digit-sequence digit

// hexadecimal-digit-sequence:
//     hexadecimal-digit
//     hexadecimal-digit-sequence hexadecimal-digit

// sign: one of
//     + -

// decimal-floating-constant:
//     fractional-constant exponent-part[opt]
//     digit-sequence exponent-part[opt]

// fractional-constant:
//     digit-sequence[opt] . digit-sequence
//     digit-sequence .

// exponent-part:
//     e sign[opt] digit-sequence
//     E sign[opt] digit-sequence

// hexadecimal-floating-constant:
//     hexadecimal-fractional-constant binary-exponent-part[opt]
//     hexadecimal-digit-sequence binary-exponent-part[opt]

// hexadecimal-fractional-constant:
//     hexadecimal-digit-sequence[opt] . hexadecimal-digit-sequence
//     hexadecimal-digit-sequence .

// binary-exponent-part:
//     p sign[opt] digit-sequence
//     P sign[opt] digit-sequence

  50. template <class _Floating>
  51. _NODISCARD from_chars_result _Ordinary_floating_from_chars(const char* const _First, const char* const _Last,
  52.     _Floating& _Value, const chars_format _Fmt, const bool _Minus_sign, const char* _Next) noexcept {
  53.     // vvvvvvvvvv DERIVED FROM corecrt_internal_strtox.h WITH SIGNIFICANT MODIFICATIONS vvvvvvvvvv
  54.  
  55.     const bool _Is_hexadecimal = _Fmt == chars_format::hex;
  56.     const int _Base{_Is_hexadecimal ? 16 : 10};
  57.  
  58.     // PERFORMANCE NOTE: _Fp_string is intentionally left uninitialized. Zero-initialization is quite expensive
  59.     // and is unnecessary. The benefit of not zero-initializing is greatest for short inputs.
  60.     _Floating_point_string _Fp_string;
  61.  
  62.     // Record the optional minus sign:
  63.     _Fp_string._Myis_negative = _Minus_sign;
  64.  
  65.     uint8_t* const _Mantissa_first = _Fp_string._Mymantissa;
  66.     uint8_t* const _Mantissa_last  = _STD end(_Fp_string._Mymantissa);
  67.     uint8_t* _Mantissa_it          = _Mantissa_first;
  68.  
  69.     // [_Whole_begin, _Whole_end) will contain 0 or more digits/hexits
  70.     const char* const _Whole_begin = _Next;
  71.  
  72.     // Skip past any leading zeroes in the mantissa:
  73.     for (; _Next != _Last && *_Next == '0'; ++_Next) {
  74.     }
  75.     const char* const _Leading_zero_end = _Next;
  76.  
  77.     // Scan the integer part of the mantissa:
  78.     for (; _Next != _Last; ++_Next) {
  79.         const unsigned char _Digit_value = _Digit_from_char(*_Next);
  80.  
  81.         if (_Digit_value >= _Base) {
  82.             break;
  83.         }
  84.  
  85.         if (_Mantissa_it != _Mantissa_last) {
  86.             *_Mantissa_it++ = _Digit_value;
  87.         }
  88.     }
  89.     const char* const _Whole_end = _Next;
  90.  
  91.     // Defend against _Exponent_adjustment integer overflow. (These values don't need to be strict.)
  92.     constexpr ptrdiff_t _Maximum_adjustment = 1'000'000;
  93.     constexpr ptrdiff_t _Minimum_adjustment = -1'000'000;
  94.  
  95.     // The exponent adjustment holds the number of digits in the mantissa buffer that appeared before the radix point.
  96.     // It can be negative, and leading zeroes in the integer part are ignored. Examples:
  97.     // For "03333.111", it is 4.
  98.     // For "00000.111", it is 0.
  99.     // For "00000.001", it is -2.
  100.     int _Exponent_adjustment = static_cast<int>((_STD min)(_Whole_end - _Leading_zero_end, _Maximum_adjustment));
  101.  
  102.     // [_Whole_end, _Dot_end) will contain 0 or 1 '.' characters
  103.     if (_Next != _Last && *_Next == '.') {
  104.         ++_Next;
  105.     }
  106.     const char* const _Dot_end = _Next;
  107.  
  108.     // [_Dot_end, _Frac_end) will contain 0 or more digits/hexits
  109.  
  110.     // If we haven't yet scanned any nonzero digits, continue skipping over zeroes,
  111.     // updating the exponent adjustment to account for the zeroes we are skipping:
  112.     if (_Exponent_adjustment == 0) {
  113.         for (; _Next != _Last && *_Next == '0'; ++_Next) {
  114.         }
  115.  
  116.         _Exponent_adjustment = static_cast<int>((_STD max)(_Dot_end - _Next, _Minimum_adjustment));
  117.     }
  118.  
  119.     // Scan the fractional part of the mantissa:
  120.     bool _Has_zero_tail = true;
  121.  
  122.     for (; _Next != _Last; ++_Next) {
  123.         const unsigned char _Digit_value = _Digit_from_char(*_Next);
  124.  
  125.         if (_Digit_value >= _Base) {
  126.             break;
  127.         }
  128.  
  129.         if (_Mantissa_it != _Mantissa_last) {
  130.             *_Mantissa_it++ = _Digit_value;
  131.         } else {
  132.             _Has_zero_tail = _Has_zero_tail && _Digit_value == 0;
  133.         }
  134.     }
  135.     const char* const _Frac_end = _Next;
  136.  
  137.     // We must have at least 1 digit/hexit
  138.     if (_Whole_begin == _Whole_end && _Dot_end == _Frac_end) {
  139.         return {_First, errc::invalid_argument};
  140.     }
  141.  
  142.     const char _Exponent_prefix{_Is_hexadecimal ? 'p' : 'e'};
  143.  
  144.     bool _Exponent_is_negative = false;
  145.     int _Exponent              = 0;
  146.  
  147.     constexpr int _Maximum_temporary_decimal_exponent = 5200;
  148.     constexpr int _Minimum_temporary_decimal_exponent = -5200;
  149.  
  150.     if (_Fmt != chars_format::fixed // N4713 23.20.3 [charconv.from.chars]/7.3
  151.                                     // "if fmt has chars_format::fixed set but not chars_format::scientific,
  152.                                     // the optional exponent part shall not appear"
  153.         && _Next != _Last && (static_cast<unsigned char>(*_Next) | 0x20) == _Exponent_prefix) { // found exponent prefix
  154.         const char* _Unread = _Next + 1;
  155.  
  156.         if (_Unread != _Last && (*_Unread == '+' || *_Unread == '-')) { // found optional sign
  157.             _Exponent_is_negative = *_Unread == '-';
  158.             ++_Unread;
  159.         }
  160.  
  161.         while (_Unread != _Last) {
  162.             const unsigned char _Digit_value = _Digit_from_char(*_Unread);
  163.  
  164.             if (_Digit_value >= 10) {
  165.                 break;
  166.             }
  167.  
  168.             // found decimal digit
  169.  
  170.             if (_Exponent <= _Maximum_temporary_decimal_exponent) {
  171.                 _Exponent = _Exponent * 10 + _Digit_value;
  172.             }
  173.  
  174.             ++_Unread;
  175.             _Next = _Unread; // consume exponent-part/binary-exponent-part
  176.         }
  177.  
  178.         if (_Exponent_is_negative) {
  179.             _Exponent = -_Exponent;
  180.         }
  181.     }
  182.  
  183.     // [_Frac_end, _Exponent_end) will either be empty or contain "[EPep] sign[opt] digit-sequence"
  184.     const char* const _Exponent_end = _Next;
  185.  
  186.     if (_Fmt == chars_format::scientific
  187.         && _Frac_end == _Exponent_end) { // N4713 23.20.3 [charconv.from.chars]/7.2
  188.                                          // "if fmt has chars_format::scientific set but not chars_format::fixed,
  189.                                          // the otherwise optional exponent part shall appear"
  190.         return {_First, errc::invalid_argument};
  191.     }
  192.  
  193.     // Remove trailing zeroes from mantissa:
  194.     while (_Mantissa_it != _Mantissa_first && *(_Mantissa_it - 1) == 0) {
  195.         --_Mantissa_it;
  196.     }
  197.  
  198.     // If the mantissa buffer is empty, the mantissa was composed of all zeroes (so the mantissa is 0).
  199.     // All such strings have the value zero, regardless of what the exponent is (because 0 * b^n == 0 for all b and n).
  200.     // We can return now. Note that we defer this check until after we scan the exponent, so that we can correctly
  201.     // update _Next to point past the end of the exponent.
  202.     if (_Mantissa_it == _Mantissa_first) {
  203.         _STL_INTERNAL_CHECK(_Has_zero_tail);
  204.         _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
  205.         return {_Next, errc{}};
  206.     }
  207.  
  208.     // Before we adjust the exponent, handle the case where we detected a wildly
  209.     // out of range exponent during parsing and clamped the value:
  210.     if (_Exponent > _Maximum_temporary_decimal_exponent) {
  211.         _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
  212.         return {_Next, errc::result_out_of_range}; // Overflow example: "1e+9999"
  213.     }
  214.  
  215.     if (_Exponent < _Minimum_temporary_decimal_exponent) {
  216.         _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
  217.         return {_Next, errc::result_out_of_range}; // Underflow example: "1e-9999"
  218.     }
  219.  
  220.     // In hexadecimal floating constants, the exponent is a base 2 exponent. The exponent adjustment computed during
  221.     // parsing has the same base as the mantissa (so, 16 for hexadecimal floating constants).
  222.     // We therefore need to scale the base 16 multiplier to base 2 by multiplying by log2(16):
  223.     const int _Exponent_adjustment_multiplier{_Is_hexadecimal ? 4 : 1};
  224.  
  225.     _Exponent += _Exponent_adjustment * _Exponent_adjustment_multiplier;
  226.  
  227.     // Verify that after adjustment the exponent isn't wildly out of range (if it is, it isn't representable
  228.     // in any supported floating-point format).
  229.     if (_Exponent > _Maximum_temporary_decimal_exponent) {
  230.         _Assemble_floating_point_infinity(_Fp_string._Myis_negative, _Value);
  231.         return {_Next, errc::result_out_of_range}; // Overflow example: "10e+5199"
  232.     }
  233.  
  234.     if (_Exponent < _Minimum_temporary_decimal_exponent) {
  235.         _Assemble_floating_point_zero(_Fp_string._Myis_negative, _Value);
  236.         return {_Next, errc::result_out_of_range}; // Underflow example: "0.001e-5199"
  237.     }
  238.  
  239.     _Fp_string._Myexponent       = _Exponent;
  240.     _Fp_string._Mymantissa_count = static_cast<uint32_t>(_Mantissa_it - _Mantissa_first);
  241.  
  242.     if (_Is_hexadecimal) {
  243.         const errc _Ec = _Convert_hexadecimal_string_to_floating_type(_Fp_string, _Value, _Has_zero_tail);
  244.         return {_Next, _Ec};
  245.     } else {
  246.         const errc _Ec = _Convert_decimal_string_to_floating_type(_Fp_string, _Value, _Has_zero_tail);
  247.         return {_Next, _Ec};
  248.     }
  249.  
  250.     // ^^^^^^^^^^ DERIVED FROM corecrt_internal_strtox.h WITH SIGNIFICANT MODIFICATIONS ^^^^^^^^^^
  251. }
  252.  
  253. _NODISCARD inline bool _Starts_with_case_insensitive(
  254.     const char* _First, const char* const _Last, const char* _Lowercase) noexcept {
  255.     // pre: _Lowercase contains only ['a', 'z'] and is null-terminated
  256.     for (; _First != _Last && *_Lowercase != '\0'; ++_First, ++_Lowercase) {
  257.         if ((static_cast<unsigned char>(*_First) | 0x20) != *_Lowercase) {
  258.             return false;
  259.         }
  260.     }
  261.  
  262.     return *_Lowercase == '\0';
  263. }
  264.  
  265. template <class _Floating>
  266. _NODISCARD from_chars_result _Infinity_from_chars(const char* const _First, const char* const _Last, _Floating& _Value,
  267.     const bool _Minus_sign, const char* _Next) noexcept {
  268.     // pre: _Next points at 'i' (case-insensitively)
  269.     if (!_Starts_with_case_insensitive(_Next + 1, _Last, "nf")) { // definitely invalid
  270.         return {_First, errc::invalid_argument};
  271.     }
  272.  
  273.     // definitely inf
  274.     _Next += 3;
  275.  
  276.     if (_Starts_with_case_insensitive(_Next, _Last, "inity")) { // definitely infinity
  277.         _Next += 5;
  278.     }
  279.  
  280.     _Assemble_floating_point_infinity(_Minus_sign, _Value);
  281.  
  282.     return {_Next, errc{}};
  283. }
  284.  
  285. template <class _Floating>
  286. _NODISCARD from_chars_result _Nan_from_chars(const char* const _First, const char* const _Last, _Floating& _Value,
  287.     bool _Minus_sign, const char* _Next) noexcept {
  288.     // pre: _Next points at 'n' (case-insensitively)
  289.     if (!_Starts_with_case_insensitive(_Next + 1, _Last, "an")) { // definitely invalid
  290.         return {_First, errc::invalid_argument};
  291.     }
  292.  
  293.     // definitely nan
  294.     _Next += 3;
  295.  
  296.     bool _Quiet = true;
  297.  
  298.     if (_Next != _Last && *_Next == '(') { // possibly nan(n-char-sequence[opt])
  299.         const char* const _Seq_begin = _Next + 1;
  300.  
  301.         for (const char* _Temp = _Seq_begin; _Temp != _Last; ++_Temp) {
  302.             if (*_Temp == ')') { // definitely nan(n-char-sequence[opt])
  303.                 _Next = _Temp + 1;
  304.  
  305.                 if (_Temp - _Seq_begin == 3
  306.                     && _Starts_with_case_insensitive(_Seq_begin, _Temp, "ind")) { // definitely nan(ind)
  307.                     // The UCRT considers indeterminate NaN to be negative quiet NaN with no payload bits set.
  308.                     // It parses "nan(ind)" and "-nan(ind)" identically.
  309.                     _Minus_sign = true;
  310.                 } else if (_Temp - _Seq_begin == 4
  311.                            && _Starts_with_case_insensitive(_Seq_begin, _Temp, "snan")) { // definitely nan(snan)
  312.                     _Quiet = false;
  313.                 }
  314.  
  315.                 break;
  316.             } else if (*_Temp == '_' || ('0' <= *_Temp && *_Temp <= '9') || ('A' <= *_Temp && *_Temp <= 'Z')
  317.                        || ('a' <= *_Temp && *_Temp <= 'z')) { // possibly nan(n-char-sequence[opt]), keep going
  318.             } else { // definitely nan, not nan(n-char-sequence[opt])
  319.                 break;
  320.             }
  321.         }
  322.     }
  323.  
  324.     // Intentional behavior difference between the UCRT and the STL:
  325.     // strtod()/strtof() parse plain "nan" as being a quiet NaN with all payload bits set.
  326.     // numeric_limits::quiet_NaN() returns a quiet NaN with no payload bits set.
  327.     // This implementation of from_chars() has chosen to be consistent with numeric_limits.
  328.  
  329.     using _Traits    = _Floating_type_traits<_Floating>;
  330.     using _Uint_type = typename _Traits::_Uint_type;
  331.  
  332.     _Uint_type _Uint_value = _Traits::_Shifted_exponent_mask;
  333.  
  334.     if (_Minus_sign) {
  335.         _Uint_value |= _Traits::_Shifted_sign_mask;
  336.     }
  337.  
  338.     if (_Quiet) {
  339.         _Uint_value |= _Traits::_Special_nan_mantissa_mask;
  340.     } else {
  341.         _Uint_value |= 1;
  342.     }
  343.  
  344.     _Value = _Bit_cast<_Floating>(_Uint_value);
  345.  
  346.     return {_Next, errc{}};
  347. }
  348.  
  349. template <class _Floating>
  350. _NODISCARD from_chars_result _Floating_from_chars(
  351.     const char* const _First, const char* const _Last, _Floating& _Value, const chars_format _Fmt) noexcept {
  352.     _Adl_verify_range(_First, _Last);
  353.  
  354.     _STL_ASSERT(_Fmt == chars_format::general || _Fmt == chars_format::scientific || _Fmt == chars_format::fixed
  355.                     || _Fmt == chars_format::hex,
  356.         "invalid format in from_chars()");
  357.  
  358.     bool _Minus_sign = false;
  359.  
  360.     const char* _Next = _First;
  361.  
  362.     if (_Next == _Last) {
  363.         return {_First, errc::invalid_argument};
  364.     }
  365.  
  366.     if (*_Next == '-') {
  367.         _Minus_sign = true;
  368.         ++_Next;
  369.  
  370.         if (_Next == _Last) {
  371.             return {_First, errc::invalid_argument};
  372.         }
  373.     }
  374.  
  375.     // Distinguish ordinary numbers versus inf/nan with a single test.
  376.     // ordinary numbers start with ['.'] ['0', '9'] ['A', 'F'] ['a', 'f']
  377.     // inf/nan start with ['I'] ['N'] ['i'] ['n']
  378.     // All other starting characters are invalid.
  379.     // Setting the 0x20 bit folds these ranges in a useful manner.
  380.     // ordinary (and some invalid) starting characters are folded to ['.'] ['0', '9'] ['a', 'f']
  381.     // inf/nan starting characters are folded to ['i'] ['n']
  382.     // These are ordered: ['.'] ['0', '9'] ['a', 'f'] < ['i'] ['n']
  383.     // Note that invalid starting characters end up on both sides of this test.
  384.     const unsigned char _Folded_start = static_cast<unsigned char>(static_cast<unsigned char>(*_Next) | 0x20);
  385.  
  386.     if (_Folded_start <= 'f') { // possibly an ordinary number
  387.         return _Ordinary_floating_from_chars(_First, _Last, _Value, _Fmt, _Minus_sign, _Next);
  388.     } else if (_Folded_start == 'i') { // possibly inf
  389.         return _Infinity_from_chars(_First, _Last, _Value, _Minus_sign, _Next);
  390.     } else if (_Folded_start == 'n') { // possibly nan
  391.         return _Nan_from_chars(_First, _Last, _Value, _Minus_sign, _Next);
  392.     } else { // definitely invalid
  393.         return {_First, errc::invalid_argument};
  394.     }
  395. }
// End of paste.