Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #pragma once
- #ifndef _HALF_H_
- #define _HALF_H_
- //---------------------------------------------------------------------------//
- //
// half.hpp
// Header-only half-precision floating-point number
- // Portable implementation of IEEE 754 half-precision floating-point format
- // Copyright (C) tapetums 2015
- //
- //---------------------------------------------------------------------------//
- //
- // Copyright (c) 2006, Industrial Light & Magic, a division of Lucasfilm
- // Entertainment Company Ltd. Portions contributed and copyright held by
- // others as indicated. All rights reserved.
- //
- // Redistribution and use in source and binary forms, with or without
- // modification, are permitted provided that the following conditions are
- // met:
- //
- // * Redistributions of source code must retain the above
- // copyright notice, this list of conditions and the following
- // disclaimer.
- //
- // * Redistributions in binary form must reproduce the above
- // copyright notice, this list of conditions and the following
- // disclaimer in the documentation and/or other materials provided with
- // the distribution.
- //
- // * Neither the name of Industrial Light & Magic nor the names of
- // any other contributors to this software may be used to endorse or
- // promote products derived from this software without specific prior
- // written permission.
- //
- // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
- // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- //
- //---------------------------------------------------------------------------//
- // Primary authors:
- // Florian Kainz <kainz@ilm.com>
- // Rod Bogart <rgb@ilm.com>
- //
- // Modification for portable implementation:
- // tapetums <tapetums@live.jp>
- //---------------------------------------------------------------------------//
- #include <cstdint>
- #include <cmath>
- #include <iostream>
- //---------------------------------------------------------------------------//
- namespace IEEE754 {
- //---------------------------------------------------------------------------//
using float32_t = float; // floating-point type that half converts to and from
//---------------------------------------------------------------------------//
// Class
//---------------------------------------------------------------------------//
// Half-precision number kept as a raw 16-bit pattern in `data`.
class half
{
public: // construction / destruction / copy / move
    constexpr half() = default;                 // starts with bit pattern 0x0000
    half(const half&) = default;
    half(half&&) noexcept = default;
    half& operator=(const half&) = default;
    half& operator=(half&&) noexcept = default;
    ~half() = default;

    // Builds a half from a float by forwarding to operator=(float32_t).
    explicit half(float32_t f) noexcept { operator=(f); }

public: // conversion, negation, assignment from float
    operator float32_t() const noexcept;
    constexpr half operator-() const noexcept;
    half& operator=(float32_t) noexcept;

public: // compound arithmetic (half or float right-hand side)
    half& operator+=(half) noexcept;
    half& operator+=(float32_t) noexcept;
    half& operator-=(half) noexcept;
    half& operator-=(float32_t) noexcept;
    half& operator*=(half) noexcept;
    half& operator*=(float32_t) noexcept;
    half& operator/=(half) noexcept;
    half& operator/=(float32_t) noexcept;

public: // methods
    half round(uint8_t digits) const noexcept;

public: // classification of the stored bit pattern
    constexpr bool is_finite() const noexcept;
    constexpr bool is_normalized() const noexcept;
    constexpr bool is_denormalized() const noexcept;
    constexpr bool is_zero() const noexcept;
    constexpr bool is_NaN() const noexcept;
    constexpr bool is_infinity() const noexcept;
    constexpr bool is_negative() const noexcept;
    constexpr bool is_pos_inf() const noexcept;
    constexpr bool is_neg_inf() const noexcept;

public: // special values, built directly from their bit patterns
    static constexpr half pos_inf() noexcept { half value; value.data = 0x7C00; return value; }
    static constexpr half neg_inf() noexcept { half value; value.data = 0xFC00; return value; }
    static constexpr half qNaN() noexcept { half value; value.data = 0x7FFF; return value; }
    static constexpr half sNaN() noexcept { half value; value.data = 0x7DFF; return value; }

public: // raw bit access
    // Returns the stored 16-bit pattern.
    uint16_t bits() const noexcept { return data; }
    // Overwrites the stored 16-bit pattern and returns *this for chaining.
    half& bits(uint16_t bits) noexcept { data = bits; return *this; }

private: // helper type used to view a float's storage as an integer
    union uif { uint32_t i; float32_t f; };

private: // conversion helpers
    static uint16_t convert(int32_t) noexcept;
    static float32_t overflow() noexcept;

private: // state
    uint16_t data { 0 };
};
- //---------------------------------------------------------------------------//
- // Limits
- //
- // Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
- // constants, but at least one other compiler (gcc 2.96) produces incorrect
- // results if they are.
- //---------------------------------------------------------------------------//
// The floating-point limits are float constants under Visual C++ and double
// constants elsewhere (kept from the original for compatibility).
#if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
static constexpr auto HALF_MIN     = 5.96046448e-08f; // Smallest positive half (2^-24)
static constexpr auto HALF_NRM_MIN = 6.10351562e-05f; // Smallest positive normalized half (2^-14)
static constexpr auto HALF_MAX     = 65504.0f;        // Largest positive half
static constexpr auto HALF_EPSILON = 0.0009765625f;   // Smallest positive e for which
                                                      // half (1.0 + e) != half (1.0); exactly 2^-10
#else
static constexpr auto HALF_MIN     = 5.96046448e-08;  // Smallest positive half (2^-24)
static constexpr auto HALF_NRM_MIN = 6.10351562e-05;  // Smallest positive normalized half (2^-14)
static constexpr auto HALF_MAX     = 65504.0;         // Largest positive half
static constexpr auto HALF_EPSILON = 0.0009765625;    // Smallest positive e for which
                                                      // half (1.0 + e) != half (1.0); exactly 2^-10
#endif
static constexpr auto HALF_MANT_DIG = 11;  // Number of digits in mantissa
                                           // (significand + hidden leading 1)
static constexpr auto HALF_DIG = 3;        // Number of base 10 digits that can be
                                           // represented without change:
                                           // floor((HALF_MANT_DIG - 1) * log10(2)) = 3
static constexpr auto HALF_RADIX = 2;      // Base of the exponent
static constexpr auto HALF_MIN_EXP = -13;  // Minimum negative integer such that
                                           // HALF_RADIX raised to the power of
                                           // one less than that integer is a
                                           // normalized half
static constexpr auto HALF_MAX_EXP = 16;   // Maximum positive integer such that
                                           // HALF_RADIX raised to the power of
                                           // one less than that integer is a
                                           // normalized half
static constexpr auto HALF_MIN_10_EXP = -4; // Minimum negative integer such
                                            // that 10 raised to that power is
                                            // a normalized half
static constexpr auto HALF_MAX_10_EXP = 4;  // Maximum positive integer such
                                            // that 10 raised to that power is
                                            // a normalized half
static constexpr auto HALF_POS_INF_BIT = 0x7C00; // Bit pattern for +infinity
static constexpr auto HALF_NEG_INF_BIT = 0xFC00; // Bit pattern for -infinity
static constexpr auto HALF_Q_NAN_BIT = 0x7FFF;   // Bit pattern for quiet NaN
static constexpr auto HALF_S_NAN_BIT = 0x7DFF;   // Bit pattern for signaling NaN
- //---------------------------------------------------------------------------//
- //
- // Implementation --
- //
- // Representation of a float:
- //
- // We assume that a float, f, is an IEEE 754 single-precision
- // floating point number, whose bits are arranged as follows:
- //
- // 31 (msb)
- // |
- // | 30 23
- // | | |
- // | | | 22 0 (lsb)
- // | | | | |
- // X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
- //
- // s e m
- //
- // S is the sign-bit, e is the exponent and m is the significand.
- //
- // If e is between 1 and 254, f is a normalized number:
- //
- // s e-127
- // f = (-1) * 2 * 1.m
- //
- // If e is 0, and m is not zero, f is a denormalized number:
- //
- // s -126
- // f = (-1) * 2 * 0.m
- //
- // If e and m are both zero, f is zero:
- //
- // f = 0.0
- //
- // If e is 255, f is an "infinity" or "not a number" (NAN),
- // depending on whether m is zero or not.
- //
- // Examples:
- //
- // 0 00000000 00000000000000000000000 = 0.0
- // 0 01111110 00000000000000000000000 = 0.5
- // 0 01111111 00000000000000000000000 = 1.0
- // 0 10000000 00000000000000000000000 = 2.0
- // 0 10000000 10000000000000000000000 = 3.0
- // 1 10000101 11110000010000000000000 = -124.0625
- // 0 11111111 00000000000000000000000 = +infinity
- // 1 11111111 00000000000000000000000 = -infinity
- // 0 11111111 10000000000000000000000 = NAN
- // 1 11111111 11111111111111111111111 = NAN
- //
- // Representation of a half:
- //
- // Here is the bit-layout for a half number, h:
- //
- // 15 (msb)
- // |
- // | 14 10
- // | | |
- // | | | 9 0 (lsb)
- // | | | | |
- // X XXXXX XXXXXXXXXX
- //
- // s e m
- //
- // S is the sign-bit, e is the exponent and m is the significand.
- //
- // If e is between 1 and 30, h is a normalized number:
- //
- // s e-15
- // h = (-1) * 2 * 1.m
- //
- // If e is 0, and m is not zero, h is a denormalized number:
- //
- // S -14
- // h = (-1) * 2 * 0.m
- //
- // If e and m are both zero, h is zero:
- //
- // h = 0.0
- //
- // If e is 31, h is an "infinity" or "not a number" (NAN),
- // depending on whether m is zero or not.
- //
- // Examples:
- //
- // 0 00000 0000000000 = 0.0
- // 0 01110 0000000000 = 0.5
- // 0 01111 0000000000 = 1.0
- // 0 10000 0000000000 = 2.0
- // 0 10000 1000000000 = 3.0
- // 1 10101 1111000001 = -124.0625
- // 0 11111 0000000000 = +infinity
- // 1 11111 0000000000 = -infinity
- // 0 11111 1000000000 = NAN
- // 1 11111 1111111111 = NAN
- //
- // Conversion:
- //
- // Converting from a float to a half requires some non-trivial bit
- // manipulations. In some cases, this makes conversion relatively
- // slow, but the most common case is accelerated via table lookups.
- //
- // Converting back from a half to a float is easier because we don't
- // have to do any rounding. In addition, there are only 65536
- // different half numbers; we can convert each of those numbers once
- // and store the results in a table. Later, all conversions can be
- // done using only simple table lookups.
- //
// <NOTE>
// tapetums <tapetums@live.jp> removed the table-lookup features.
// This change trades conversion speed
// for improved portability.
- //
- //---------------------------------------------------------------------------//
- //---------------------------------------------------------------------------//
- // Operators
- //---------------------------------------------------------------------------//
- inline half::operator float32_t() const noexcept
- {
- if ( data == 0x0000 )
- {
- return 0.0;
- }
- else if ( data == 0x8000 )
- {
- return -0.0;
- }
- int32_t s = (data << 16) & 0x8000'0000;
- int32_t e = ((data >> 10) & 0b0001'1111) + (127 - 15);
- int32_t m = data & 0x0000'03FF;
- uif tmp;
- tmp.i = s | (e << 23) | (m << (23 - 10));
- return tmp.f;
- }
- //---------------------------------------------------------------------------//
- inline constexpr half half::operator-() const noexcept
- {
- half h;
- h.data = data ^ 0x8000;
- return h;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator=(float32_t f) noexcept
- {
- uif tmp;
- tmp.f = f;
- if ( f == 0.0 )
- {
- // Common special case - zero.
- // Preserve the zero's sign bit.
- data = (tmp.i >> 16);
- }
- else
- {
- int32_t s = (tmp.i >> 16) & 0x0000'8000;
- int32_t e = ((tmp.i >> 23) & 0x0000'00FF) - (127 - 15);
- int32_t m = tmp.i & 0x007F'FFFF;
- //std::cout << " s = " << std::dec << s << std::endl;
- //std::cout << " e = " << std::dec << e << std::endl;
- //std::cout << " m = 0x" << std::hex << m << std::endl;
- if ( 0 < e && e < 31 )
- {
- // Simple case - round the significand, m, to 10
- // bits and combine it with the sign and exponent.
- data = s | (e << 10) | (m >> (23 - 10));
- }
- else
- {
- // Difficult case - call a function.
- data = convert(tmp.i); // too small
- }
- }
- //std::cout << " data = 0x" << std::hex << data << std::endl;
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator+=(half h) noexcept
- {
- //std::cout << "[op+=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( h.is_NaN() )
- {
- data = h.data; // Propagate NaN.
- }
- else if ( is_pos_inf() && h.is_neg_inf() )
- {
- data = HALF_Q_NAN_BIT; // ∞ + -∞ : undefined
- }
- else if ( is_neg_inf() && h.is_pos_inf() )
- {
- data = HALF_Q_NAN_BIT; // -∞ + ∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) + float32_t(h));
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator+=(float32_t f) noexcept
- {
- //std::cout << "[op+=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( isnan(f) )
- {
- data = HALF_Q_NAN_BIT; // Propagate NaN.
- }
- else if ( is_pos_inf() && (f == (-1.0 / 0.0)) )
- {
- data = HALF_Q_NAN_BIT; // Undefined
- }
- else if ( is_neg_inf() && (f == (+1.0 / 0.0)) )
- {
- data = HALF_Q_NAN_BIT; // Undefined
- }
- else
- {
- operator=(float32_t(*this) + f);
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator-=(half h) noexcept
- {
- //std::cout << "[op-=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( h.is_NaN() )
- {
- data = h.data; // Propagate NaN.
- }
- else if ( is_pos_inf() && h.is_pos_inf() )
- {
- data = HALF_Q_NAN_BIT; // ∞ - ∞ : undefined
- }
- else if ( is_neg_inf() && h.is_neg_inf() )
- {
- data = HALF_Q_NAN_BIT; // -∞ - -∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) - float32_t(h));
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator-=(float32_t f) noexcept
- {
- //std::cout << "[op-=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( isnan(f) )
- {
- data = HALF_Q_NAN_BIT; // Propagate NaN.
- }
- else if ( is_pos_inf() && (f == (+1.0 / 0.0)) )
- {
- data = HALF_Q_NAN_BIT; // ∞ - ∞ : undefined
- }
- else if ( is_neg_inf() && (f == (-1.0 / 0.0)) )
- {
- data = HALF_Q_NAN_BIT; // -∞ - -∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) - f);
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator*=(half h) noexcept
- {
- //std::cout << "[op*=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( h.is_NaN() )
- {
- data = h.data; // Propagate NaN.
- }
- else if ( is_infinity() && h.is_zero() )
- {
- data = HALF_Q_NAN_BIT; // ±∞ * ±0 : undefined
- }
- else if ( is_zero() && h.is_infinity() )
- {
- data = HALF_Q_NAN_BIT; // ±0 * ±∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) * float32_t(h));
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator*=(float32_t f) noexcept
- {
- //std::cout << "[op*=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( isnan(f) )
- {
- data = HALF_Q_NAN_BIT; // Propagate NaN.
- }
- else if ( is_infinity() && (f == 0.0) )
- {
- data = HALF_Q_NAN_BIT; // ±∞ * ±0 : undefined
- }
- else if ( is_zero() && isinf(f) )
- {
- data = HALF_Q_NAN_BIT; // ±0 * ±∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) * f);
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator/=(half h) noexcept
- {
- //std::cout << "[op/=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( h.is_NaN() )
- {
- data = h.data; // Propagate NaN.
- }
- else if ( is_zero() && h.is_zero() )
- {
- data = HALF_Q_NAN_BIT; // ±0 ÷ ±0 : undefined
- }
- else if ( is_infinity() && h.is_infinity() )
- {
- data = HALF_Q_NAN_BIT; // ±∞ ÷ ±∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) / float32_t(h));
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- inline half& half::operator/=(float32_t f) noexcept
- {
- //std::cout << "[op/=()]" << std::endl;
- if ( is_NaN() )
- {
- // Return NaN.
- }
- else if ( isnan(f) )
- {
- data = HALF_Q_NAN_BIT; // Propagate NaN.
- }
- else if ( is_zero() && (f == 0.0) )
- {
- data = HALF_Q_NAN_BIT; // ±0 ÷ ±0 : undefined
- }
- else if ( is_infinity() && isinf(f) )
- {
- data = HALF_Q_NAN_BIT; // ±∞ ÷ ±∞ : undefined
- }
- else
- {
- operator=(float32_t(*this) / f);
- }
- return *this;
- }
- //---------------------------------------------------------------------------//
- // Methods
- //---------------------------------------------------------------------------//
- //---------------------------------------------------------
- // Round to n-bit precision (n should be between 0 and 10).
- // After rounding, the significand's 10-n least significant
- // bits will be zero.
- //---------------------------------------------------------
- inline half half::round(uint8_t n) const noexcept
- {
- //std::cout << "[round()]" << std::endl;
- // Parameter check.
- if ( n >= 10 ) { return *this; }
- // Disassemble h into the sign, s,
- // and the combined exponent and significand, e.
- uint16_t s = data & 0x8000;
- uint16_t e = data & 0x7FFF;
- // Round the exponent and significand to the nearest value
- // where ones occur only in the (10-n) most significant bits.
- // Note that the exponent adjusts automatically if rounding
- // up causes the significand to overflow.
- e >>= 9 - n;
- e += e & 1;
- e <<= 9 - n;
- // Check for exponent overflow.
- if ( e >= 0x7C00 )
- {
- // Overflow occurred -- truncate instead of rounding.
- e = data;
- e >>= 10 - n;
- e <<= 10 - n;
- }
- // Put the original sign bit back.
- half h;
- h.data = s | e;
- return h;
- }
- //---------------------------------------------------------------------------//
- // Properties
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_finite() const noexcept
- {
- uint16_t e = (data >> 10) & 0x001F;
- return e < 31;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_normalized() const noexcept
- {
- uint16_t e = (data >> 10) & 0x001F;
- return e > 0 && e < 31;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_denormalized() const noexcept
- {
- uint16_t e = (data >> 10) & 0x001F;
- uint16_t m = data & 0x03FF;
- return e == 0 && m != 0;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_zero() const noexcept
- {
- return (data & 0x7FFF) == 0;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_NaN() const noexcept
- {
- uint16_t e = (data >> 10) & 0x001F;
- uint16_t m = data & 0x03FF;
- return e == 31 && m != 0;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_infinity() const noexcept
- {
- uint16_t e = (data >> 10) & 0x001F;
- uint16_t m = data & 0x03FF;
- return e == 31 && m == 0;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_negative() const noexcept
- {
- return (data & 0x8000) != 0;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_pos_inf() const noexcept
- {
- return data == HALF_POS_INF_BIT;
- }
- //---------------------------------------------------------------------------//
- inline constexpr bool half::is_neg_inf() const noexcept
- {
- return data == HALF_NEG_INF_BIT;
- }
- //---------------------------------------------------------------------------//
- // Internal Methods
- //---------------------------------------------------------------------------//
- inline uint16_t half::convert(int32_t i) noexcept
- {
- //std::cout << "[convert()]" << std::endl;
- int32_t s = (i >> 16) & 0x0000'8000;
- int32_t e = ((i >> 23) & 0x0000'00FF) - (127 - 15);
- int32_t m = i & 0x007F'FFFF;
- //std::cout << " s = " << std::dec << s << std::endl;
- //std::cout << " e = " << std::dec << e << std::endl;
- //std::cout << " m = 0x" << std::hex << m << std::endl;
- // Now reassemble s, e and m into a half:
- if ( e <= 0 )
- {
- if ( e < -10 )
- {
- // E is less than -10. The absolute value of f is
- // less than HALF_MIN (f may be a small normalized
- // float, a denormalized float or a zero).
- // We convert f to a half zero with the same sign as f.
- return s;
- }
- // E is between -10 and 0. F is a normalized float
- // whose magnitude is less than HALF_NRM_MIN.
- // We convert f to a denormalized half.
- // Add an explicit leading 1 to the significand.
- m = m | 0x00800000;
- // Round to m to the nearest (10+e)-bit value (with e between
- // -10 and 0); in case of a tie, round to the nearest even value.
- // Rounding may cause the significand to overflow and make
- // our number normalized. Because of the way a half's bits
- // are laid out, we don't have to treat this case separately;
- // the code below will handle it correctly.
- const auto t = 14 - e;
- const auto a = (1 << (t - 1)) - 1;
- const auto b = (m >> t) & 1;
- m = (m + a + b) >> t;
- // Assemble the half from s, e (zero) and m.
- return s | m;
- }
- else if ( e == 0xFF - (127 - 15) )
- {
- if ( m == 0 )
- {
- // F is an infinity; convert f to a half
- // infinity with the same sign as f.
- return s | 0x7C00;
- }
- else
- {
- // F is a NAN; we produce a half NAN that preserves
- // the sign bit and the 10 leftmost bits of the
- // significand of f, with one exception: If the 10
- // leftmost bits are all zero, the NAN would turn
- // into an infinity, so we have to set at least one
- // bit in the significand.
- m >>= 13;
- return s | 0x7C00 | m | (m == 0);
- }
- }
- else
- {
- // E is greater than zero. F is a normalized float.
- // We try to convert f to a normalized half.
- // Round to m to the nearest 10-bit value. In case of
- // a tie, round to the nearest even value.
- m = m + 0x00000FFF + ((m >> 13) & 1);
- if (m & 0x00800000)
- {
- m = 0; // overflow in significand,
- e += 1; // adjust exponent
- }
- // Handle exponent overflow
- if ( e > 30 )
- {
- overflow(); // Cause a hardware floating point overflow;
- return s | 0x7C00; // if this returns, the half becomes an
- } // infinity with the same sign as f.
- // Assemble the half from s, e and m.
- return s | (e << 10) | (m >> (23 -10));
- }
- }
- //---------------------------------------------------------------------------//
- inline float32_t half::overflow() noexcept
- {
- volatile float32_t f { 1e10 };
- for ( auto i = 0; i < 10; ++i )
- {
- f *= f; // this will overflow before the forloop terminates
- }
- return f;
- }
- //---------------------------------------------------------------------------//
- // Global Operators
- //---------------------------------------------------------------------------//
- inline half operator+(half a, half b) noexcept
- {
- //std::cout << "[op+()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( a.is_pos_inf() && b.is_neg_inf() )
- {
- return half::qNaN(); // ∞ + -∞ : undefined
- }
- else if ( a.is_neg_inf() && b.is_pos_inf() )
- {
- return half::qNaN(); // -∞ + ∞ : undefined
- }
- else
- {
- return half { float32_t(a) + float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator+(float32_t a, half b) noexcept
- {
- //std::cout << "[op+()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( (a == (+1.0 / 0.0)) && b.is_neg_inf() )
- {
- return half::qNaN(); // ∞ + -∞ : undefined
- }
- else if ( (b == (-1.0 / 0.0)) && b.is_pos_inf() )
- {
- return half::qNaN(); // -∞ + ∞ : undefined
- }
- else
- {
- return half { a + float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator+(half a, float32_t b) noexcept
- {
- //std::cout << "[op+()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return half::qNaN();
- }
- else if ( a.is_pos_inf() && (b == (-1.0 / 0.0)) )
- {
- return half::qNaN(); // ∞ + -∞ : undefined
- }
- else if ( a.is_neg_inf() && (b == (+1.0 / 0.0)) )
- {
- return half::qNaN(); // -∞ + ∞ : undefined
- }
- else
- {
- return half { float32_t(a) + b };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator-(half a, half b) noexcept
- {
- //std::cout << "[op-()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( a.is_pos_inf() && b.is_pos_inf() )
- {
- return half::qNaN(); // ∞ - ∞ : undefined
- }
- else if ( a.is_neg_inf() && b.is_neg_inf() )
- {
- return half::qNaN(); // -∞ - -∞ : undefined
- }
- else
- {
- return half { float32_t(a) - float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator-(float32_t a, half b) noexcept
- {
- //std::cout << "[op-()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( (a == (+1.0 / 0.0)) && b.is_pos_inf() )
- {
- return half::qNaN(); // ∞ - ∞ : undefined
- }
- else if ( (a == (-1.0 / 0.0)) && b.is_neg_inf() )
- {
- return half::qNaN(); // -∞ - -∞ : undefined
- }
- else
- {
- return half { a - float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator-(half a, float32_t b) noexcept
- {
- //std::cout << "[op-()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return half::qNaN();
- }
- else if ( a.is_pos_inf() && (b == (+1.0 / 0.0)) )
- {
- return half::qNaN(); // ∞ - ∞ : undefined
- }
- else if ( a.is_neg_inf() && (b == (-1.0 / 0.0)) )
- {
- return half::qNaN(); // -∞ - -∞ : undefined
- }
- else
- {
- return half { float32_t(a) - b };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator*(half a, half b) noexcept
- {
- //std::cout << "[op*()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( a.is_infinity() && b.is_zero() )
- {
- return half::qNaN(); // ±∞ × ±0 : undefined
- }
- else if ( a.is_zero() && b.is_infinity() )
- {
- return half::qNaN(); // ±0 × ±∞ : undefined
- }
- else
- {
- return half { float32_t(a) * float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator*(float32_t a, half b) noexcept
- {
- //std::cout << "[op*()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( isinf(a) && b.is_zero() )
- {
- return half::qNaN(); // ±∞ × ±0 : undefined
- }
- else if ( a == 0.0 && b.is_infinity() )
- {
- return half::qNaN(); // ±0 × ±∞ : undefined
- }
- else
- {
- return half { a * float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator*(half a, float32_t b) noexcept
- {
- //std::cout << "[op*()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return half::qNaN();
- }
- else if ( a.is_infinity() && b == 0.0 )
- {
- return half::qNaN(); // ±∞ × ±0 : undefined
- }
- else if ( a.is_zero() && isinf(b) )
- {
- return half::qNaN(); // ±0 × ±∞ : undefined
- }
- else
- {
- return half { float32_t(a) * b };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator/(half a, half b) noexcept
- {
- //std::cout << "[op/()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( a.is_zero() && b.is_zero() )
- {
- return half::qNaN(); // ±0 ÷ ±0 : undefined
- }
- else if ( a.is_infinity() && b.is_infinity() )
- {
- return half::qNaN(); // ±∞ ÷ ±∞ : undefined
- }
- else
- {
- return half { float32_t(a) / float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator/(float32_t a, half b) noexcept
- {
- //std::cout << "[op/()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return half::qNaN();
- }
- else if ( (a == 0.0) && b.is_zero() )
- {
- return half::qNaN(); // ±0 ÷ ±0 : undefined
- }
- else if ( isinf(a) && b.is_infinity() )
- {
- return half::qNaN(); // ±∞ ÷ ±∞ : undefined
- }
- else
- {
- return half { a / float32_t(b) };
- }
- }
- //---------------------------------------------------------------------------//
- inline half operator/(half a, float32_t b) noexcept
- {
- //std::cout << "[op/()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return half::qNaN();
- }
- else if ( a.is_zero() && (b == 0.0) )
- {
- return half::qNaN(); // ±0 ÷ ±0 : undefined
- }
- else if ( a.is_infinity() && isinf(b) )
- {
- return half::qNaN(); // ±∞ ÷ ±∞ : undefined
- }
- else
- {
- return half { float32_t(a) / b };
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator==(half a, half b) noexcept
- {
- //std::cout << "[op==()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return false;
- }
- else
- {
- return a.bits() == b.bits();
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator==(float32_t a, half b) noexcept
- {
- //std::cout << "[op==()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return false;
- }
- else
- {
- return a == float32_t(b);
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator==(half a, float32_t b) noexcept
- {
- //std::cout << "[op==()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return false;
- }
- else
- {
- return float32_t(a) == b;
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator!=(half a, half b) noexcept
- {
- //std::cout << "[op!=()]" << std::endl;
- return !operator==(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator!=(float32_t a, half b) noexcept
- {
- //std::cout << "[op!=()]" << std::endl;
- return !operator==(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator!=(half a, float32_t b) noexcept
- {
- //std::cout << "[op!=()]" << std::endl;
- return !operator==(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator<(half a, half b) noexcept
- {
- //std::cout << "[op<()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return false;
- }
- else
- {
- return float32_t(a) < float32_t(b);
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator<(float32_t a, half b) noexcept
- {
- //std::cout << "[op<()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return false;
- }
- else
- {
- return a < float32_t(b);
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator<(half a, float32_t b) noexcept
- {
- //std::cout << "[op<()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return false;
- }
- else
- {
- return float32_t(a) < b;
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator>=(half a, half b) noexcept
- {
- //std::cout << "[op>=()]" << std::endl;
- return !operator<(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator>=(float32_t a, half b) noexcept
- {
- //std::cout << "[op>=()]" << std::endl;
- return !operator<(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator>=(half a, float32_t b) noexcept
- {
- //std::cout << "[op>=()]" << std::endl;
- return !operator<(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator>(half a, half b) noexcept
- {
- //std::cout << "[op>()]" << std::endl;
- if ( a.is_NaN() || b.is_NaN() )
- {
- return false;
- }
- else
- {
- return float32_t(a) > float32_t(b);
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator>(float32_t a, half b) noexcept
- {
- //std::cout << "[op>()]" << std::endl;
- if ( isnan(a) || b.is_NaN() )
- {
- return false;
- }
- else
- {
- return a > float32_t(b);
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator>(half a, float32_t b) noexcept
- {
- //std::cout << "[op>()]" << std::endl;
- if ( a.is_NaN() || isnan(b) )
- {
- return false;
- }
- else
- {
- return float32_t(a) > b;
- }
- }
- //---------------------------------------------------------------------------//
- inline bool operator<=(half a, half b) noexcept
- {
- //std::cout << "[op<=()]" << std::endl;
- return !operator>(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator<=(float32_t a, half b) noexcept
- {
- //std::cout << "[op<=()]" << std::endl;
- return !operator>(a, b);
- }
- //---------------------------------------------------------------------------//
- inline bool operator<=(half a, float32_t b) noexcept
- {
- //std::cout << "[op>=()]" << std::endl;
- return !operator>(a, b);
- }
- //---------------------------------------------------------------------------//
- // Stream I/O
- //---------------------------------------------------------------------------//
- inline std::ostream& operator<<(std::ostream& stream, half lhs)
- {
- switch ( lhs.bits() )
- {
- case HALF_POS_INF_BIT:
- {
- stream << u8"∞";
- break;
- }
- case HALF_NEG_INF_BIT:
- {
- stream << u8"-∞";
- break;
- }
- case HALF_Q_NAN_BIT:
- {
- stream << u8"qNaN";
- break;
- }
- case HALF_S_NAN_BIT:
- {
- stream << u8"sNaN";
- break;
- }
- default:
- {
- stream << static_cast<float32_t>(lhs);
- break;
- }
- }
- return stream;
- }
- //---------------------------------------------------------------------------//
- inline std::istream& operator>>(std::istream& stream, half& lhs)
- {
- float32_t value;
- stream >> value;
- lhs.operator=(value);
- return stream;
- }
- //---------------------------------------------------------------------------//
- } // namespace IEEE754
- //---------------------------------------------------------------------------//
- #endif
- // half.hpp
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement