Advertisement
Guest User

Untitled

a guest
Dec 20th, 2015
80
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 37.36 KB | None | 0 0
  1. #pragma once
  2.  
  3. #ifndef _HALF_H_
  4. #define _HALF_H_
  5.  
  6. //---------------------------------------------------------------------------//
  7. //
  8. // half.hpp
  9. // ヘッダファイルだけで使える半精度浮動小数点数
  10. // Portable implementation of IEEE 754 half-precision floating-point format
  11. // Copyright (C) tapetums 2015
  12. //
  13. //---------------------------------------------------------------------------//
  14. //
  15. // Copyright (c) 2006, Industrial Light & Magic, a division of Lucasfilm
  16. // Entertainment Company Ltd. Portions contributed and copyright held by
  17. // others as indicated. All rights reserved.
  18. //
  19. // Redistribution and use in source and binary forms, with or without
  20. // modification, are permitted provided that the following conditions are
  21. // met:
  22. //
  23. // * Redistributions of source code must retain the above
  24. // copyright notice, this list of conditions and the following
  25. // disclaimer.
  26. //
  27. // * Redistributions in binary form must reproduce the above
  28. // copyright notice, this list of conditions and the following
  29. // disclaimer in the documentation and/or other materials provided with
  30. // the distribution.
  31. //
  32. // * Neither the name of Industrial Light & Magic nor the names of
  33. // any other contributors to this software may be used to endorse or
  34. // promote products derived from this software without specific prior
  35. // written permission.
  36. //
  37. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  38. // IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  39. // THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  40. // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  41. // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  42. // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  43. // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  44. // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  45. // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  46. // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  47. // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  48. //
  49. //---------------------------------------------------------------------------//
  50. // Primary authors:
  51. // Florian Kainz <kainz@ilm.com>
  52. // Rod Bogart <rgb@ilm.com>
  53. //
  54. // Modification for portable implementation:
  55. // tapetums <tapetums@live.jp>
  56. //---------------------------------------------------------------------------//
  57.  
  58. #include <cstdint>
  59. #include <cmath>
  60.  
  61. #include <iostream>
  62.  
  63. //---------------------------------------------------------------------------//
  64.  
  65. namespace IEEE754 {
  66.  
  67. //---------------------------------------------------------------------------//
  68.  
  69. using float32_t = float;
  70.  
  71. //---------------------------------------------------------------------------//
  72. // Class
  73. //---------------------------------------------------------------------------//
  74.  
  75. class half
  76. {
  77. private: // types
  78. union uif { uint32_t i; float32_t f; };
  79.  
  80. private: // members
  81. uint16_t data { 0 };
  82.  
  83. public: // ctor / dtor
  84. constexpr half() = default;
  85. ~half() = default;
  86.  
  87. half(const half&) = default;
  88. half& operator=(const half&) = default;
  89.  
  90. half(half&&) noexcept = default;
  91. half& operator=(half&&) noexcept = default;
  92.  
  93. explicit half(float32_t f) noexcept { operator=(f); }
  94.  
  95. public: // operators
  96. operator float32_t() const noexcept;
  97.  
  98. constexpr half operator-() const noexcept;
  99.  
  100. half& operator=(float32_t) noexcept;
  101.  
  102. half& operator+=(half) noexcept;
  103. half& operator+=(float32_t) noexcept;
  104.  
  105. half& operator-=(half) noexcept;
  106. half& operator-=(float32_t) noexcept;
  107.  
  108. half& operator*=(half) noexcept;
  109. half& operator*=(float32_t) noexcept;
  110.  
  111. half& operator/=(half) noexcept;
  112. half& operator/=(float32_t) noexcept;
  113.  
  114. public: // methods
  115. half round(uint8_t digits) const noexcept;
  116.  
  117. public: // properties
  118. constexpr bool is_finite() const noexcept;
  119. constexpr bool is_normalized() const noexcept;
  120. constexpr bool is_denormalized() const noexcept;
  121. constexpr bool is_zero() const noexcept;
  122. constexpr bool is_NaN() const noexcept;
  123. constexpr bool is_infinity() const noexcept;
  124. constexpr bool is_negative() const noexcept;
  125. constexpr bool is_pos_inf() const noexcept;
  126. constexpr bool is_neg_inf() const noexcept;
  127.  
  128. public: // constant values
  129. static constexpr half pos_inf() noexcept { half h; h.data = 0x7C00; return h; }
  130. static constexpr half neg_inf() noexcept { half h; h.data = 0xFC00; return h; }
  131. static constexpr half qNaN() noexcept { half h; h.data = 0x7FFF; return h; }
  132. static constexpr half sNaN() noexcept { half h; h.data = 0x7DFF; return h; }
  133.  
  134. public: // accessors
  135. uint16_t bits() const noexcept { return data; }
  136. half& bits(uint16_t bits) noexcept { data = bits; return *this; }
  137.  
  138. private: // internal methods
  139. static uint16_t convert(int32_t) noexcept;
  140. static float32_t overflow() noexcept;
  141. };
  142.  
  143. //---------------------------------------------------------------------------//
  144. // Limits
  145. //
  146. // Visual C++ will complain if HALF_MIN, HALF_NRM_MIN etc. are not float
  147. // constants, but at least one other compiler (gcc 2.96) produces incorrect
  148. // results if they are.
  149. //---------------------------------------------------------------------------//
  150.  
  // The four magnitude limits are float constants when building with
  // Visual C++ on Windows and double constants everywhere else.
  #if (defined _WIN32 || defined _WIN64) && defined _MSC_VER
  static constexpr float HALF_MIN     = 5.96046448e-08f; // Smallest positive half
  static constexpr float HALF_NRM_MIN = 6.10351562e-05f; // Smallest positive normalized half
  static constexpr float HALF_MAX     = 65504.0f;        // Largest positive half
  static constexpr float HALF_EPSILON = 0.00097656f;     // Smallest positive e for which
                                                         // half (1.0 + e) != half (1.0)
  #else
  static constexpr double HALF_MIN     = 5.96046448e-08; // Smallest positive half
  static constexpr double HALF_NRM_MIN = 6.10351562e-05; // Smallest positive normalized half
  static constexpr double HALF_MAX     = 65504.0;        // Largest positive half
  static constexpr double HALF_EPSILON = 0.00097656;     // Smallest positive e for which
                                                         // half (1.0 + e) != half (1.0)
  #endif

  // Number of digits in the mantissa (significand + hidden leading 1).
  static constexpr int HALF_MANT_DIG = 11;

  // Number of base-10 digits that can be represented without change.
  static constexpr int HALF_DIG = 2;

  // Base of the exponent.
  static constexpr int HALF_RADIX = 2;

  // Minimum negative integer such that HALF_RADIX raised to the power of
  // one less than that integer is a normalized half.
  static constexpr int HALF_MIN_EXP = -13;

  // Maximum positive integer such that HALF_RADIX raised to the power of
  // one less than that integer is a normalized half.
  static constexpr int HALF_MAX_EXP = 16;

  // Minimum negative integer such that 10 raised to that power is a
  // normalized half.
  static constexpr int HALF_MIN_10_EXP = -4;

  // Maximum positive integer such that 10 raised to that power is a
  // normalized half.
  static constexpr int HALF_MAX_10_EXP = 4;

  // Raw 16-bit encodings of the special values.
  static constexpr int HALF_POS_INF_BIT = 0x7C00; // +infinity
  static constexpr int HALF_NEG_INF_BIT = 0xFC00; // -infinity
  static constexpr int HALF_Q_NAN_BIT   = 0x7FFF; // quiet NaN
  static constexpr int HALF_S_NAN_BIT   = 0x7DFF; // signaling NaN
  189.  
  190. //---------------------------------------------------------------------------//
  191. //
  192. // Implementation --
  193. //
  194. // Representation of a float:
  195. //
  196. // We assume that a float, f, is an IEEE 754 single-precision
  197. // floating point number, whose bits are arranged as follows:
  198. //
  199. //     31 (msb)
  200. //     |
  201. //     | 30     23
  202. //     | |      |
  203. //     | |      | 22                    0 (lsb)
  204. //     | |      | |                     |
  205. //     X XXXXXXXX XXXXXXXXXXXXXXXXXXXXXXX
  206. //
  207. //     s e        m
  208. //
  209. // S is the sign-bit, e is the exponent and m is the significand.
  210. //
  211. // If e is between 1 and 254, f is a normalized number:
  212. //
  213. //
  214. //     f = (-1)^s * 2^(e-127) * 1.m
  215. //
  216. // If e is 0, and m is not zero, f is a denormalized number:
  217. //
  218. //
  219. //     f = (-1)^s * 2^(-126) * 0.m
  220. //
  221. // If e and m are both zero, f is zero:
  222. //
  223. // f = 0.0
  224. //
  225. // If e is 255, f is an "infinity" or "not a number" (NAN),
  226. // depending on whether m is zero or not.
  227. //
  228. // Examples:
  229. //
  230. // 0 00000000 00000000000000000000000 = 0.0
  231. // 0 01111110 00000000000000000000000 = 0.5
  232. // 0 01111111 00000000000000000000000 = 1.0
  233. // 0 10000000 00000000000000000000000 = 2.0
  234. // 0 10000000 10000000000000000000000 = 3.0
  235. // 1 10000101 11110000010000000000000 = -124.0625
  236. // 0 11111111 00000000000000000000000 = +infinity
  237. // 1 11111111 00000000000000000000000 = -infinity
  238. // 0 11111111 10000000000000000000000 = NAN
  239. // 1 11111111 11111111111111111111111 = NAN
  240. //
  241. // Representation of a half:
  242. //
  243. // Here is the bit-layout for a half number, h:
  244. //
  245. //     15 (msb)
  246. //     |
  247. //     | 14  10
  248. //     | |   |
  249. //     | |   | 9        0 (lsb)
  250. //     | |   | |        |
  251. //     X XXXXX XXXXXXXXXX
  252. //
  253. //     s e     m
  254. //
  255. // S is the sign-bit, e is the exponent and m is the significand.
  256. //
  257. // If e is between 1 and 30, h is a normalized number:
  258. //
  259. //
  260. //     h = (-1)^s * 2^(e-15) * 1.m
  261. //
  262. // If e is 0, and m is not zero, h is a denormalized number:
  263. //
  264. //
  265. //     h = (-1)^s * 2^(-14) * 0.m
  266. //
  267. // If e and m are both zero, h is zero:
  268. //
  269. // h = 0.0
  270. //
  271. // If e is 31, h is an "infinity" or "not a number" (NAN),
  272. // depending on whether m is zero or not.
  273. //
  274. // Examples:
  275. //
  276. // 0 00000 0000000000 = 0.0
  277. // 0 01110 0000000000 = 0.5
  278. // 0 01111 0000000000 = 1.0
  279. // 0 10000 0000000000 = 2.0
  280. // 0 10000 1000000000 = 3.0
  281. // 1 10101 1111000001 = -124.0625
  282. // 0 11111 0000000000 = +infinity
  283. // 1 11111 0000000000 = -infinity
  284. // 0 11111 1000000000 = NAN
  285. // 1 11111 1111111111 = NAN
  286. //
  287. // Conversion:
  288. //
  289. // Converting from a float to a half requires some non-trivial bit
  290. // manipulations. In some cases, this makes conversion relatively
  291. // slow, but the most common case is accelerated via table lookups.
  292. //
  293. // Converting back from a half to a float is easier because we don't
  294. // have to do any rounding. In addition, there are only 65536
  295. // different half numbers; we can convert each of those numbers once
  296. // and store the results in a table. Later, all conversions can be
  297. // done using only simple table lookups.
  298. //
  299. // <NOTE>
  300. // tapetums <tapetums@live.jp> removed table lookup features.
  301. // This change caused a drop of the speed
  302. // in exchange for improvement of the portability.
  303. //
  304. //---------------------------------------------------------------------------//
  305.  
  306. //---------------------------------------------------------------------------//
  307. // Operators
  308. //---------------------------------------------------------------------------//
  309.  
  310. inline half::operator float32_t() const noexcept
  311. {
  312. if ( data == 0x0000 )
  313. {
  314. return 0.0;
  315. }
  316. else if ( data == 0x8000 )
  317. {
  318. return -0.0;
  319. }
  320.  
  321. int32_t s = (data << 16) & 0x8000'0000;
  322. int32_t e = ((data >> 10) & 0b0001'1111) + (127 - 15);
  323. int32_t m = data & 0x0000'03FF;
  324.  
  325. uif tmp;
  326. tmp.i = s | (e << 23) | (m << (23 - 10));
  327.  
  328. return tmp.f;
  329. }
  330.  
  331. //---------------------------------------------------------------------------//
  332.  
  333. inline constexpr half half::operator-() const noexcept
  334. {
  335. half h;
  336. h.data = data ^ 0x8000;
  337. return h;
  338. }
  339.  
  340. //---------------------------------------------------------------------------//
  341.  
  342. inline half& half::operator=(float32_t f) noexcept
  343. {
  344. uif tmp;
  345. tmp.f = f;
  346.  
  347. if ( f == 0.0 )
  348. {
  349. // Common special case - zero.
  350. // Preserve the zero's sign bit.
  351. data = (tmp.i >> 16);
  352. }
  353. else
  354. {
  355. int32_t s = (tmp.i >> 16) & 0x0000'8000;
  356. int32_t e = ((tmp.i >> 23) & 0x0000'00FF) - (127 - 15);
  357. int32_t m = tmp.i & 0x007F'FFFF;
  358. //std::cout << " s = " << std::dec << s << std::endl;
  359. //std::cout << " e = " << std::dec << e << std::endl;
  360. //std::cout << " m = 0x" << std::hex << m << std::endl;
  361.  
  362. if ( 0 < e && e < 31 )
  363. {
  364. // Simple case - round the significand, m, to 10
  365. // bits and combine it with the sign and exponent.
  366. data = s | (e << 10) | (m >> (23 - 10));
  367. }
  368. else
  369. {
  370. // Difficult case - call a function.
  371. data = convert(tmp.i); // too small
  372. }
  373. }
  374.  
  375. //std::cout << " data = 0x" << std::hex << data << std::endl;
  376. return *this;
  377. }
  378.  
  379. //---------------------------------------------------------------------------//
  380.  
  381. inline half& half::operator+=(half h) noexcept
  382. {
  383. //std::cout << "[op+=()]" << std::endl;
  384. if ( is_NaN() )
  385. {
  386. // Return NaN.
  387. }
  388. else if ( h.is_NaN() )
  389. {
  390. data = h.data; // Propagate NaN.
  391. }
  392. else if ( is_pos_inf() && h.is_neg_inf() )
  393. {
  394. data = HALF_Q_NAN_BIT; // ∞ + -∞ : undefined
  395. }
  396. else if ( is_neg_inf() && h.is_pos_inf() )
  397. {
  398. data = HALF_Q_NAN_BIT; // -∞ + ∞ : undefined
  399. }
  400. else
  401. {
  402. operator=(float32_t(*this) + float32_t(h));
  403. }
  404. return *this;
  405. }
  406.  
  407. //---------------------------------------------------------------------------//
  408.  
  409. inline half& half::operator+=(float32_t f) noexcept
  410. {
  411. //std::cout << "[op+=()]" << std::endl;
  412. if ( is_NaN() )
  413. {
  414. // Return NaN.
  415. }
  416. else if ( isnan(f) )
  417. {
  418. data = HALF_Q_NAN_BIT; // Propagate NaN.
  419. }
  420. else if ( is_pos_inf() && (f == (-1.0 / 0.0)) )
  421. {
  422. data = HALF_Q_NAN_BIT; // Undefined
  423. }
  424. else if ( is_neg_inf() && (f == (+1.0 / 0.0)) )
  425. {
  426. data = HALF_Q_NAN_BIT; // Undefined
  427. }
  428. else
  429. {
  430. operator=(float32_t(*this) + f);
  431. }
  432. return *this;
  433. }
  434.  
  435. //---------------------------------------------------------------------------//
  436.  
  437. inline half& half::operator-=(half h) noexcept
  438. {
  439. //std::cout << "[op-=()]" << std::endl;
  440. if ( is_NaN() )
  441. {
  442. // Return NaN.
  443. }
  444. else if ( h.is_NaN() )
  445. {
  446. data = h.data; // Propagate NaN.
  447. }
  448. else if ( is_pos_inf() && h.is_pos_inf() )
  449. {
  450. data = HALF_Q_NAN_BIT; // ∞ - ∞ : undefined
  451. }
  452. else if ( is_neg_inf() && h.is_neg_inf() )
  453. {
  454. data = HALF_Q_NAN_BIT; // -∞ - -∞ : undefined
  455. }
  456. else
  457. {
  458. operator=(float32_t(*this) - float32_t(h));
  459. }
  460. return *this;
  461. }
  462.  
  463. //---------------------------------------------------------------------------//
  464.  
  465. inline half& half::operator-=(float32_t f) noexcept
  466. {
  467. //std::cout << "[op-=()]" << std::endl;
  468. if ( is_NaN() )
  469. {
  470. // Return NaN.
  471. }
  472. else if ( isnan(f) )
  473. {
  474. data = HALF_Q_NAN_BIT; // Propagate NaN.
  475. }
  476. else if ( is_pos_inf() && (f == (+1.0 / 0.0)) )
  477. {
  478. data = HALF_Q_NAN_BIT; // ∞ - ∞ : undefined
  479. }
  480. else if ( is_neg_inf() && (f == (-1.0 / 0.0)) )
  481. {
  482. data = HALF_Q_NAN_BIT; // -∞ - -∞ : undefined
  483. }
  484. else
  485. {
  486. operator=(float32_t(*this) - f);
  487. }
  488. return *this;
  489. }
  490.  
  491. //---------------------------------------------------------------------------//
  492.  
  493. inline half& half::operator*=(half h) noexcept
  494. {
  495. //std::cout << "[op*=()]" << std::endl;
  496. if ( is_NaN() )
  497. {
  498. // Return NaN.
  499. }
  500. else if ( h.is_NaN() )
  501. {
  502. data = h.data; // Propagate NaN.
  503. }
  504. else if ( is_infinity() && h.is_zero() )
  505. {
  506. data = HALF_Q_NAN_BIT; // ±∞ * ±0 : undefined
  507. }
  508. else if ( is_zero() && h.is_infinity() )
  509. {
  510. data = HALF_Q_NAN_BIT; // ±0 * ±∞ : undefined
  511. }
  512. else
  513. {
  514. operator=(float32_t(*this) * float32_t(h));
  515. }
  516. return *this;
  517. }
  518.  
  519. //---------------------------------------------------------------------------//
  520.  
  521. inline half& half::operator*=(float32_t f) noexcept
  522. {
  523. //std::cout << "[op*=()]" << std::endl;
  524. if ( is_NaN() )
  525. {
  526. // Return NaN.
  527. }
  528. else if ( isnan(f) )
  529. {
  530. data = HALF_Q_NAN_BIT; // Propagate NaN.
  531. }
  532. else if ( is_infinity() && (f == 0.0) )
  533. {
  534. data = HALF_Q_NAN_BIT; // ±∞ * ±0 : undefined
  535. }
  536. else if ( is_zero() && isinf(f) )
  537. {
  538. data = HALF_Q_NAN_BIT; // ±0 * ±∞ : undefined
  539. }
  540. else
  541. {
  542. operator=(float32_t(*this) * f);
  543. }
  544. return *this;
  545. }
  546.  
  547. //---------------------------------------------------------------------------//
  548.  
  549. inline half& half::operator/=(half h) noexcept
  550. {
  551. //std::cout << "[op/=()]" << std::endl;
  552. if ( is_NaN() )
  553. {
  554. // Return NaN.
  555. }
  556. else if ( h.is_NaN() )
  557. {
  558. data = h.data; // Propagate NaN.
  559. }
  560. else if ( is_zero() && h.is_zero() )
  561. {
  562. data = HALF_Q_NAN_BIT; // ±0 ÷ ±0 : undefined
  563. }
  564. else if ( is_infinity() && h.is_infinity() )
  565. {
  566. data = HALF_Q_NAN_BIT; // ±∞ ÷ ±∞ : undefined
  567. }
  568. else
  569. {
  570. operator=(float32_t(*this) / float32_t(h));
  571. }
  572. return *this;
  573. }
  574.  
  575. //---------------------------------------------------------------------------//
  576.  
  577. inline half& half::operator/=(float32_t f) noexcept
  578. {
  579. //std::cout << "[op/=()]" << std::endl;
  580. if ( is_NaN() )
  581. {
  582. // Return NaN.
  583. }
  584. else if ( isnan(f) )
  585. {
  586. data = HALF_Q_NAN_BIT; // Propagate NaN.
  587. }
  588. else if ( is_zero() && (f == 0.0) )
  589. {
  590. data = HALF_Q_NAN_BIT; // ±0 ÷ ±0 : undefined
  591. }
  592. else if ( is_infinity() && isinf(f) )
  593. {
  594. data = HALF_Q_NAN_BIT; // ±∞ ÷ ±∞ : undefined
  595. }
  596. else
  597. {
  598. operator=(float32_t(*this) / f);
  599. }
  600. return *this;
  601. }
  602.  
  603. //---------------------------------------------------------------------------//
  604. // Methods
  605. //---------------------------------------------------------------------------//
  606.  
  607. //---------------------------------------------------------
  608. // Round to n-bit precision (n should be between 0 and 10).
  609. // After rounding, the significand's 10-n least significant
  610. // bits will be zero.
  611. //---------------------------------------------------------
  612. inline half half::round(uint8_t n) const noexcept
  613. {
  614. //std::cout << "[round()]" << std::endl;
  615.  
  616. // Parameter check.
  617. if ( n >= 10 ) { return *this; }
  618.  
  619. // Disassemble h into the sign, s,
  620. // and the combined exponent and significand, e.
  621. uint16_t s = data & 0x8000;
  622. uint16_t e = data & 0x7FFF;
  623.  
  624. // Round the exponent and significand to the nearest value
  625. // where ones occur only in the (10-n) most significant bits.
  626. // Note that the exponent adjusts automatically if rounding
  627. // up causes the significand to overflow.
  628. e >>= 9 - n;
  629. e += e & 1;
  630. e <<= 9 - n;
  631.  
  632. // Check for exponent overflow.
  633. if ( e >= 0x7C00 )
  634. {
  635. // Overflow occurred -- truncate instead of rounding.
  636. e = data;
  637. e >>= 10 - n;
  638. e <<= 10 - n;
  639. }
  640.  
  641. // Put the original sign bit back.
  642. half h;
  643. h.data = s | e;
  644. return h;
  645. }
  646.  
  647. //---------------------------------------------------------------------------//
  648. // Properties
  649. //---------------------------------------------------------------------------//
  650.  
  651. inline constexpr bool half::is_finite() const noexcept
  652. {
  653. uint16_t e = (data >> 10) & 0x001F;
  654. return e < 31;
  655. }
  656.  
  657. //---------------------------------------------------------------------------//
  658.  
  659. inline constexpr bool half::is_normalized() const noexcept
  660. {
  661. uint16_t e = (data >> 10) & 0x001F;
  662. return e > 0 && e < 31;
  663. }
  664.  
  665. //---------------------------------------------------------------------------//
  666.  
  667. inline constexpr bool half::is_denormalized() const noexcept
  668. {
  669. uint16_t e = (data >> 10) & 0x001F;
  670. uint16_t m = data & 0x03FF;
  671. return e == 0 && m != 0;
  672. }
  673.  
  674. //---------------------------------------------------------------------------//
  675.  
  676. inline constexpr bool half::is_zero() const noexcept
  677. {
  678. return (data & 0x7FFF) == 0;
  679. }
  680.  
  681. //---------------------------------------------------------------------------//
  682.  
  683. inline constexpr bool half::is_NaN() const noexcept
  684. {
  685. uint16_t e = (data >> 10) & 0x001F;
  686. uint16_t m = data & 0x03FF;
  687. return e == 31 && m != 0;
  688. }
  689.  
  690. //---------------------------------------------------------------------------//
  691.  
  692. inline constexpr bool half::is_infinity() const noexcept
  693. {
  694. uint16_t e = (data >> 10) & 0x001F;
  695. uint16_t m = data & 0x03FF;
  696. return e == 31 && m == 0;
  697. }
  698.  
  699. //---------------------------------------------------------------------------//
  700.  
  701. inline constexpr bool half::is_negative() const noexcept
  702. {
  703. return (data & 0x8000) != 0;
  704. }
  705.  
  706. //---------------------------------------------------------------------------//
  707.  
  708. inline constexpr bool half::is_pos_inf() const noexcept
  709. {
  710. return data == HALF_POS_INF_BIT;
  711. }
  712.  
  713. //---------------------------------------------------------------------------//
  714.  
  715. inline constexpr bool half::is_neg_inf() const noexcept
  716. {
  717. return data == HALF_NEG_INF_BIT;
  718. }
  719.  
  720. //---------------------------------------------------------------------------//
  721. // Internal Methods
  722. //---------------------------------------------------------------------------//
  723.  
  724. inline uint16_t half::convert(int32_t i) noexcept
  725. {
  726. //std::cout << "[convert()]" << std::endl;
  727.  
  728. int32_t s = (i >> 16) & 0x0000'8000;
  729. int32_t e = ((i >> 23) & 0x0000'00FF) - (127 - 15);
  730. int32_t m = i & 0x007F'FFFF;
  731. //std::cout << " s = " << std::dec << s << std::endl;
  732. //std::cout << " e = " << std::dec << e << std::endl;
  733. //std::cout << " m = 0x" << std::hex << m << std::endl;
  734.  
  735. // Now reassemble s, e and m into a half:
  736. if ( e <= 0 )
  737. {
  738. if ( e < -10 )
  739. {
  740. // E is less than -10. The absolute value of f is
  741. // less than HALF_MIN (f may be a small normalized
  742. // float, a denormalized float or a zero).
  743.  
  744. // We convert f to a half zero with the same sign as f.
  745. return s;
  746. }
  747.  
  748. // E is between -10 and 0. F is a normalized float
  749. // whose magnitude is less than HALF_NRM_MIN.
  750.  
  751. // We convert f to a denormalized half.
  752.  
  753. // Add an explicit leading 1 to the significand.
  754. m = m | 0x00800000;
  755.  
  756. // Round to m to the nearest (10+e)-bit value (with e between
  757. // -10 and 0); in case of a tie, round to the nearest even value.
  758.  
  759. // Rounding may cause the significand to overflow and make
  760. // our number normalized. Because of the way a half's bits
  761. // are laid out, we don't have to treat this case separately;
  762. // the code below will handle it correctly.
  763.  
  764. const auto t = 14 - e;
  765. const auto a = (1 << (t - 1)) - 1;
  766. const auto b = (m >> t) & 1;
  767.  
  768. m = (m + a + b) >> t;
  769.  
  770. // Assemble the half from s, e (zero) and m.
  771. return s | m;
  772. }
  773. else if ( e == 0xFF - (127 - 15) )
  774. {
  775. if ( m == 0 )
  776. {
  777. // F is an infinity; convert f to a half
  778. // infinity with the same sign as f.
  779.  
  780. return s | 0x7C00;
  781. }
  782. else
  783. {
  784. // F is a NAN; we produce a half NAN that preserves
  785. // the sign bit and the 10 leftmost bits of the
  786. // significand of f, with one exception: If the 10
  787. // leftmost bits are all zero, the NAN would turn
  788. // into an infinity, so we have to set at least one
  789. // bit in the significand.
  790.  
  791. m >>= 13;
  792. return s | 0x7C00 | m | (m == 0);
  793. }
  794. }
  795. else
  796. {
  797. // E is greater than zero. F is a normalized float.
  798. // We try to convert f to a normalized half.
  799.  
  800. // Round to m to the nearest 10-bit value. In case of
  801. // a tie, round to the nearest even value.
  802. m = m + 0x00000FFF + ((m >> 13) & 1);
  803. if (m & 0x00800000)
  804. {
  805. m = 0; // overflow in significand,
  806. e += 1; // adjust exponent
  807. }
  808.  
  809. // Handle exponent overflow
  810. if ( e > 30 )
  811. {
  812. overflow(); // Cause a hardware floating point overflow;
  813. return s | 0x7C00; // if this returns, the half becomes an
  814. } // infinity with the same sign as f.
  815.  
  816. // Assemble the half from s, e and m.
  817. return s | (e << 10) | (m >> (23 -10));
  818. }
  819. }
  820.  
  821. //---------------------------------------------------------------------------//
  822.  
  823. inline float32_t half::overflow() noexcept
  824. {
  825. volatile float32_t f { 1e10 };
  826.  
  827. for ( auto i = 0; i < 10; ++i )
  828. {
  829. f *= f; // this will overflow before the for­loop terminates
  830. }
  831.  
  832. return f;
  833. }
  834.  
  835. //---------------------------------------------------------------------------//
  836. // Global Operators
  837. //---------------------------------------------------------------------------//
  838.  
  839. inline half operator+(half a, half b) noexcept
  840. {
  841. //std::cout << "[op+()]" << std::endl;
  842. if ( a.is_NaN() || b.is_NaN() )
  843. {
  844. return half::qNaN();
  845. }
  846. else if ( a.is_pos_inf() && b.is_neg_inf() )
  847. {
  848. return half::qNaN(); // ∞ + -∞ : undefined
  849. }
  850. else if ( a.is_neg_inf() && b.is_pos_inf() )
  851. {
  852. return half::qNaN(); // -∞ + ∞ : undefined
  853. }
  854. else
  855. {
  856. return half { float32_t(a) + float32_t(b) };
  857. }
  858. }
  859.  
  860. //---------------------------------------------------------------------------//
  861.  
  862. inline half operator+(float32_t a, half b) noexcept
  863. {
  864. //std::cout << "[op+()]" << std::endl;
  865. if ( isnan(a) || b.is_NaN() )
  866. {
  867. return half::qNaN();
  868. }
  869. else if ( (a == (+1.0 / 0.0)) && b.is_neg_inf() )
  870. {
  871. return half::qNaN(); // ∞ + -∞ : undefined
  872. }
  873. else if ( (b == (-1.0 / 0.0)) && b.is_pos_inf() )
  874. {
  875. return half::qNaN(); // -∞ + ∞ : undefined
  876. }
  877. else
  878. {
  879. return half { a + float32_t(b) };
  880. }
  881. }
  882.  
  883. //---------------------------------------------------------------------------//
  884.  
  885. inline half operator+(half a, float32_t b) noexcept
  886. {
  887. //std::cout << "[op+()]" << std::endl;
  888. if ( a.is_NaN() || isnan(b) )
  889. {
  890. return half::qNaN();
  891. }
  892. else if ( a.is_pos_inf() && (b == (-1.0 / 0.0)) )
  893. {
  894. return half::qNaN(); // ∞ + -∞ : undefined
  895. }
  896. else if ( a.is_neg_inf() && (b == (+1.0 / 0.0)) )
  897. {
  898. return half::qNaN(); // -∞ + ∞ : undefined
  899. }
  900. else
  901. {
  902. return half { float32_t(a) + b };
  903. }
  904. }
  905.  
  906. //---------------------------------------------------------------------------//
  907.  
  908. inline half operator-(half a, half b) noexcept
  909. {
  910. //std::cout << "[op-()]" << std::endl;
  911. if ( a.is_NaN() || b.is_NaN() )
  912. {
  913. return half::qNaN();
  914. }
  915. else if ( a.is_pos_inf() && b.is_pos_inf() )
  916. {
  917. return half::qNaN(); // ∞ - ∞ : undefined
  918. }
  919. else if ( a.is_neg_inf() && b.is_neg_inf() )
  920. {
  921. return half::qNaN(); // -∞ - -∞ : undefined
  922. }
  923. else
  924. {
  925. return half { float32_t(a) - float32_t(b) };
  926. }
  927. }
  928.  
  929. //---------------------------------------------------------------------------//
  930.  
  931. inline half operator-(float32_t a, half b) noexcept
  932. {
  933. //std::cout << "[op-()]" << std::endl;
  934. if ( isnan(a) || b.is_NaN() )
  935. {
  936. return half::qNaN();
  937. }
  938. else if ( (a == (+1.0 / 0.0)) && b.is_pos_inf() )
  939. {
  940. return half::qNaN(); // ∞ - ∞ : undefined
  941. }
  942. else if ( (a == (-1.0 / 0.0)) && b.is_neg_inf() )
  943. {
  944. return half::qNaN(); // -∞ - -∞ : undefined
  945. }
  946. else
  947. {
  948. return half { a - float32_t(b) };
  949. }
  950. }
  951.  
  952. //---------------------------------------------------------------------------//
  953.  
  954. inline half operator-(half a, float32_t b) noexcept
  955. {
  956. //std::cout << "[op-()]" << std::endl;
  957. if ( a.is_NaN() || isnan(b) )
  958. {
  959. return half::qNaN();
  960. }
  961. else if ( a.is_pos_inf() && (b == (+1.0 / 0.0)) )
  962. {
  963. return half::qNaN(); // ∞ - ∞ : undefined
  964. }
  965. else if ( a.is_neg_inf() && (b == (-1.0 / 0.0)) )
  966. {
  967. return half::qNaN(); // -∞ - -∞ : undefined
  968. }
  969. else
  970. {
  971. return half { float32_t(a) - b };
  972. }
  973. }
  974.  
  975. //---------------------------------------------------------------------------//
  976.  
  977. inline half operator*(half a, half b) noexcept
  978. {
  979. //std::cout << "[op*()]" << std::endl;
  980. if ( a.is_NaN() || b.is_NaN() )
  981. {
  982. return half::qNaN();
  983. }
  984. else if ( a.is_infinity() && b.is_zero() )
  985. {
  986. return half::qNaN(); // ±∞ × ±0 : undefined
  987. }
  988. else if ( a.is_zero() && b.is_infinity() )
  989. {
  990. return half::qNaN(); // ±0 × ±∞ : undefined
  991. }
  992. else
  993. {
  994. return half { float32_t(a) * float32_t(b) };
  995. }
  996. }
  997.  
  998. //---------------------------------------------------------------------------//
  999.  
  1000. inline half operator*(float32_t a, half b) noexcept
  1001. {
  1002. //std::cout << "[op*()]" << std::endl;
  1003. if ( isnan(a) || b.is_NaN() )
  1004. {
  1005. return half::qNaN();
  1006. }
  1007. else if ( isinf(a) && b.is_zero() )
  1008. {
  1009. return half::qNaN(); // ±∞ × ±0 : undefined
  1010. }
  1011. else if ( a == 0.0 && b.is_infinity() )
  1012. {
  1013. return half::qNaN(); // ±0 × ±∞ : undefined
  1014. }
  1015. else
  1016. {
  1017. return half { a * float32_t(b) };
  1018. }
  1019. }
  1020.  
  1021. //---------------------------------------------------------------------------//
  1022.  
  1023. inline half operator*(half a, float32_t b) noexcept
  1024. {
  1025. //std::cout << "[op*()]" << std::endl;
  1026. if ( a.is_NaN() || isnan(b) )
  1027. {
  1028. return half::qNaN();
  1029. }
  1030. else if ( a.is_infinity() && b == 0.0 )
  1031. {
  1032. return half::qNaN(); // ±∞ × ±0 : undefined
  1033. }
  1034. else if ( a.is_zero() && isinf(b) )
  1035. {
  1036. return half::qNaN(); // ±0 × ±∞ : undefined
  1037. }
  1038. else
  1039. {
  1040. return half { float32_t(a) * b };
  1041. }
  1042. }
  1043.  
  1044. //---------------------------------------------------------------------------//
  1045.  
  1046. inline half operator/(half a, half b) noexcept
  1047. {
  1048. //std::cout << "[op/()]" << std::endl;
  1049. if ( a.is_NaN() || b.is_NaN() )
  1050. {
  1051. return half::qNaN();
  1052. }
  1053. else if ( a.is_zero() && b.is_zero() )
  1054. {
  1055. return half::qNaN(); // ±0 ÷ ±0 : undefined
  1056. }
  1057. else if ( a.is_infinity() && b.is_infinity() )
  1058. {
  1059. return half::qNaN(); // ±∞ ÷ ±∞ : undefined
  1060. }
  1061. else
  1062. {
  1063. return half { float32_t(a) / float32_t(b) };
  1064. }
  1065. }
  1066.  
  1067. //---------------------------------------------------------------------------//
  1068.  
  1069. inline half operator/(float32_t a, half b) noexcept
  1070. {
  1071. //std::cout << "[op/()]" << std::endl;
  1072. if ( isnan(a) || b.is_NaN() )
  1073. {
  1074. return half::qNaN();
  1075. }
  1076. else if ( (a == 0.0) && b.is_zero() )
  1077. {
  1078. return half::qNaN(); // ±0 ÷ ±0 : undefined
  1079. }
  1080. else if ( isinf(a) && b.is_infinity() )
  1081. {
  1082. return half::qNaN(); // ±∞ ÷ ±∞ : undefined
  1083. }
  1084. else
  1085. {
  1086. return half { a / float32_t(b) };
  1087. }
  1088. }
  1089.  
  1090. //---------------------------------------------------------------------------//
  1091.  
  1092. inline half operator/(half a, float32_t b) noexcept
  1093. {
  1094. //std::cout << "[op/()]" << std::endl;
  1095. if ( a.is_NaN() || isnan(b) )
  1096. {
  1097. return half::qNaN();
  1098. }
  1099. else if ( a.is_zero() && (b == 0.0) )
  1100. {
  1101. return half::qNaN(); // ±0 ÷ ±0 : undefined
  1102. }
  1103. else if ( a.is_infinity() && isinf(b) )
  1104. {
  1105. return half::qNaN(); // ±∞ ÷ ±∞ : undefined
  1106. }
  1107. else
  1108. {
  1109. return half { float32_t(a) / b };
  1110. }
  1111. }
  1112.  
  1113. //---------------------------------------------------------------------------//
  1114.  
  1115. inline bool operator==(half a, half b) noexcept
  1116. {
  1117. //std::cout << "[op==()]" << std::endl;
  1118. if ( a.is_NaN() || b.is_NaN() )
  1119. {
  1120. return false;
  1121. }
  1122. else
  1123. {
  1124. return a.bits() == b.bits();
  1125. }
  1126. }
  1127.  
  1128. //---------------------------------------------------------------------------//
  1129.  
  1130. inline bool operator==(float32_t a, half b) noexcept
  1131. {
  1132. //std::cout << "[op==()]" << std::endl;
  1133. if ( isnan(a) || b.is_NaN() )
  1134. {
  1135. return false;
  1136. }
  1137. else
  1138. {
  1139. return a == float32_t(b);
  1140. }
  1141. }
  1142.  
  1143. //---------------------------------------------------------------------------//
  1144.  
  1145. inline bool operator==(half a, float32_t b) noexcept
  1146. {
  1147. //std::cout << "[op==()]" << std::endl;
  1148. if ( a.is_NaN() || isnan(b) )
  1149. {
  1150. return false;
  1151. }
  1152. else
  1153. {
  1154. return float32_t(a) == b;
  1155. }
  1156. }
  1157.  
  1158. //---------------------------------------------------------------------------//
  1159.  
  1160. inline bool operator!=(half a, half b) noexcept
  1161. {
  1162. //std::cout << "[op!=()]" << std::endl;
  1163. return !operator==(a, b);
  1164. }
  1165.  
  1166. //---------------------------------------------------------------------------//
  1167.  
  1168. inline bool operator!=(float32_t a, half b) noexcept
  1169. {
  1170. //std::cout << "[op!=()]" << std::endl;
  1171. return !operator==(a, b);
  1172. }
  1173.  
  1174. //---------------------------------------------------------------------------//
  1175.  
  1176. inline bool operator!=(half a, float32_t b) noexcept
  1177. {
  1178. //std::cout << "[op!=()]" << std::endl;
  1179. return !operator==(a, b);
  1180. }
  1181.  
  1182. //---------------------------------------------------------------------------//
  1183.  
  1184. inline bool operator<(half a, half b) noexcept
  1185. {
  1186. //std::cout << "[op<()]" << std::endl;
  1187. if ( a.is_NaN() || b.is_NaN() )
  1188. {
  1189. return false;
  1190. }
  1191. else
  1192. {
  1193. return float32_t(a) < float32_t(b);
  1194. }
  1195. }
  1196.  
  1197. //---------------------------------------------------------------------------//
  1198.  
  1199. inline bool operator<(float32_t a, half b) noexcept
  1200. {
  1201. //std::cout << "[op<()]" << std::endl;
  1202. if ( isnan(a) || b.is_NaN() )
  1203. {
  1204. return false;
  1205. }
  1206. else
  1207. {
  1208. return a < float32_t(b);
  1209. }
  1210. }
  1211.  
  1212. //---------------------------------------------------------------------------//
  1213.  
  1214. inline bool operator<(half a, float32_t b) noexcept
  1215. {
  1216. //std::cout << "[op<()]" << std::endl;
  1217. if ( a.is_NaN() || isnan(b) )
  1218. {
  1219. return false;
  1220. }
  1221. else
  1222. {
  1223. return float32_t(a) < b;
  1224. }
  1225. }
  1226.  
  1227. //---------------------------------------------------------------------------//
  1228.  
  1229. inline bool operator>=(half a, half b) noexcept
  1230. {
  1231. //std::cout << "[op>=()]" << std::endl;
  1232. return !operator<(a, b);
  1233. }
  1234.  
  1235. //---------------------------------------------------------------------------//
  1236.  
  1237. inline bool operator>=(float32_t a, half b) noexcept
  1238. {
  1239. //std::cout << "[op>=()]" << std::endl;
  1240. return !operator<(a, b);
  1241. }
  1242.  
  1243. //---------------------------------------------------------------------------//
  1244.  
  1245. inline bool operator>=(half a, float32_t b) noexcept
  1246. {
  1247. //std::cout << "[op>=()]" << std::endl;
  1248. return !operator<(a, b);
  1249. }
  1250.  
  1251. //---------------------------------------------------------------------------//
  1252.  
  1253. inline bool operator>(half a, half b) noexcept
  1254. {
  1255. //std::cout << "[op>()]" << std::endl;
  1256. if ( a.is_NaN() || b.is_NaN() )
  1257. {
  1258. return false;
  1259. }
  1260. else
  1261. {
  1262. return float32_t(a) > float32_t(b);
  1263. }
  1264. }
  1265.  
  1266. //---------------------------------------------------------------------------//
  1267.  
  1268. inline bool operator>(float32_t a, half b) noexcept
  1269. {
  1270. //std::cout << "[op>()]" << std::endl;
  1271. if ( isnan(a) || b.is_NaN() )
  1272. {
  1273. return false;
  1274. }
  1275. else
  1276. {
  1277. return a > float32_t(b);
  1278. }
  1279. }
  1280.  
  1281. //---------------------------------------------------------------------------//
  1282.  
  1283. inline bool operator>(half a, float32_t b) noexcept
  1284. {
  1285. //std::cout << "[op>()]" << std::endl;
  1286. if ( a.is_NaN() || isnan(b) )
  1287. {
  1288. return false;
  1289. }
  1290. else
  1291. {
  1292. return float32_t(a) > b;
  1293. }
  1294. }
  1295.  
  1296. //---------------------------------------------------------------------------//
  1297.  
  1298. inline bool operator<=(half a, half b) noexcept
  1299. {
  1300. //std::cout << "[op<=()]" << std::endl;
  1301. return !operator>(a, b);
  1302. }
  1303.  
  1304. //---------------------------------------------------------------------------//
  1305.  
  1306. inline bool operator<=(float32_t a, half b) noexcept
  1307. {
  1308. //std::cout << "[op<=()]" << std::endl;
  1309. return !operator>(a, b);
  1310. }
  1311.  
  1312. //---------------------------------------------------------------------------//
  1313.  
  1314. inline bool operator<=(half a, float32_t b) noexcept
  1315. {
  1316. //std::cout << "[op>=()]" << std::endl;
  1317. return !operator>(a, b);
  1318. }
  1319.  
  1320. //---------------------------------------------------------------------------//
  1321. // Stream I/O
  1322. //---------------------------------------------------------------------------//
  1323.  
  1324. inline std::ostream& operator<<(std::ostream& stream, half lhs)
  1325. {
  1326. switch ( lhs.bits() )
  1327. {
  1328. case HALF_POS_INF_BIT:
  1329. {
  1330. stream << u8"∞";
  1331. break;
  1332. }
  1333. case HALF_NEG_INF_BIT:
  1334. {
  1335. stream << u8"-∞";
  1336. break;
  1337. }
  1338. case HALF_Q_NAN_BIT:
  1339. {
  1340. stream << u8"qNaN";
  1341. break;
  1342. }
  1343. case HALF_S_NAN_BIT:
  1344. {
  1345. stream << u8"sNaN";
  1346. break;
  1347. }
  1348. default:
  1349. {
  1350. stream << static_cast<float32_t>(lhs);
  1351. break;
  1352. }
  1353. }
  1354. return stream;
  1355. }
  1356.  
  1357. //---------------------------------------------------------------------------//
  1358.  
  1359. inline std::istream& operator>>(std::istream& stream, half& lhs)
  1360. {
  1361. float32_t value;
  1362. stream >> value;
  1363. lhs.operator=(value);
  1364. return stream;
  1365. }
  1366.  
  1367. //---------------------------------------------------------------------------//
  1368.  
  1369. } // namespace IEEE754
  1370.  
  1371. //---------------------------------------------------------------------------//
  1372.  
  1373. #endif
  1374.  
  1375. // half.hpp
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement