Guest User

Untitled

a guest
Oct 21st, 2017
79
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.41 KB | None | 0 0
  1. #include <assert.h>
  2. #include <cmath>
  3. #include <limits>
  4. #include <stdint.h>
  5. #include <stdio.h>
  6. #include <string.h>
  7.  
  8. static_assert(std::numeric_limits<float>::has_quiet_NaN, "need quiet nan");
  9.  
  // Returns a value of type T whose lowest `count` bits are set and whose
  // remaining bits are clear. T is expected to be an unsigned integer type.
  //
  //   count == 0            -> 0
  //   count >= bit width    -> all bits set
  //
  // Both edge cases are handled explicitly because shifting by the full bit
  // width (or by a wrapped-around negative amount) is undefined behaviour.
  template <typename T> inline constexpr T ones(unsigned count) {
    constexpr unsigned num_bits = sizeof(T) << 3;
    if (count == 0) {
      return T(0);
    }
    if (count >= num_bits) {
      return static_cast<T>(~T(0));
    }
    // Here 0 < num_bits - count < num_bits, so the shift is always valid.
    return static_cast<T>(static_cast<T>(~T(0)) >> (num_bits - count));
  }
  14.  
  // IEEE 754 binary16 (half precision) number stored as its raw bit pattern:
  //   bit 15       sign
  //   bits 14..10  biased exponent (bias 15)
  //   bits 9..0    fraction
  struct Float16 {
    uint16_t _n;

    // Bit-field layout of `_n`.
    static constexpr uint16_t FRACTION_MASK = 0x03FF;
    static constexpr uint16_t EXPONENT_MASK = 0x7C00;
    static constexpr uint16_t SIGN_MASK = 0x8000;
    static constexpr unsigned EXPONENT_SHIFT = 10;
    static constexpr unsigned SIGN_SHIFT = 15;

    Float16() = default;

    // Returns (positive) zero
    static constexpr Float16 zero() { return Float16{0}; }

    // Returns the infinity representation (exponent all ones, fraction 0)
    static constexpr Float16 positive_infinity() {
      Float16 inf{0};
      inf.set_e(31);
      return inf;
    }

    // Returns the negative infinity representation
    static constexpr Float16 negative_infinity() {
      Float16 neginf{0};
      neginf.set_e(31);
      neginf.set_s(1);
      return neginf;
    }

    // Minimum normalized number that is greater than 0: exponent field 1 and
    // an all-zero fraction (2^-14).
    static constexpr Float16 min_normal() {
      Float16 f{0};
      f.set_e(1);
      return f;
    }

    // Maximum normalized number that's less than infinity: exponent field 30
    // and an all-ones fraction (65504).
    static constexpr Float16 max_normal() {
      Float16 f{0};
      f.set_f(FRACTION_MASK);
      f.set_e(30);
      return f;
    }

    // Smallest positive denormal: exponent field 0, fraction 1 (2^-24).
    static constexpr Float16 min_denormal() {
      Float16 f{0};
      f.set_f(1);
      return f;
    }

    // Converts a 32-bit float to half precision using round-to-nearest-even.
    //
    //  - NaN stays NaN (the top fraction bits are kept; a payload that would
    //    truncate to zero is forced non-zero so the result is not infinity).
    //  - Infinity and finite values too large for half precision become
    //    infinity of the same sign.
    //  - Values below the normal range become denormals, or signed zero when
    //    they are too small to round up to the smallest denormal.
    //
    // Not constexpr: the bit pattern is read with memcpy, which cannot be
    // evaluated in a constant expression.
    static Float16 from_float32(float r) {
      static_assert(sizeof(float) == sizeof(uint32_t),
                    "float must be 32 bits wide");
      uint32_t r_bits = 0;
      memcpy(&r_bits, &r, sizeof(uint32_t));

      const uint32_t r_frac = r_bits & 0x007FFFFFu;
      const uint32_t r_exp = (r_bits >> 23) & 0xFFu;
      const uint32_t r_sign = r_bits >> 31;

      Float16 h{0};
      h.set_s(uint16_t(r_sign));

      // Infinity or NaN.
      if (r_exp == 0xFFu) {
        h.set_e(31);
        if (r_frac != 0) {
          const uint16_t payload = uint16_t(r_frac >> 13);
          h.set_f(payload != 0 ? payload : uint16_t(1));
        }
        return h;
      }

      // Exponent re-biased from 127 (float) to 15 (half).
      const int32_t half_exp = int32_t(r_exp) - 127 + 15;

      // Too large for half precision.
      if (half_exp >= 31) {
        h.set_e(31);
        return h;
      }

      // Below the normal range: encode as a denormal (or zero).
      if (half_exp <= 0) {
        if (half_exp < -10) {
          // Less than half of the smallest denormal: rounds to zero.
          return h;
        }
        // 24-bit significand with the implicit leading one made explicit.
        const uint32_t mant = r_frac | 0x00800000u;
        // Denormal result is mant * 2^(half_exp - 14); shift is in [14, 24].
        const unsigned shift = unsigned(14 - half_exp);
        uint32_t q = mant >> shift;
        const uint32_t rem = mant & ((uint32_t(1) << shift) - 1u);
        const uint32_t halfway = uint32_t(1) << (shift - 1u);
        if (rem > halfway || (rem == halfway && (q & 1u) != 0)) {
          // May carry to 0x400, which is exactly the smallest normal.
          ++q;
        }
        h._n = uint16_t(h._n | q);
        return h;
      }

      // Normal range: keep the top 10 fraction bits and round on the 13
      // discarded ones. A carry out of the fraction bumps the exponent, and
      // from the largest finite value it lands exactly on infinity.
      uint32_t packed = (uint32_t(half_exp) << 10) | (r_frac >> 13);
      const uint32_t rem = r_frac & 0x1FFFu;
      if (rem > 0x1000u || (rem == 0x1000u && (packed & 1u) != 0)) {
        ++packed;
      }
      h._n = uint16_t(h._n | packed);
      return h;
    }

    // Fractional part
    constexpr uint16_t f() const {
      return static_cast<uint16_t>(_n & FRACTION_MASK);
    }
    // Exponent part
    constexpr uint16_t e() const {
      return static_cast<uint16_t>((_n & EXPONENT_MASK) >> EXPONENT_SHIFT);
    }
    // Sign bit
    constexpr uint16_t s() const {
      return static_cast<uint16_t>((_n & SIGN_MASK) >> SIGN_SHIFT);
    }

    // Replaces the fraction field. Only the low 10 bits of `f` are used, so an
    // out-of-range argument cannot spill into the exponent or sign.
    constexpr void set_f(uint16_t f) {
      _n = static_cast<uint16_t>((_n & ~FRACTION_MASK) | (f & FRACTION_MASK));
    }

    // Replaces the exponent field. Only the low 5 bits of `e` are used.
    constexpr void set_e(uint16_t e) {
      _n = static_cast<uint16_t>((_n & ~EXPONENT_MASK) |
                                 ((e << EXPONENT_SHIFT) & EXPONENT_MASK));
    }

    // Replaces the sign bit. Only the lowest bit of `s` is used.
    constexpr void set_s(uint16_t s) {
      _n = static_cast<uint16_t>((_n & ~SIGN_MASK) |
                                 ((s << SIGN_SHIFT) & SIGN_MASK));
    }

    // Returns the next representable number. Returns 0 if `this` is max
    // representable number. Only works with positive numbers for now.
    inline constexpr Float16 next() const;

    // Convert to a native float
    inline operator float() const;

    // True if denormal
    inline constexpr bool is_denormal() const;

    // True if NaN
    inline constexpr bool is_nan() const { return e() == 31 && f() != 0; }

    // Just keeping this method of comparison here for interest. These work ok
    // except for two constraints - the ordering treats NaN as being greater
    // than Inf, and -0 is strictly less than +0. The second constraint is
    // definitely the more unacceptable one. (From the book "Hacker's Delight")
  #if 0

    constexpr bool operator==(const Float16 &b) const {
      if (is_nan() || b.is_nan())
        return false;
      return _n == b._n;
    }

    constexpr bool operator!=(const Float16 &b) const { return !(*this == b); }

    constexpr bool operator<(const Float16 &b) const {
      const bool ge0 = s() == 0;
      return (ge0 && int16_t(_n) < int16_t(b._n)) || (!ge0 && uint16_t(_n) > uint16_t(b._n));
    }

    constexpr bool operator<=(const Float16 &b) const {
      const bool ge0 = s() == 0;
      return (ge0 && int16_t(_n) <= int16_t(b._n)) || (!ge0 && uint16_t(_n) >= uint16_t(b._n));
    }
  #endif

    // Equality on the bit pattern, with two exceptions: NaN never compares
    // equal to anything (including itself), and +0 equals -0.
    constexpr bool operator==(const Float16 &b) const {
      if (is_nan() || b.is_nan()) {
        return false;
      }
      // Ignoring the sign bit, both patterns being zero means both are +/-0.
      return _n == b._n || ((_n | b._n) & ~SIGN_MASK) == 0;
    }

    constexpr bool operator!=(const Float16 &b) const { return !(*this == b); }

    // Strict ordering done with integer comparisons on the bit patterns.
    // Any comparison involving NaN is false; +0 and -0 are not ordered.
    constexpr bool operator<(const Float16 &b) const {
      if (is_nan() || b.is_nan()) {
        return false;
      }
      if (((_n | b._n) & ~SIGN_MASK) == 0) {
        return false; // both operands are zero (of either sign)
      }
      if (s() == 0) {
        // Non-negative lhs: smaller only than a larger non-negative rhs.
        return b.s() == 0 && _n < b._n;
      }
      // Negative lhs: below every non-negative rhs; among negatives the
      // larger magnitude (larger bit pattern) is the smaller number.
      return b.s() == 0 || _n > b._n;
    }

    constexpr bool operator<=(const Float16 &b) const {
      return *this < b || *this == b;
    }

    // Defined through the mirrored comparison (rather than by negation) so
    // that they are also false when either operand is NaN.
    constexpr bool operator>(const Float16 &b) const { return b < *this; }

    constexpr bool operator>=(const Float16 &b) const { return b <= *this; }

    // Arithmetic operators are declared only; no definitions are provided in
    // this block.
    inline constexpr Float16 operator+(const Float16 &b) const;
    inline constexpr Float16 operator-(const Float16 &b) const;
    inline constexpr Float16 operator*(const Float16 &b) const;
    inline constexpr Float16 operator/(const Float16 &b) const;
  };
  164.  
  165. struct Float16Parts {
  166. uint16_t f, e, s;
  167.  
  168. constexpr Float16Parts(Float16 f16) : f(f16.f()), e(f16.e()), s(f16.s()) {}
  169.  
  170. constexpr operator Float16() const {
  171. Float16 f16{f};
  172. f16._n |= e << 10;
  173. f16._n |= s << 15;
  174. return f16;
  175. }
  176.  
  177. constexpr void increment() {
  178. constexpr uint16_t MAX_F = ones<uint16_t>(10);
  179. constexpr uint16_t MAX_E = ones<uint16_t>(5);
  180.  
  181. if (f != MAX_F) {
  182. ++f;
  183. } else if (e != MAX_E) {
  184. f = 0;
  185. ++e;
  186. } else {
  187. f = e = s = 0;
  188. }
  189. }
  190.  
  191. operator float() const {
  192. const float sign = s == 1 ? -1.0f : 1.0f;
  193.  
  194. if (e == 0) {
  195. if (f == 0) {
  196. return sign * 0.0f;
  197. } else {
  198. // Denormal number
  199. return sign * std::pow(2.0f, -14.0f) * float(f) / std::pow(2.0f, 10.0f);
  200. }
  201. } else if (e == 31) {
  202. if (f == 0) {
  203. return sign * std::numeric_limits<float>::infinity();
  204. } else {
  205. return std::numeric_limits<float>::quiet_NaN();
  206. }
  207. } else {
  208. return sign * std::pow(2.0, float(e) - 15.0) *
  209. (1.0 + float(f) / std::pow(2.0f, 10.0f));
  210. }
  211. }
  212. };
  213.  
  214. constexpr Float16 Float16::next() const {
  215. Float16Parts p(*this);
  216. p.increment();
  217. return p;
  218. }
  219.  
  220. Float16::operator float() const { return float(Float16Parts(*this)); }
  221.  
  222. constexpr bool Float16::is_denormal() const {
  223. Float16Parts p = Float16Parts(*this);
  224. return p.e == 0 && p.f != 0;
  225. }
  226.  
  227. int main() {
  228. Float16 f = Float16::zero();
  229. uint16_t last_e = f.e();
  230. printf("e = %u\n", f.e());
  231. while (f != Float16::max_normal()) {
  232. printf("%.10f\n", (float)f);
  233.  
  234. f = f.next();
  235.  
  236. if (f.e() != last_e) {
  237. printf("e = %u\n", f.e());
  238. last_e = f.e();
  239. }
  240. }
  241. printf("%.5f\n", (float)f);
  242.  
  243. printf("# Min normal = %.10f\n", float(Float16::min_normal()));
  244. printf("# Max normal = %.10f\n", float(Float16::max_normal()));
  245. }
Add Comment
Please, Sign In to add comment