/*
 * math-sll.h
 *
 * Modified sllatan() and _sllexp() to return correct results.
 */

/*
 * $Id: math-sll.c,v 1.15 2002/08/20 18:01:54 andrewm Exp $
 *
 * Change: CHUI
 *
 * Purpose
 *  A fixed point (31.32 bit) math library.
 *
 * Description
 *  Floating point packs the most accuracy in the available bits, but it
 *  often provides more accuracy than is required.  It is time consuming to
 *  carry the extra precision around, particularly on platforms that don't
 *  have a dedicated floating point processor.
 *
 *  This library is a compromise.  All math is done using the 64 bit signed
 *  "long long" format (sll), and is not intended to be portable, just as
 *  fast as possible.  Since "long long" is an elementary type, it can be
 *  passed around without resorting to the use of pointers.  Since the
 *  format used is fixed point, there is never a need to do time consuming
 *  checks and adjustments to maintain normalized numbers, as is the case
 *  in floating point.
 *
 *  Simply put, this library is limited to handling numbers with a whole
 *  part of up to 2^31 - 1 = 2.147483647e9 in magnitude, and fractional
 *  parts down to 2^-32 = 2.3283064365e-10 in magnitude.  This yields a
 *  decent range and accuracy for many applications.
 *
 * IMPORTANT
 *  No checking for arguments out of range (error).
 *  No checking for divide by zero (error).
 *  No checking for overflow (error).
 *  No checking for underflow (warning).
 *  Chops, doesn't round.
 *
 * Functions
 *  sll dbl2sll(double x)           double -> sll
 *  double sll2dbl(sll x)           sll -> double
 *
 *  sll slladd(sll x, sll y)        x + y
 *  sll sllsub(sll x, sll y)        x - y
 *  sll sllmul(sll x, sll y)        x * y
 *  sll slldiv(sll x, sll y)        x / y
 *
 *  sll sllinv(sll x)           1 / x
 *  sll sllmul2(sll x)          x * 2
 *  sll sllmul4(sll x)          x * 4
 *  sll sllmul2n(sll x, int n)      x * 2^n, 0 <= n <= 31
 *  sll slldiv2(sll x)          x / 2
 *  sll slldiv4(sll x)          x / 4
 *  sll slldiv2n(sll x, int n)      x / 2^n, 0 <= n <= 31
 *
 *  sll sllcos(sll x)           cos x
 *  sll sllsin(sll x)           sin x
 *  sll slltan(sll x)           tan x
 *  sll sllatan(sll x)          atan x
 *
 *  sll sllexp(sll x)           e^x
 *  sll slllog(sll x)           ln x
 *
 *  sll sllpow(sll x, sll y)        x^y
 *  sll sllsqrt(sll x)          x^(1 / 2)
 *
 * History
 *  * Aug 20 2002 Nicolas Pitre <nico@cam.org> v1.15
 *  - Replaced all shifting assembly with C equivalents
 *  - Reformatted ARM asm and changed comments to begin with @
 *  - Updated C version of sllmul()
 *  - Removed the unsupported architecture #error - should be portable now
 *
 *  * Aug 17 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.14
 *  - Fixed sign handling of ARM sllmul()
 *  - Ported sllmul() to x86 - it can be inlined now
 *  - Updated the sllmul() comments to reflect my changes
 *  - Updated the header comments
 *
 *  * Aug 17 2002 Nicolas Pitre <nico@cam.org> v1.13
 *  - Corrected and expanded upon Andrew's sllmul() comments
 *  - Added in a non-optimal but portable C version of sllmul()
 *
 *  * Aug 16 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.12
 *  - Added in corrected optimized sllmul() for ARM by Nicolas Pitre
 *  - Changed comments on multiplication to describe Nicolas's method
 *
 *  * Jun 17 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.11
 *  - Reverted optimized sllmul() for ARM because of bug
 *
 *  * Jun 17 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.10
 *  - Added in optimized sllmul() for ARM by Nicolas Pitre
 *  - Changed comments on multiplication to describe Nicolas's method
 *  - Optimized multiplications and divisions by powers of 2
 *
 *  * Feb  5 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.9
 *  - Optimized multiplications and divisions by powers of 2
 *
 *  * Feb  5 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.8
 *  - Consolidated constants
 *  - Added macros for _slladd() _sllsub()
 *  - Removed __inline__ from slladd() sllsub()
 *  - Renamed umul() to ullmul() and made it global
 *  - Added function prototypes
 *  - Corrected header comment about fractional range
 *  - Added warning for non-Linux operating systems
 *
 *  * Feb  5 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.7
 *  - Corrected some i386 assembly comments
 *  - Renamed calc_*() to _sll*()
 *  - Moved _sllexp() closer to sllexp()
 *
 *  * Feb  5 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.6
 *  - Added sllmul2() sllmul4() sllmul2n() for i386
 *  - Added slldiv2() slldiv4() slldiv2n() for i386
 *  - Removed input constraints on sllmul2() sllmul4() sllmul2n() for ARM
 *  - Removed input constraints on slldiv2() slldiv4() slldiv2n() for ARM
 *  - Modified ARM assembly for WYSIWYG output
 *  - Changed asm to __asm__
 *
 *  * Feb  5 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.5
 *  - Fixed umul() for i386
 *  - Fixed dbl2sll() and sll2dbl() - I forgot ARM doubles are big-endian
 *  - Very lightly tested on ARM and i386 and it seems okay
 *
 *  * Feb  4 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.4
 *  - Added umul() for i386
 *
 *  * Jan 20 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.3
 *  - Added fast multiplication functions sllmul2(), sllmul4(), sllmul2n()
 *  - Added fast division functions slldiv2(), slldiv4(), slldiv2n()
 *  - Added square root function sllsqrt()
 *  - Added library roll-call
 *  - Reformatted the history to RPM format (ick)
 *  - Moved sllexp() closer to related functions
 *  - Added algorithm description to sllpow()
 *
 *  * Jan 19 2002 Andrew E. Mileski <andrewm@isoar.ca> v1.1
 *  - Corrected constants, thanks to Mark A. Lisher for noticing
 *  - Put source under CVS control
 *
 *  * Jan 18 2002 Andrew E. Mileski <andrewm@isoar.ca>
 *  - Added some more explanation to calc_cos() and calc_sin()
 *  - Added sllatan() and documented it fairly verbosely
 *
 *  * July 13 2000 Andrew E. Mileski <andrewm@isoar.ca>
 *  - Corrected documentation for multiplication algorithm
 *
 *  * May 21 2000 Andrew E. Mileski <andrewm@isoar.ca>
 *  - Rewrote slltan() to avoid scaling the argument for both sine and cosine
 *
 *  * May 19 2000  Andrew E. Mileski <andrewm@isoar.ca>
 *  - Unrolled loops
 *  - Added sllinv() and sllneg()
 *  - Changed constants to type "LL" (was "UL" - oops)
 *  - Changed all routines to use inverse constants instead of division
 *
 *  * May 15, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Fixed slltan() - used sin/cos instead of sllsin/sllcos.  Doh!
 *  - Added slllog(x) and sllpow(x,y)
 *
 *  * May 11, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Added a simple tan(x) that could stand some optimization
 *  - Added more constants (see <math.h>)
 *
 *  * May 3, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Added sllsin(), sllcos(), and trig constants
 *
 *  * May 2, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - All routines and macros now have sll in their identifiers
 *  - Changed mul() to umul() and added sllmul() to handle signed numbers
 *  - Added and tested sllexp(), sllint(), and sllfrac()
 *  - Added some constants
 *
 *  * Apr 26, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Added mul(), and began testing it (unsigned only)
 *
 *  * Apr 25, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Added sll2dbl() [convert a signed long long to a double]
 *  - Began testing.  Well gee whiz it works! :)
 *
 *  * Apr 24, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Added dbl2sll() [convert a double to a signed long long]
 *  - Began documenting
 *
 *  * Apr ??, 2000 - Andrew E. Mileski <andrewm@isoar.ca>
 *  - Conceived, written, and fiddled with
 *
 *
 *      Copyright (C) 2000 Andrew E. Mileski
 *
 *  This library is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public
 *  License version 2 as published by the Free Software Foundation.
 *
 *  This library is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 *  Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public
 *  License along with this library; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307  USA
 */

#ifndef MATHSLL_H
#define MATHSLL_H

#ifdef __cplusplus
extern "C" {
#endif


#ifndef USE_FIXED_POINT

#include <math.h>

typedef double sll;
typedef double ull;

#define sllvalue(X) ((double)(X))
#define int2sll(X)  ((sll)(X))
#define sll2int(X)  ((int)(X))
#define sll_abs(X)  (fabs(X))
#define sllneg(X)   (-(X))


static __inline__ double sll2dbl(sll x)
{
    return (double)x;
}

static __inline__ sll slladd(sll x, sll y)
{
    return x + y;
}

static __inline__ sll sllsub(sll x, sll y)
{
    return x - y;
}

static __inline__ sll sllmul(sll x, sll y)
{
    return x * y;
}

static __inline__ sll slldiv(sll x, sll y)
{
    return x / y;
}

static __inline__ sll sllinv(sll x)
{
    return 1 / x;
}


static __inline__ float sll2float(sll s)
{
    return (float)s;
}

static __inline__ sll float2sll(float f)
{
    return (sll)f;
}

static __inline__ sll sllsin(sll x)
{
    return sin(x);
}

static __inline__ sll sllcos(sll x)
{
    return cos(x);
}

static __inline__ sll slltan(sll x)
{
    return tan(x);
}

static __inline__ sll sllsqrt(sll x)
{
    return sqrt(x);
}

static __inline__ sll dbl2sll(double dbl)
{
    return dbl;    /* sll is double here; a (float) cast would only lose precision */
}

static __inline__ sll sllpow(sll x, sll y)
{
    return pow(x, y);
}

#else

/* Data types */
typedef signed long long sll;
typedef unsigned long long ull;

/* Macros */
#define int2sll(X)  (((sll) (X)) << 32)
#define sllvalue(X)     (X)
#define sll2int(X)  ((int) ((X) >> 32))
#define sll_abs(X)  ((X) < 0 ? -(X) : (X))
#define sllint(X)   ((X) & 0xffffffff00000000LL)
#define sllfrac(X)  ((X) & 0x00000000ffffffffLL)
#define sllneg(X)   (-(X))
#define _slladd(X,Y)    ((X) + (Y))
#define _sllsub(X,Y)    ((X) - (Y))

/* Constants (converted from double) */
#define CONST_0     0x0000000000000000LL
#define CONST_1     0x0000000100000000LL
#define CONST_2     0x0000000200000000LL
#define CONST_3     0x0000000300000000LL
#define CONST_4     0x0000000400000000LL
#define CONST_10    0x0000000a00000000LL
#define CONST_1_2   0x0000000080000000LL
#define CONST_1_3   0x0000000055555555LL
#define CONST_1_4   0x0000000040000000LL
#define CONST_1_5   0x0000000033333333LL
#define CONST_1_6   0x000000002aaaaaaaLL
#define CONST_1_7   0x0000000024924924LL
#define CONST_1_8   0x0000000020000000LL
#define CONST_1_9   0x000000001c71c71cLL
#define CONST_1_10  0x0000000019999999LL
#define CONST_1_11  0x000000001745d174LL
#define CONST_1_12  0x0000000015555555LL
#define CONST_1_20  0x000000000cccccccLL
#define CONST_1_30  0x0000000008888888LL
#define CONST_1_42  0x0000000006186186LL
#define CONST_1_56  0x0000000004924924LL
#define CONST_1_72  0x00000000038e38e3LL
#define CONST_1_90  0x0000000002d82d82LL
#define CONST_1_110 0x000000000253c825LL
#define CONST_1_132 0x0000000001f07c1fLL
#define CONST_1_156 0x0000000001a41a41LL
#define CONST_E     0x00000002b7e15162LL
#define CONST_1_E   0x000000005e2d58d8LL
#define CONST_SQRTE 0x00000001a61298e1LL
#define CONST_1_SQRTE   0x000000009b4597e3LL
#define CONST_LOG2_E    0x0000000171547652LL
#define CONST_LOG10_E   0x000000006f2dec54LL
#define CONST_LN2   0x00000000b17217f7LL
#define CONST_LN10  0x000000024d763776LL
#define CONST_PI    0x00000003243f6a88LL
#define CONST_PI_2  0x00000001921fb544LL
#define CONST_PI_4  0x00000000c90fdaa2LL
#define CONST_1_PI  0x00000000517cc1b7LL
#define CONST_2_PI  0x00000000a2f9836eLL
#define CONST_2_SQRTPI  0x0000000120dd7504LL
#define CONST_SQRT2 0x000000016a09e667LL
#define CONST_1_SQRT2   0x00000000b504f333LL

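/*
 * Each constant above is just its value scaled by 2^32 and chopped,
 * e.g. 0.5 * 2^32 = 0x0000000080000000 (CONST_1_2) and
 * pi * 2^32 = 0x00000003243f6a88 (CONST_PI).  A minimal sketch of the
 * conversion -- mksll() is our illustrative helper, not part of the
 * library:
 */
static __inline__ sll mksll(double v)
{
    return (sll)(v * 4294967296.0);    /* v * 2^32, chops like the rest of the library */
}
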
static __inline__ sll slladd(sll x, sll y)
{
    return (x + y);
}

static __inline__ sll sllsub(sll x, sll y)
{
    return (x - y);
}

/*
 * Let a = A * 2^32 + a_h * 2^0 + a_l * 2^(-32)
 * Let b = B * 2^32 + b_h * 2^0 + b_l * 2^(-32)
 *
 * Where:
 *   *_h is the integer part
 *   *_l is the fractional part
 *   A and B are the signs (0 for positive, -1 for negative).
 *
 * a * b = (A * 2^32 + a_h * 2^0 + a_l * 2^-32)
 *       * (B * 2^32 + b_h * 2^0 + b_l * 2^-32)
 *
 * Expanding the terms, we get:
 *
 *   = A * B * 2^64 + A * b_h * 2^32 + A * b_l * 2^0
 *   + a_h * B * 2^32 + a_h * b_h * 2^0 + a_h * b_l * 2^-32
 *   + a_l * B * 2^0 + a_l * b_h * 2^-32 + a_l * b_l * 2^-64
 *
 * Grouping by powers of 2, we get:
 *
 *   = A * B * 2^64
 *   Meaningless overflow from sign extension - ignore
 *
 *   + (A * b_h + a_h * B) * 2^32
 *   Overflow which we can't handle - ignore
 *
 *   + (A * b_l + a_h * b_h + a_l * B) * 2^0
 *   We only need the low 32 bits of this term, as the rest is overflow
 *
 *   + (a_h * b_l + a_l * b_h) * 2^-32
 *   We need all 64 bits of this term
 *
 *   + a_l * b_l * 2^-64
 *   We only need the high 32 bits of this term, as the rest is underflow
 *
 * Note that:
 *   a > 0 && b > 0: A =  0, B =  0 and the third term is a_h * b_h
 *   a < 0 && b > 0: A = -1, B =  0 and the third term is a_h * b_h - b_l
 *   a > 0 && b < 0: A =  0, B = -1 and the third term is a_h * b_h - a_l
 *   a < 0 && b < 0: A = -1, B = -1 and the third term is a_h * b_h - a_l - b_l
 */
#if defined(__arm__)
static __inline__ sll sllmul(sll left, sll right)
{
    /*
     * From gcc/config/arm/arm.h:
     *   In a pair of registers containing a DI or DF value the 'Q'
     *   operand returns the register number of the register containing
     *   the least significant part of the value.  The 'R' operand returns
     *   the register number of the register containing the most
     *   significant part of the value.
     */
    sll retval;

    __asm__ (
        "@ sllmul\n\t"
        "umull  %R0, %Q0, %Q1, %Q2\n\t"
        "mul    %R0, %R1, %R2\n\t"
        "umlal  %Q0, %R0, %Q1, %R2\n\t"
        "umlal  %Q0, %R0, %Q2, %R1\n\t"
        "tst    %R1, #0x80000000\n\t"
        "subne  %R0, %R0, %Q2\n\t"
        "tst    %R2, #0x80000000\n\t"
        "subne  %R0, %R0, %Q1\n\t"
        : "=&r" (retval)
        : "%r" (left), "r" (right)
        : "cc"
    );

    return retval;
}
#elif defined(__i386__)
static __inline__ sll sllmul(sll left, sll right)
{
    register sll retval;
    __asm__(
        "# sllmul\n\t"
        "   movl    %1, %%eax\n\t"
        "   mull    %3\n\t"
        "   movl    %%edx, %%ebx\n\t"
        "\n\t"
        "   movl    %2, %%eax\n\t"
        "   mull    %4\n\t"
        "   movl    %%eax, %%ecx\n\t"
        "\n\t"
        "   movl    %1, %%eax\n\t"
        "   mull    %4\n\t"
        "   addl    %%eax, %%ebx\n\t"
        "   adcl    %%edx, %%ecx\n\t"
        "\n\t"
        "   movl    %2, %%eax\n\t"
        "   mull    %3\n\t"
        "   addl    %%ebx, %%eax\n\t"
        "   adcl    %%ecx, %%edx\n\t"
        "\n\t"
        "   btl $31, %2\n\t"
        "   jnc 1f\n\t"
        "   subl    %3, %%edx\n\t"
        "1: btl $31, %4\n\t"
        "   jnc 1f\n\t"
        "   subl    %1, %%edx\n\t"
        "1:\n\t"
        : "=&A" (retval)
        : "m" (left), "m" (((unsigned *) &left)[1]),
          "m" (right), "m" (((unsigned *) &right)[1])
        : "ebx", "ecx", "cc"
    );
    return retval;
}
#else
/* Plain C version: not optimal, but portable. */
#warning Using the non-optimal but portable C version of sllmul()
static __inline__ sll sllmul(sll a, sll b)
{
    unsigned int a_lo, b_lo;
    signed int a_hi, b_hi;
    sll x;

    a_lo = a;
    a_hi = (ull) a >> 32;
    b_lo = b;
    b_hi = (ull) b >> 32;

    x = ((ull) (a_hi * b_hi) << 32)
      + (((ull) a_lo * b_lo) >> 32)
      + (sll) a_lo * b_hi
      + (sll) b_lo * a_hi;

    return x;
}
#endif
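
/*
 * A hedged sanity sketch of sllmul() -- check_sllmul() is our
 * illustrative helper, not part of the library.  In 31.32 fixed point,
 * 1.5 is 0x0000000180000000 and 2.5 is 0x0000000280000000; their exact
 * product 3.75 is 0x00000003c0000000.
 */
static __inline__ int check_sllmul(void)
{
    return sllmul(0x0000000180000000LL, 0x0000000280000000LL)
        == 0x00000003c0000000LL;
}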

static __inline__ sll sllinv(sll v)
{
    int sgn = 0;
    sll u;
    ull s = -1;

    /* Use positive numbers, or the approximation won't work */
    if (v < CONST_0) {
        v = sllneg(v);
        sgn = 1;
    }

    /* A rough first guess - always less than the actual reciprocal */
    for (u = v; u; u = (ull) u >> 1)
        s >>= 1;

    /* Newton's Method */
    u = sllmul(s, _sllsub(CONST_2, sllmul(v, s)));
    u = sllmul(u, _sllsub(CONST_2, sllmul(v, u)));
    u = sllmul(u, _sllsub(CONST_2, sllmul(v, u)));
    u = sllmul(u, _sllsub(CONST_2, sllmul(v, u)));
    u = sllmul(u, _sllsub(CONST_2, sllmul(v, u)));
    u = sllmul(u, _sllsub(CONST_2, sllmul(v, u)));

    return ((sgn) ? sllneg(u) : u);
}
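
/*
 * Note on sllinv(): the six unrolled steps above are Newton's method on
 * f(u) = 1/u - v, whose update is u' = u * (2 - v * u).  Convergence is
 * quadratic, so each pass roughly doubles the number of correct bits,
 * and six passes from the shifted seed are enough for the 64-bit format.
 */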

static __inline__ sll slldiv(sll left, sll right)
{
    return sllmul(left, sllinv(right));
}

static __inline__ sll sllmul2(sll x)
{
    return x << 1;
}

static __inline__ sll sllmul4(sll x)
{
    return x << 2;
}

static __inline__ sll sllmul2n(sll x, int n)
{
    sll y;

#if defined(__arm__)
    /*
     * On ARM we need to use explicit assembly, since the compiler
     * doesn't know the range of n is limited and decides to call
     * a library function instead.
     */
    __asm__ (
        "@ sllmul2n\n\t"
        "mov    %R0, %R1, lsl %2\n\t"
        "orr    %R0, %R0, %Q1, lsr %3\n\t"
        "mov    %Q0, %Q1, lsl %2\n\t"
        : "=r" (y)
        : "r" (x), "rM" (n), "rM" (32 - n)
    );
#else
    y = x << n;
#endif

    return y;
}

static __inline__ sll slldiv2(sll x)
{
    return x >> 1;
}

static __inline__ sll slldiv4(sll x)
{
    return x >> 2;
}

static __inline__ sll slldiv2n(sll x, int n)
{
    sll y;

#if defined(__arm__)
    /*
     * On ARM we need to use explicit assembly, since the compiler
     * doesn't know the range of n is limited and decides to call
     * a library function instead.
     */
    __asm__ (
        "@ slldiv2n\n\t"
        "mov    %Q0, %Q1, lsr %2\n\t"
        "orr    %Q0, %Q0, %R1, lsl %3\n\t"
        "mov    %R0, %R1, asr %2\n\t"
        : "=r" (y)
        : "r" (x), "rM" (n), "rM" (32 - n)
    );
#else
    y = x >> n;
#endif

    return y;
}
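
/*
 * A caution on the shift-based divisions above: an arithmetic right
 * shift rounds toward minus infinity rather than chopping toward zero,
 * so for negative arguments slldiv2() et al. can differ from slldiv()
 * by one least significant bit.  For example, slldiv2(-1LL) stays -1LL
 * (the smallest negative fraction) instead of chopping to zero.
 */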

/*
 * Unpack the IEEE floating point double format and put it in fixed point
 * sll format.
 */
static __inline__ sll dbl2sll(double dbl)
{
    union {
        double d;
        unsigned u[2];
        ull ull;
        sll sll;
    } in, retval;
    register unsigned exp;

    /* Move into memory as args might be passed in regs */
    in.d = dbl;

#if defined(__arm__)

    /* ARM architecture has a big-endian double */
    exp = in.u[0];
    in.u[0] = in.u[1];
    in.u[1] = exp;

#endif /* defined(__arm__) */

    /* Leading 1 is assumed by IEEE */
    retval.u[1] = 0x40000000;

    /* Unpack the mantissa into the unsigned long */
    retval.u[1] |= (in.u[1] << 10) & 0x3ffffc00;
    retval.u[1] |= (in.u[0] >> 22) & 0x000003ff;
    retval.u[0] = in.u[0] << 10;

    /* Extract the exponent and align the decimals */
    exp = (in.u[1] >> 20) & 0x7ff;
    if (exp)
        retval.ull >>= 1053 - exp;
    else
        return 0L;

    /* Negate if negative flag set */
    if (in.u[1] & 0x80000000)
        retval.sll = -retval.sll;

    return retval.sll;
}
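
/*
 * A hedged sanity sketch of dbl2sll() on a little-endian host --
 * check_dbl2sll() is our illustrative helper, not part of the library.
 * 1.0 should unpack to CONST_1, and -2.5 to the negation of
 * 0x0000000280000000.
 */
static __inline__ int check_dbl2sll(void)
{
    return dbl2sll(1.0) == CONST_1
        && dbl2sll(-2.5) == -0x0000000280000000LL;
}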

static __inline__ sll float2sll(float f)
{
    return dbl2sll((double)f);
}

static __inline__ double sll2dbl(sll s)
{
    union {
        double d;
        unsigned u[2];
        ull ull;
        sll sll;
    } in, retval;
    register unsigned exp;
    register unsigned flag;

    if (s == 0)
        return 0.0;

    /* Move into memory as args might be passed in regs */
    in.sll = s;

    /* Handle the negative flag */
    if (in.sll < 0) {
        flag = 0x80000000;
        in.ull = sllneg(in.sll);
    } else
        flag = 0x00000000;

    /* Normalize */
    for (exp = 1053; in.ull && (in.u[1] & 0x80000000) == 0; exp--) {
        in.ull <<= 1;
    }
    in.ull <<= 1;
    exp++;
    in.ull >>= 12;
    retval.ull = in.ull;
    retval.u[1] |= flag | (exp << 20);

#if defined(__arm__)

    /* ARM architecture has a big-endian double */
    exp = retval.u[0];
    retval.u[0] = retval.u[1];
    retval.u[1] = exp;

#endif /* defined(__arm__) */

    return retval.d;
}

static __inline__ float sll2float(sll s)
{
    return ((float)sll2dbl(s));
}
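
/*
 * A hedged round-trip sketch -- check_roundtrip() is our illustrative
 * helper, not part of the library.  3.25 needs only two fractional
 * bits, so it should survive double -> sll -> double exactly.
 */
static __inline__ int check_roundtrip(void)
{
    return sll2dbl(dbl2sll(3.25)) == 3.25;
}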

/*
 * Calculate cos x where -pi/4 <= x <= pi/4
 *
 * Description:
 *  cos x = 1 - x^2 / 2! + x^4 / 4! - ... + x^(2N) / (2N)!
 *  Note that (pi/4)^12 / 12! < 2^-32, which is below the smallest
 *  representable fraction.
 */
static __inline__ sll _sllcos(sll x)
{
    sll retval, x2;
    x2 = sllmul(x, x);
    /*
     * cos x = t0 + t1 + t2 + t3 + t4 + t5 + t6
     *
     * f0 =  0! =  1
     * f1 =  2! =  2 *  1 * f0 =   2 * f0
     * f2 =  4! =  4 *  3 * f1 =  12 * f1
     * f3 =  6! =  6 *  5 * f2 =  30 * f2
     * f4 =  8! =  8 *  7 * f3 =  56 * f3
     * f5 = 10! = 10 *  9 * f4 =  90 * f4
     * f6 = 12! = 12 * 11 * f5 = 132 * f5
     *
     * t0 = 1
     * t1 = -t0 * x2 /   2 = -t0 * x2 * CONST_1_2
     * t2 = -t1 * x2 /  12 = -t1 * x2 * CONST_1_12
     * t3 = -t2 * x2 /  30 = -t2 * x2 * CONST_1_30
     * t4 = -t3 * x2 /  56 = -t3 * x2 * CONST_1_56
     * t5 = -t4 * x2 /  90 = -t4 * x2 * CONST_1_90
     * t6 = -t5 * x2 / 132 = -t5 * x2 * CONST_1_132
     */
    retval = _sllsub(CONST_1, sllmul(x2, CONST_1_132));
    retval = _sllsub(CONST_1, sllmul(sllmul(x2, retval), CONST_1_90));
    retval = _sllsub(CONST_1, sllmul(sllmul(x2, retval), CONST_1_56));
    retval = _sllsub(CONST_1, sllmul(sllmul(x2, retval), CONST_1_30));
    retval = _sllsub(CONST_1, sllmul(sllmul(x2, retval), CONST_1_12));
    retval = _sllsub(CONST_1, slldiv2(sllmul(x2, retval)));
    return retval;
}

/*
 * Calculate sin x where -pi/4 <= x <= pi/4
 *
 * Description:
 *  sin x = x - x^3 / 3! + x^5 / 5! - ... + x^(2N+1) / (2N+1)!
 *  Note that (pi/4)^13 / 13! < 2^-32, which is below the smallest
 *  representable fraction.
 */
static __inline__ sll _sllsin(sll x)
{
    sll retval, x2;
    x2 = sllmul(x, x);
    /*
     * sin x = t0 + t1 + t2 + t3 + t4 + t5 + t6
     *
     * f0 =  1! =  1
     * f1 =  3! =  3 *  2 * f0 =   6 * f0
     * f2 =  5! =  5 *  4 * f1 =  20 * f1
     * f3 =  7! =  7 *  6 * f2 =  42 * f2
     * f4 =  9! =  9 *  8 * f3 =  72 * f3
     * f5 = 11! = 11 * 10 * f4 = 110 * f4
     * f6 = 13! = 13 * 12 * f5 = 156 * f5
     *
     * t0 = x
     * t1 = -t0 * x2 /   6 = -t0 * x2 * CONST_1_6
     * t2 = -t1 * x2 /  20 = -t1 * x2 * CONST_1_20
     * t3 = -t2 * x2 /  42 = -t2 * x2 * CONST_1_42
     * t4 = -t3 * x2 /  72 = -t3 * x2 * CONST_1_72
     * t5 = -t4 * x2 / 110 = -t4 * x2 * CONST_1_110
     * t6 = -t5 * x2 / 156 = -t5 * x2 * CONST_1_156
     */
    retval = _sllsub(x, sllmul(x2, CONST_1_156));
    retval = _sllsub(x, sllmul(sllmul(x2, retval), CONST_1_110));
    retval = _sllsub(x, sllmul(sllmul(x2, retval), CONST_1_72));
    retval = _sllsub(x, sllmul(sllmul(x2, retval), CONST_1_42));
    retval = _sllsub(x, sllmul(sllmul(x2, retval), CONST_1_20));
    retval = _sllsub(x, sllmul(sllmul(x2, retval), CONST_1_6));
    return retval;
}

static __inline__ sll sllcos(sll x)
{
    int i;
    sll retval;

    /* Calculate cos (x - i * pi/2), where -pi/4 <= x - i * pi/2 <= pi/4 */
    i = sll2int(_slladd(sllmul(x, CONST_2_PI), CONST_1_2));
    x = _sllsub(x, sllmul(int2sll(i), CONST_PI_2));

    switch (i & 3) {
        default:
        case 0:
            retval = _sllcos(x);
            break;
        case 1:
            retval = sllneg(_sllsin(x));
            break;
        case 2:
            retval = sllneg(_sllcos(x));
            break;
        case 3:
            retval = _sllsin(x);
            break;
    }
    return retval;
}

static __inline__ sll sllsin(sll x)
{
    int i;
    sll retval;

    /* Calculate sin (x - i * pi/2), where -pi/4 <= x - i * pi/2 <= pi/4 */
    i = sll2int(_slladd(sllmul(x, CONST_2_PI), CONST_1_2));
    x = _sllsub(x, sllmul(int2sll(i), CONST_PI_2));

    switch (i & 3) {
        default:
        case 0:
            retval = _sllsin(x);
            break;
        case 1:
            retval = _sllcos(x);
            break;
        case 2:
            retval = sllneg(_sllsin(x));
            break;
        case 3:
            retval = sllneg(_sllcos(x));
            break;
    }
    return retval;
}

static __inline__ sll slltan(sll x)
{
    int i;
    sll retval;
    i = sll2int(_slladd(sllmul(x, CONST_2_PI), CONST_1_2));
    x = _sllsub(x, sllmul(int2sll(i), CONST_PI_2));
    switch (i & 3) {
        default:
        case 0:
        case 2:
            retval = slldiv(_sllsin(x), _sllcos(x));
            break;
        case 1:
        case 3:
            retval = sllneg(slldiv(_sllcos(x), _sllsin(x)));
            break;
    }
    return retval;
}
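
/*
 * A hedged sanity sketch of the trig routines -- check_sllsin() is our
 * illustrative helper, not part of the library.  sin(pi/6) = 0.5, so
 * the fixed-point result should land within a small tolerance of
 * CONST_1_2 (0x10000 here is 2^-16, roughly 1.5e-5).
 */
static __inline__ int check_sllsin(void)
{
    sll err = _sllsub(sllsin(slldiv(CONST_PI, int2sll(6))), CONST_1_2);
    return sll_abs(err) < 0x0000000000010000LL;
}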

/*
 * atan x = SUM[n=0,) (-1)^n * x^(2n + 1)/(2n + 1), |x| < 1
 *
 * Two term approximation
 *  a = x - x^3/3
 * Gives us
 *  atan x = a + e
 * where e is the residual error.  Let e = atan d
 *  atan x = a + atan d
 * Rearrange
 *  atan x - a = atan d
 * Apply tan to both sides
 *  tan (atan x - a) = tan atan d
 *  tan (atan x - a) = d
 * Applying the standard formula
 *  tan (u - v) = (tan u - tan v) / (1 + tan u * tan v)
 * Gives us
 *  tan (atan x - a) = (tan atan x - tan a) / (1 + tan atan x * tan a)
 * Let t = tan a
 *  tan (atan x - a) = (x - t) / (1 + x * t)
 * So finally
 *  atan x = a + atan ((x - t) / (1 + x * t))
 * And the typical worst case is x = 1.0, which converges in 3 iterations.
 */
static __inline__ sll _sllatan(sll x)
{
    sll a, t, retval;

    /* First iteration */
    a = sllmul(x, _sllsub(CONST_1, sllmul(x, sllmul(x, CONST_1_3))));
    retval = a;

    /* Second iteration */
    t = slldiv(_sllsin(a), _sllcos(a));
    x = slldiv(_sllsub(x, t), _slladd(CONST_1, sllmul(t, x)));
    a = sllmul(x, _sllsub(CONST_1, sllmul(x, sllmul(x, CONST_1_3))));
    retval = _slladd(retval, a);

    /* Third iteration */
    t = slldiv(_sllsin(a), _sllcos(a));
    x = slldiv(_sllsub(x, t), _slladd(CONST_1, sllmul(t, x)));
    a = sllmul(x, _sllsub(CONST_1, sllmul(x, sllmul(x, CONST_1_3))));
    return _slladd(retval, a);
}

static __inline__ sll sllatan(sll x)
{
    sll retval;

    if (x < sllneg(CONST_1))
        /* if x < -1 then atan x = -pi/2 - atan 1/x */
        retval = sllneg(CONST_PI_2);
    else if (x > CONST_1)
        /* if x > 1 then atan x = pi/2 - atan 1/x */
        retval = CONST_PI_2;
    else
        return _sllatan(x);
    return _sllsub(retval, _sllatan(sllinv(x)));
}
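
/*
 * A hedged sanity sketch of sllatan() -- check_sllatan() is our
 * illustrative helper, not part of the library.  atan 1 = pi/4, the
 * documented worst case for convergence.
 */
static __inline__ int check_sllatan(void)
{
    sll err = _sllsub(sllatan(CONST_1), CONST_PI_4);
    return sll_abs(err) < 0x0000000000010000LL;    /* within ~1.5e-5 */
}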

/*
 * Calculate e^x where -0.5 <= x <= 0.5
 *
 * Description:
 *  e^x = x^0 / 0! + x^1 / 1! + ... + x^N / N!
 *  Note that 0.5^11 / 11! < 2^-32, which is below the smallest
 *  representable fraction.
 */
static __inline__ sll _sllexp(sll x)
{
    sll retval;
    retval = _slladd(CONST_1, sllmul(x, CONST_1_11)); /* merged from two redundant lines */
    retval = _slladd(CONST_1, sllmul(retval, sllmul(x, CONST_1_10)));
    retval = _slladd(CONST_1, sllmul(retval, sllmul(x, CONST_1_9)));
    retval = _slladd(CONST_1, sllmul(retval, slldiv2n(x, 3)));
    retval = _slladd(CONST_1, sllmul(retval, sllmul(x, CONST_1_7)));
    retval = _slladd(CONST_1, sllmul(retval, sllmul(x, CONST_1_6)));
    retval = _slladd(CONST_1, sllmul(retval, sllmul(x, CONST_1_5)));
    retval = _slladd(CONST_1, sllmul(retval, slldiv4(x)));
    retval = _slladd(CONST_1, sllmul(retval, sllmul(x, CONST_1_3)));
    retval = _slladd(CONST_1, sllmul(retval, slldiv2(x)));
    retval = _slladd(CONST_1, sllmul(retval, x)); /* the final x/1! term; the result is incorrect without it */
    return retval;
}

/*
 * Calculate e^x where x is arbitrary
 */
static __inline__ sll sllexp(sll x)
{
    int i;
    sll e, retval;

    e = CONST_E;

    /* -0.5 <= x <= 0.5 */
    i = sll2int(_slladd(x, CONST_1_2));
    retval = _sllexp(_sllsub(x, int2sll(i)));

    /* i >= 0 */
    if (i < 0) {
        i = -i;
        e = CONST_1_E;
    }

    /* Scale the result */
    for (; i; i >>= 1) {
        if (i & 1)
            retval = sllmul(retval, e);
        e = sllmul(e, e);
    }
    return retval;
}
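
/*
 * A hedged sanity sketch of sllexp() -- check_sllexp() is our
 * illustrative helper, not part of the library.  e^1 reduces to
 * _sllexp(0) scaled once by CONST_E, so it should land within a few
 * LSBs of CONST_E.
 */
static __inline__ int check_sllexp(void)
{
    sll err = _sllsub(sllexp(CONST_1), CONST_E);
    return sll_abs(err) < 0x0000000000010000LL;
}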

/*
 * Calculate the natural logarithm using the Newton-Raphson method
 */
static __inline__ sll slllog(sll x)
{
    sll x1, ln = 0;

    /* Scale: e^(-1/2) <= x <= e^(1/2) */
    while (x < CONST_1_SQRTE) {
        ln = _sllsub(ln, CONST_1);
        x = sllmul(x, CONST_E);
    }
    while (x > CONST_SQRTE) {
        ln = _slladd(ln, CONST_1);
        x = sllmul(x, CONST_1_E);
    }

    /* First iteration */
    x1 = sllmul(_sllsub(x, CONST_1), slldiv2(_sllsub(x, CONST_3)));
    ln = _sllsub(ln, x1);
    x = sllmul(x, _sllexp(x1));

    /* Second iteration */
    x1 = sllmul(_sllsub(x, CONST_1), slldiv2(_sllsub(x, CONST_3)));
    ln = _sllsub(ln, x1);
    x = sllmul(x, _sllexp(x1));

    /* Third iteration */
    x1 = sllmul(_sllsub(x, CONST_1), slldiv2(_sllsub(x, CONST_3)));
    ln = _sllsub(ln, x1);

    return ln;
}
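
/*
 * A hedged sanity sketch of slllog() -- check_slllog() is our
 * illustrative helper, not part of the library.  ln e = 1, and the
 * scaling loop reduces the argument to roughly 1 in a single step.
 */
static __inline__ int check_slllog(void)
{
    sll err = _sllsub(slllog(CONST_E), CONST_1);
    return sll_abs(err) < 0x0000000000010000LL;
}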

/*
 * ln x^y = y * ln x
 * e^(ln x^y) = e^(y * ln x)
 * x^y = e^(y * ln x), for x > 0 since slllog() is used
 */
static __inline__ sll sllpow(sll x, sll y)
{
    if (y == CONST_0)
        return CONST_1;
    return sllexp(sllmul(y, slllog(x)));
}
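
/*
 * A hedged sanity sketch of sllpow() -- check_sllpow() is our
 * illustrative helper, not part of the library.  2^0.5 should land
 * close to CONST_SQRT2; the tolerance is wider here (2^-12, roughly
 * 2.4e-4) because errors from slllog() and sllexp() compound.
 */
static __inline__ int check_sllpow(void)
{
    sll err = _sllsub(sllpow(CONST_2, CONST_1_2), CONST_SQRT2);
    return sll_abs(err) < 0x0000000000100000LL;
}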

/*
 * Consider a parabola centered on the y-axis
 *  y = a * x^2 + b
 * It has zeros (y = 0) at
 *  a * x^2 + b = 0
 *  a * x^2 = -b
 *  x^2 = -b / a
 *  x = +- (-b / a)^(1 / 2)
 * Letting a = 1 and b = -X
 *  y = x^2 - X
 *  x = +- X^(1 / 2)
 * Which is convenient since we want to find the square root of X, and we
 * can use Newton's Method to find the zeros of any f(x)
 *  xn = x - f(x) / f'(x)
 * Applying Newton's Method to our parabola
 *  f(x) = x^2 - X
 *  xn = x - (x^2 - X) / (2 * x)
 *  xn = x - (x - X / x) / 2
 * To make this converge quickly, we scale X so that
 *  X = 4^n * z
 * Taking the square root of both sides
 *  X^(1 / 2) = (4^n * z)^(1 / 2)
 *  X^(1 / 2) = 2^n * z^(1 / 2)
 * Let N = 2^n
 *  X^(1 / 2) = N * z^(1 / 2)
 * We want this to converge to the positive root, so we must start at a
 * point
 *  0 < start <= z^(1 / 2)
 * or
 *  z^(1 / 2) <= start
 * After scaling, 0.5 <= z < 2, so z^(1 / 2) lies between
 *  (1/2)^(1/2) = 0.707
 *  2^(1/2) = 1.414
 * A good choice is 1, which lies in the middle and takes 4 iterations to
 * converge from either extreme.
 */
static __inline__ sll sllsqrt(sll x)
{
    sll n, xn;

    /* Start with a scaling factor of 1 */
    n = CONST_1;

    /* Quick solutions for the simple cases */
    if (x <= CONST_0 || x == CONST_1)
        return x;

    /* Scale x so that 0.5 <= x < 2 */
    while (x >= CONST_2) {
        x = slldiv4(x);
        n = sllmul2(n);
    }
    while (x < CONST_1_2) {
        x = sllmul4(x);
        n = slldiv2(n);
    }

    /* Simple solution if x = 4^n */
    if (x == CONST_1)
        return n;

    /* The starting point */
    xn = CONST_1;

    /* Four iterations will be enough */
    xn = _sllsub(xn, slldiv2(_sllsub(xn, slldiv(x, xn))));
    xn = _sllsub(xn, slldiv2(_sllsub(xn, slldiv(x, xn))));
    xn = _sllsub(xn, slldiv2(_sllsub(xn, slldiv(x, xn))));
    xn = _sllsub(xn, slldiv2(_sllsub(xn, slldiv(x, xn))));

    /* Scale the result */
    return sllmul(n, xn);
}
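
/*
 * A hedged sanity sketch of sllsqrt() -- check_sllsqrt() is our
 * illustrative helper, not part of the library.  4 is a power of four,
 * so it reduces to the exact shortcut above and must return exactly 2.
 */
static __inline__ int check_sllsqrt(void)
{
    return sllsqrt(CONST_4) == CONST_2;
}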

#endif

#ifdef __cplusplus
}
#endif

#endif /* MATHSLL_H */