daily pastebin goal
47%
SHARE
TWEET

eZ80 Single Precision Multiply

Zeda Jan 30th, 2015 246 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  mlt_single:
  ;;Expect ADL mode
  ;;Single precision floating point multiply
  ;;Floats are similar to the IEEE-754 single format except for the byte order in
  ;;memory, which takes advantage of the little-endian processor:
  ;;  CCCCCCCC BBBBBBBB SAAAAAAA EEEEEEEE      (lowest address first)
  ;; Cs are bits 0:7 of the mantissa
  ;; Bs are bits 8:15 of the mantissa
  ;; As are bits 16:22 of the mantissa, with an implicit bit 23
  ;; S is the sign, stored in the high bit of the third mantissa byte
  ;; E is the 8-bit exponent with a bias of 127
  ;;Inputs: IX points to the first float, IX+4 points to the second float
  ;;        cmath flags indicate the desired rounding mode
  ;;          rndDefault = 1 selects "round to nearest, ties to even"
  ;;          if "bit rndDefault,(iy+cmath)" = 0, then
  ;;                 rndBit0,rndBit1 = 0,0 implies round to nearest, ties round away from zero
  ;;                 rndBit0,rndBit1 = 1,0 implies truncate
  ;;                 rndBit0,rndBit1 = 0,1 implies round up
  ;;                 rndBit0,rndBit1 = 1,1 implies round down
  ;;Outputs: HL is the sign and mantissa, A is the exponent (produced by cmathAdjust)
  ;;         Jumps to overflow / underflow when the exponent leaves 1..255.
  ;;Side effects: bit 7 of (ix+2) and (ix+6) is set in memory, so the sign bits of
  ;;              both operands are overwritten; t0..t2 receive the 48-bit product.
  ;;NOTE(review): zero, infinity and NaN operands get no special treatment here.
  ;;NOTE(review): mlt24 is entered with call.is but ends in a plain ret; a routine
  ;;              reached through a mixed-mode call normally returns with ret.l - confirm.

  ;Sign of the result = XOR of the operand signs.
      ld a,(ix+2)
      xor (ix+6)                  ; S flag = sign of the product
      push af                     ; saved F is popped into E below, so E bit 7 = sign
  ;Make the implicit leading 1 of both mantissas explicit for the integer multiply.
      set 7,(ix+2)
      set 7,(ix+6)
      call.is mlt24               ; t0,t1,t2 = 48-bit product, little-endian
      pop de                      ; E = flags saved above (bit 7 = sign); D is replaced below
      ld hl,(t1+1)                ; product bits 24..47 = the 24 most significant bits

  ;Exponent. With both mantissas scaled as 1.23 fixed point the product is 2.46,
  ;so for a mantissa whose bit 23 is set the biased exponent is e1+e2-126.
  ;cmathAdjust decrements it once per normalization shift.
      ld a,(ix+3)
      add a,(ix+7)                ; A = low 8 bits of e1+e2, CF = bit 8
      jr c,mlt_single_expHigh
  ;e1+e2 < 256
      sub 126
      jp c,underflow              ; e1+e2 < 126
      jp z,underflow              ; exponent 0 is not a normal number
      jr mlt_single_expDone
  mlt_single_expHigh:
  ;e1+e2 = 256 + A, so e1+e2-126 = A + 130
      add a,130
      jp c,overflow               ; result would need more than 8 bits
  mlt_single_expDone:
      ld d,a                      ; D = exponent for cmathAdjust
      ld a,(t1)                   ; product bits 16..23: the first discarded byte
      jp cmathAdjust              ; tail call: round, normalize, pack, return to our caller
  40.    
  41.    
  cmathAdjust:
  ;;ADL mode
  ;;Rounds a 24-bit mantissa according to the cmath rounding flags, renormalizes
  ;;it, and packs the sign into bit 23.
  ;;Inputs: HL is the 24-bit mantissa
  ;;        A bit 7 is the first discarded bit (guard), A bit 6 the next one
  ;;          (sticky); the remaining bits of A are masked off
  ;;        D is the exponent
  ;;        bit 7 of E is the sign
  ;;        cmath flags set as desired:
  ;;          rndDefault = 1          round to nearest, ties to even
  ;;          rndDefault = 0, then rndBit0,rndBit1 =
  ;;                 0,0  round to nearest, ties away from zero
  ;;                 1,0  truncate
  ;;                 0,1  round up   (toward +infinity)
  ;;                 1,1  round down (toward -infinity)
  ;;Output: HL is the sign and mantissa, A is the exponent.
  ;;        Normalized, rounded
  ;;        Jumps to overflow / underflow if the exponent wraps to 0.
  ;;Destroys: BC, D (holds the final exponent), E (rotated), flags
  ;;NOTE(review): every mode only acts when the guard bit is set, so in the two
  ;;              directed modes a result with guard=0, sticky=1 is truncated - confirm
  ;;              whether that approximation is intended.

      ld bc,0                     ; BC = amount added to the mantissa; 0 = truncate
      and 0xC0                    ; keep guard and sticky only
      add a,a                     ; CF = guard, A bit 7 = sticky
      jr nc,cmathAdjust_apply     ; discarded part is below one half: nothing to add

      bit rndDefault,(iy+cmath)
      jr nz,cmathAdjust_tiesEven
      bit rndBit1,(iy+cmath)
      jr nz,cmathAdjust_directed
  ;rndBit1 = 0: rndBit0 picks truncate (1) or nearest, ties away from zero (0)
      bit rndBit0,(iy+cmath)
      jr nz,cmathAdjust_apply     ; truncate
      jr cmathAdjust_bump         ; guard set means at least one half: round away

  cmathAdjust_directed:
      ld a,e
      add a,a                     ; CF = sign of the result
      bit rndBit0,(iy+cmath)      ; BIT leaves CF untouched
      jr nz,cmathAdjust_down
  ;rndBit0,rndBit1 = 0,1 implies round up: only a positive magnitude grows
      jr nc,cmathAdjust_bump
      jr cmathAdjust_apply
  cmathAdjust_down:
  ;rndBit0,rndBit1 = 1,1 implies round down: only a negative magnitude grows
      jr c,cmathAdjust_bump
      jr cmathAdjust_apply

  cmathAdjust_tiesEven:
      add a,a                     ; CF = sticky
      jr c,cmathAdjust_bump       ; strictly above one half
      ld a,l
      rra                         ; exact tie: CF = current mantissa LSB
      jr nc,cmathAdjust_apply     ; already even, leave it
  cmathAdjust_bump:
      inc c                       ; BC = 1

  cmathAdjust_apply:
      or a                        ; clear CF so ADC is a plain 24-bit add that sets Z and S
      adc hl,bc
      ld bc,0x800000              ; LD leaves the flags from ADC intact
      jr nz,cmathAdjust_checkTop
  ;Mantissa wrapped from 0xFFFFFF to 0: the value is now 1.0 x 2^(exponent+1).
      add hl,bc                   ; HL = 0x800000
      inc d
      jp z,overflow
  cmathAdjust_pack:
      ld a,d                      ; A = final exponent
      rlc e                       ; CF = sign
      ret c                       ; negative: bit 23 is already 1 and doubles as the sign
      add hl,bc                   ; positive: adding 0x800000 clears bit 23
      ret

  cmathAdjust_checkTop:
      jp m,cmathAdjust_pack       ; bit 23 set: already normalized
      or a                        ; CF = 0 so the first shift brings in a zero
  cmathAdjust_shift:
      dec d
      jp z,underflow
      adc hl,hl                   ; shift left; CF stays 0 while bit 23 was clear
      jp p,cmathAdjust_shift
      jr cmathAdjust_pack
  mlt24:
  ;; expected in Z80 mode
  ;;24x24 -> 48-bit unsigned multiply.
  ;;Primarily intended for single precision floats where the mantissa is
  ;;little-endian and the floats are stored consecutively in 32-bit spaces.
  ;;Inputs: IX points to the LSB of the first float; the second starts at IX+4
  ;;Outputs: t0,t1,t2 are consecutive 16-bit words in RAM and contain the 48-bit
  ;;         result in little-endian
  ;;Destroys: A, BC, DE, HL, flags
  ;;NOTE(review): ends in a plain ret; a caller that enters through call.is from
  ;;              ADL mode would normally need ret.l here - confirm the intended caller.
  ;;
  ;;With A = a0 + 256*A1 and B = b0 + 256*B1 (a0,b0 bytes; A1,B1 16-bit words):
  ;;  A*B = a0*b0 + 256*(b0*A1 + a0*B1) + 65536*A1*B1

      ld h,(ix)
      ld l,(ix+4)
      mlt hl
      ld (t0),hl                  ; bits 0..15  = a0*b0
      ld hl,(ix+1)
      ld bc,(ix+5)
      call mltBC_HL               ; DEBC = A1*B1
      ld (t1),bc                  ; bits 16..31
      ld (t2),de                  ; bits 32..47

      ld hl,(ix+1)
      ld e,(ix+4)
      call mlt24_addCross         ; += (b0*A1) << 8
      ld hl,(ix+5)
      ld e,(ix)
  ;Fall through for the second cross term, (a0*B1) << 8; the ret below then
  ;returns to the caller of mlt24.

  mlt24_addCross:
  ;;Adds (E * HL) << 8 to the 48-bit value in t0..t2.
  ;;Inputs: HL = 16-bit factor, E = 8-bit factor
      ld d,l
      ld l,e
      mlt hl                      ; HL = high byte * E
      mlt de                      ; DE = low byte  * E
  ;The 24-bit product is DE + 256*HL: its low byte is E, its upper 16 bits are HL + D.
      ld a,d
      add a,l
      ld l,a
      jr nc,mlt24_noRipple
      inc h                       ; cannot wrap: the whole product fits in 24 bits
  mlt24_noRipple:
      ld a,(t0+1)
      add a,e                     ; product bits 0..7 land on result bits 8..15
      ld (t0+1),a                 ; the stores and loads below leave CF intact
      ld de,(t1)
      adc hl,de                   ; product bits 8..23 land on result bits 16..31
      ld (t1),hl
      ret nc
      ld hl,(t2)                  ; propagate the carry into bits 32..47
      inc hl
      ld (t2),hl
      ret
  124.  
  mltBC_HL:
  ;;Assume 16-bit Z80-mode
  ;;16x16 -> 32-bit unsigned multiply built from four 8x8 MLT products.
  ;;Inputs: BC, HL
  ;;Outputs: DEBC = BC*HL (DE is the high word)
  ;;Destroys: A, HL, flags
  ;;
  ;;  BC*HL = (B*H)<<16 + (H*C + B*L)<<8 + C*L

      ld d,c
      ld e,l
      mlt de                      ; DE = C*L
      push de                     ; park the low partial product
      ld d,h
      ld e,b
      mlt de                      ; DE = H*B, the high partial product
      ld a,l
      ld l,c
      ld c,a                      ; now HL = H:C and BC = B:L
      mlt hl                      ; HL = H*C
      mlt bc                      ; BC = B*L
      add hl,bc                   ; HL = cross sum, CF = its 17th bit
      jr nc,mltBC_HL_crossFits
  ;The cross sum sits at bit 8 of the result, so its bit 16 is result bit 24,
  ;which is bit 0 of D (DE holds result bits 16..31).
      inc d
  mltBC_HL_crossFits:
      pop bc                      ; BC = C*L
      ld a,b
      add a,l
      ld b,a                      ; result bits 8..15
      ld a,e
      adc a,h
      ld e,a                      ; result bits 16..23
      ret nc
      inc d                       ; carry into result bits 24..31
      ret
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top