daily pastebin goal
17%
SHARE
TWEET

eZ80 Single Precision Multiply

Zeda Jan 30th, 2015 239 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. mlt_single:
  2. ;;Expect ADL mode
  3. ;;Single precision floating point multiply
  4. ;;Floats are similar to the IEEE-754 standard except for how they are stored, taking advantage of the little-endian processor
  5. ;;  CCCCCCCC BBBBBBBB AAAAAAAS EEEEEEEE
  6. ;; Cs refer to bits 0:7 of the mantissa
  7. ;; Bs refer to bits 8:15 of the mantissa
  8. ;; As refer to bits 16:22 of the mantissa, with an implicit bit 23
  9. ;; S is the sign and is stored in the high bit of the third byte of the mantissa
  10. ;; E is the 8-bit exponent with bias of 127
  11. ;;Inputs: IX points to the first float, IX+4 points to the second float
  12. ;;        cmath flags indicate desired rounding mode
  13. ;;          rnd_default = 1 if using "round to nearest, ties to even"
  14. ;;                        0 otherwise
  15. ;;          if "bit rndDefault,(iy+cmath)" = 0, then
  16. ;;                 rndBit0,rndBit1 = 0,0 implies round to nearest, ties round away from zero
  17. ;;                 rndBit0,rndBit1 = 1,0 implies truncate
  18. ;;                 rndBit0,rndBit1 = 0,1 implies round up
  19. ;;                 rndBit0,rndBit1 = 1,1 implies round down
  20. ;;Outputs: HL is the sign and mantissa, A is the exponent
  21. ;;Side effects: bit 7 of (ix+2) and (ix+6) is set in memory; t0,t1,t2 hold the 48-bit mantissa product
  22. ;Get the sign of the result: after the xor, the S flag (bit 7 of F) is the XOR of the two sign bits
  23.     ld a,(ix+2) \ xor (ix+6) \ push af
  24. ;set implicit bits (this overwrites the stored sign bit of both inputs)
  25.     set 7,(ix+2) \ set 7,(ix+6)
  26.     call.is mlt24               ;NOTE(review): mlt24 ends in a plain ret - confirm that pairs with call.is
  27.     pop de                      ;E = the flags pushed above, so bit 7 of E is the result sign
  28.     ld hl,(t1+1)                ;bytes 3..5 of the product = its upper 24 bits
  29.     ld c,(ix+3) \ ld b,(ix+7)   ;C,B = the two biased exponents
  30.     ld a,c \ xor b \ jp m,spos  ;top exponent bits differ: go straight to the sum
  31. ;top bits of both exponents are the same, struggle is real: the sum can overflow or underflow
  32.     xor b \ add a,b             ;A = C+B; carry and S describe the sum
  33.     jr nc,$+8 \ jp m,overflow \ jr spos+2   ;carry (both >= 0x80): overflow if bit 7 of the sum is set. $+8 is the jp p below (4-byte ADL jp)
  34.     jp p,underflow \ jr spos+2              ;no carry (both < 0x80): underflow if bit 7 of the sum is clear
  35. .spos:
  36.     xor b \ add a,b \ add a,0x80 \ ld d,a   ;D = C+B+0x80 (mod 256); spos+2 skips the two 1-byte ops that rebuild C+B
  37.     ld a,(t1)                   ;byte 2 of the product: the bits just below the kept mantissa
  38.     jp cmathAdjust              ;tail call: it rounds, normalizes, loads HL and A, and returns to our caller
  39. ;nothing may follow here: cmathAdjust already produces the final HL (sign+mantissa) and A (exponent)
  40.    
  41.    
  42. cmathAdjust:
  43. ;;ADL mode
  44. ;;Inputs: HL as the 24-bit mantissa
  45. ;;        A holds the bits just below the mantissa; only bits 7 and 6 are examined (mask 0xC0)
  46. ;;        D is the exponent
  47. ;;        bit 7 of E is the sign
  48. ;;        cmath flags set as desired
  49. ;;Output: HL is the sign and mantissa, A is the exponent.
  50. ;;        Normalized, rounded
  51. ;;        Jumps to overflow / underflow (not defined in this listing) when the exponent reaches 0
  52. ;time to round based on rounding mode
  53. ;optimized based on logic: BC ends up 0 or 1 and is added to the mantissa at roundready
  54.     ld bc,0
  55.     and 0xC0
  56.     add a,a                     ;carry = bit 7 of A, the first discarded bit
  57.     jr nc,roundready            ;first discarded bit clear: nothing is added in any mode
  58.  
  59.     bit rndDefault,(iy+cmath)
  60.     jr z,roundDef               ;NOTE(review): ties-to-even is taken when rndDefault = 0; the mlt_single header reads as if 1 selects it - confirm
  61.     bit rndBit1,(iy+cmath)
  62.     jr nz,directed
  63. ;rndBit1 = 0: rndBit0 = 0 is round to nearest, ties away (add 1); rndBit0 = 1 is truncate (add 0)
  64.     bit rndBit0,(iy+cmath)
  65.     jr z,roundready-1           ;roundready-1 is the 1-byte "inc c" right before roundready
  66.     jr roundready
  67. .directed
  68.     ld a,e \ add a,a            ;carry = sign; the bit test below leaves carry untouched
  69.     bit rndBit0,(iy+cmath)
  70.     jr nz,rd
  71. ;rndBit0,rndBit1 = 0,1 implies round up
  72.     jr nc,roundready-1          ;positive: increase the magnitude
  73.     jr roundready               ;negative: the truncated magnitude is already the rounded-up value
  74. .rd
  75. ;rndBit0,rndBit1 = 1,1 implies round down: negative increases the magnitude, positive truncates
  76.     jr c,roundready-1 \ jr roundready
  77. .roundDef:
  78.     add a,a                     ;carry = original bit 6 of A
  79.     jr c,$+6                    ;above one half: skip the tie test and land on inc c
  80.     ld a,l
  81.     rra                         ;carry = lowest mantissa bit
  82.     jr nc,$+3                   ;tie with an even mantissa: skip inc c
  83.     inc c
  84.  
  85. .roundready:
  86.     or a
  87.     adc hl,bc                   ;add the rounding increment; Z and S are tested below
  88.     ld bc,0x800000
  89. ;if zero, increment exponent, and finish
  90. ;if bit 23 is set (S flag), finish
  91. ;if bit 23 is clear, renormalize
  92.     jr nz,.cs
  93.     add hl,bc                   ;HL was 0: make it 0x800000
  94.     inc d
  95.     jp z, overflow
  96. .exit
  97.     ld a,d                      ;A = exponent
  98.     rlc e                       ;carry = sign (old bit 7 of E); E is rotated
  99.     ret c                       ;negative: bit 23 of HL stays set and is the sign bit
  100.     add hl,bc                   ;positive: adding 0x800000 clears bit 23
  101.     ret
  102. .cs
  103.     jp m,exit
  104.     or a
  105.     dec d \ jp z,underflow \ adc hl,hl \ jp p,$-7 \ jr exit   ;shift left until bit 23 is set, lowering D each time; $-7 is the dec d
  106. mlt24:
  107. ;; expected in Z80 mode; returns with a plain ret - NOTE(review): confirm that suits callers using call.is
  108. ;Primarily intended for use with single precision floats where the mantissa is little-endian and the floats are stored consecutively in 32-bit spaces
  109. ;Inputs: IX points to the LSB of the first float (x2:x1:x0); the second mantissa (y2:y1:y0) is read from IX+4
  110. ;Outputs: t0,t1,t2 are consecutive 16-bit words in RAM and contain the 48-bit result in little-endian
  111. ;Destroys: AF, BC, DE, HL
  112. ;X*Y = x0*y0 + ((x2:x1)*y0 + (y2:y1)*x0)<<8 + ((x2:x1)*(y2:y1))<<16
  113.     ld h,(ix) \ ld l,(ix+4) \ mlt hl \ ld (t0),hl   ;t0 = x0*y0 (bytes 0..1)
  114.     ld hl,(ix+1) \ ld bc,(ix+5) \ call mltBC_HL     ;DEBC = (x2:x1)*(y2:y1)
  115.     ld (t1),bc \ ld (t2),de                         ;bytes 2..5
  116.     ld hl,(ix+1) \ ld e,(ix+4) \ call mlt24_cross   ;add ((x2:x1)*y0)<<8
  117.     ld hl,(ix+5) \ ld e,(ix)                        ;add ((y2:y1)*x0)<<8 by falling into mlt24_cross
  118. mlt24_cross:
  119. ;Adds (HL*E)<<8 to the 48-bit value at t0. Every partial sum is below the final product, so t2 cannot wrap
  120.     ld d,l \ ld l,e \ mlt hl \ mlt de               ;HL = H*E, DE = L*E
  121.     ld a,d \ add a,l \ ld l,a \ jr nc,$+3 \ inc h   ;H:L:E = the 24-bit product HL*E
  122.     ld a,(t0+1) \ add a,e \ ld (t0+1),a             ;byte 1; the loads and stores keep the carry
  123.     ld de,(t1) \ adc hl,de \ ld (t1),hl \ ret nc    ;bytes 2..3 plus the carry from byte 1
  124.     ld hl,(t2) \ inc hl \ ld (t2),hl \ ret          ;carry into bytes 4..5
  125. mltBC_HL:
  126. ;;Assume 16-bit Z80-mode
  127. ;Returns BC*HL in DEBC (DE = upper 16 bits, BC = lower 16 bits); destroys A and HL
  128. ;54 or 56 t-states.
  129. ;30 bytes
  130.     ld d,c \ ld e,l \ mlt de \ push de ;11  C*L, kept on the stack for bytes 0..1
  131.     ld d,h \ ld e,b \ mlt de           ;8   DE = H*B, bytes 2..3
  132.     ld a,l \ ld l,c \ ld c,a           ;3   swap L and C so HL = H:C and BC = B:L
  133.     mlt hl \ mlt bc \ add hl,bc        ;13  HL = H*C + B*L, carry = bit 16 of that sum
  134.     jr nc,$+3 \ inc d \ pop bc         ;6   the cross sum sits at byte 1, so its bit 16 belongs to byte 3 (D)
  135.     ld a,b \ add a,l \ ld b,a          ;3   byte 1 += low byte of the cross sum
  136.     ld a,e \ adc a,h \ ld e,a          ;3   byte 2 += high byte of the cross sum + carry
  137.     ret nc \ inc d \ ret               ;7 (+2 for carry)  carry out of byte 2 goes to byte 3
RAW Paste Data
Top