;eZ80 Single Precision Multiply

mlt_single:
;;Expects ADL mode
;;Single precision floating point multiply
;;Floats are similar to the IEEE-754 standard except for the storage layout,
;;which takes advantage of the little-endian processor:
;;  CCCCCCCC BBBBBBBB AAAAAAAS EEEEEEEE
;; Cs refer to bits 0:7 of the mantissa
;; Bs refer to bits 8:15 of the mantissa
;; As refer to bits 16:22 of the mantissa, with an implicit bit 23
;; S is the sign and is stored in the high bit of the third byte of the mantissa
;; E is the 8-bit exponent with bias of 127
;;Inputs: IX points to the first float, IX+4 points to the second float
;;        cmath flags indicate desired rounding mode
;;          rndDefault = 1 if using "round to nearest, ties to even"
;;                       0 otherwise
;;          if "bit rndDefault,(iy+cmath)" = 0, then
;;                 rndBit0,rndBit1 = 0,0 implies round to nearest, ties round away from zero
;;                 rndBit0,rndBit1 = 1,0 implies truncate
;;                 rndBit0,rndBit1 = 0,1 implies round up
;;                 rndBit0,rndBit1 = 1,1 implies round down
;;Outputs: HL is the sign and mantissa, A is the exponent
;;Side effects: bit 7 of (IX+2) and (IX+6) is set in memory, so the sign bits of
;;              both operands are overwritten. t0,t1,t2 receive the 48-bit product.
;;Exits through overflow / underflow when the exponent sum leaves the 8-bit range.
;;NOTE(review): mlt24 returns with a plain ret while it is entered here through
;;              call.is; confirm that pairing is valid for the mixed-mode call
;;              (the eZ80 manual pairs suffixed calls with ret.l).
;;NOTE(review): the exponent handed to cmathAdjust is e1+e2-128 (mod 256); confirm
;;              this matches the documented bias of 127 for a mantissa in [1,2).

;Get the sign of the result
;After the XOR both bit 7 of A and the S flag hold the result sign; AF is saved
;so that the flag byte can be recovered in E below.
    ld a,(ix+2) \ xor (ix+6) \ push af
;set implicit bits
    set 7,(ix+2) \ set 7,(ix+6)
    call.is mlt24
    pop de                      ;E = saved flags: bit 7 of E is the result sign
;The 48-bit product occupies t0..t0+5 (t0,t1,t2 are consecutive 16-bit words),
;so its upper 24 bits start at t1+1 and the byte just below them is at t1.
    ld hl,(t1+1)
    ld c,(ix+3) \ ld b,(ix+7)
    ld a,c \ xor b \ jp m,spos  ;top exponent bits differ: the sum cannot leave range
;signs are the same, struggle is real
;(both exponents have the same top bit, so the sum may overflow or underflow)
    xor b \ add a,b             ;A = C+B, carry set only when both were >= 0x80
;$+8 skips this jr (2 bytes), the jp (4 bytes in ADL mode) and the next jr (2 bytes)
    jr nc,$+8 \ jp m,overflow \ jr spos+2
    jp p,underflow \ jr spos+2
;spos+2 skips the two one-byte instructions "xor b \ add a,b", which were already done
.spos:
    xor b \ add a,b \ add a,0x80 \ ld d,a
    ld a,(t1)                   ;bits just below the kept mantissa, used for rounding
;cmathAdjust already returns the documented outputs (HL = sign and mantissa,
;A = exponent), so finish with a tail jump. Previously execution continued past
;the call, toggled bit 23 of HL again and fell into cmathAdjust a second time.
    jp cmathAdjust


cmathAdjust:
;;ADL mode
;;Rounds a 24-bit mantissa according to the cmath rounding flags, renormalizes
;;it and merges the sign into bit 23.
;;Inputs: HL as the 24-bit mantissa
;;        A[7:5] are the sticky bits for rounding (only A7 and A6 are examined)
;;        D is the exponent
;;        bit 7 of E is the sign
;;        cmath flags set as desired:
;;          rndDefault = 1           round to nearest, ties to even
;;          rndDefault = 0, then rndBit0,rndBit1 =
;;                 0,0  round to nearest, ties away from zero
;;                 1,0  truncate
;;                 0,1  round up   (toward +infinity)
;;                 1,1  round down (toward -infinity)
;;Output: HL is the sign and mantissa, A is the exponent.
;;        Normalized, rounded
;;Destroys: BC, D, E, flags
;;Exits through overflow / underflow if the exponent leaves range.

;time to round based on rounding mode
    ld bc,0                     ;BC = amount added to the mantissa (0 or 1)
    and 0xC0
    add a,a                     ;carry = A7 (round bit), A7 = old A6
    jr nc,roundready            ;round bit clear: mantissa is kept as is

    bit rndDefault,(iy+cmath)
    jr nz,roundDef              ;flag set: round to nearest, ties to even
    bit rndBit1,(iy+cmath)
    jr nz,rdirected
;rndBit1 = 0: nearest/ties away (rndBit0 = 0) or truncate (rndBit0 = 1)
    bit rndBit0,(iy+cmath)
    jr z,roundup                ;round bit is set, so nearest/ties-away increments
    jr roundready               ;truncate: add nothing

.rdirected
    ld a,e \ add a,a            ;carry = sign of the result
    bit rndBit0,(iy+cmath)      ;BIT leaves the carry flag untouched
    jr nz,rd
;rndBit0,rndBit1 = 0,1 implies round up
;positive: the magnitude grows; negative: the magnitude is truncated
    jr nc,roundup
    jr roundready
.rd
;rndBit0,rndBit1 = 1,1 implies round down
;negative: the magnitude grows; positive: the magnitude is truncated
    jr c,roundup
    jr roundready

.roundDef:
    add a,a                     ;carry = old A6: set means above the halfway point
    jr c,roundup
    ld a,l                      ;tie: increment only if the mantissa is odd
    rra
    jr nc,roundready
.roundup
    inc c

.roundready:
    or a                        ;clear carry so that adc adds exactly BC
    adc hl,bc                   ;adc (unlike add) sets Z and S from the 24-bit result
    ld bc,0x800000              ;ld leaves the flags from adc intact
;if zero, increment exponent, and finish
;if sign=1, finish
;if sign=0, renormalize
    jr nz,.cs
    add hl,bc                   ;mantissa wrapped to 0: it becomes 0x800000
    inc d
    jp z, overflow
.exit
    ld a,d
    rlc e                       ;carry = sign
    ret c                       ;negative: bit 23 is already set
    add hl,bc                   ;positive: adding 0x800000 clears bit 23
    ret
.cs
    jp m,exit                   ;bit 23 set: already normalized
    or a
;shift left until bit 23 is set, lowering the exponent once per shift;
;carry stays clear because the bit shifted out is always 0 here
.norm
    dec d \ jp z,underflow \ adc hl,hl \ jp p,norm \ jr exit
mlt24:
;; expected in Z80 mode
;;24x24 -> 48-bit unsigned multiply.
;Primarily intended for use with single precision floats where the mantissa is little-endian and the floats are stored consecutively in 32-bit spaces
;Inputs: IX points to the LSB of the first float; the second operand starts at IX+4
;        (x0,x1,x2 = bytes at IX+0..IX+2, y0,y1,y2 = bytes at IX+4..IX+6)
;Outputs: t0,t1,t2 are consecutive 16-bit words in RAM and contain the 48-bit result in little-endian
;Destroys: A, BC, DE, HL
;The product is assembled as
;   x0*y0  +  2^8*( y0*(x2:x1) + x0*(y2:y1) )  +  2^16*(x2:x1)*(y2:y1)
;NOTE(review): timing and size have not been re-measured for this version.

;bytes 0-1 = x0*y0
    ld h,(ix) \ ld l,(ix+4) \ mlt hl \ ld (t0),hl
;bytes 2-5 = (x2:x1)*(y2:y1), returned in DEBC
    ld hl,(ix+1) \ ld bc,(ix+5) \ call mltBC_HL
    ld (t1),bc \ ld (t2),de

;add y0*(x2:x1) starting at byte 1
    ld hl,(ix+1) \ ld e,(ix+4)
    ld d,l \ ld l,e \ mlt hl \ mlt de                   ;HL = x2*y0, DE = x1*y0
    ld a,(t0+1) \ add a,e \ ld (t0+1),a                 ;byte 1 += low byte of x1*y0
    ld e,d \ ld d,0 \ adc hl,de                         ;HL += high byte of x1*y0 + carry (cannot overflow)
    ld de,(t1) \ add hl,de \ ld (t1),hl                 ;bytes 2-3 += HL
    ld hl,(t2) \ ld de,0 \ adc hl,de \ ld (t2),hl       ;propagate the carry into bytes 4-5

;add x0*(y2:y1) starting at byte 1
    ld hl,(ix+5) \ ld e,(ix)
    ld d,l \ ld l,e \ mlt hl \ mlt de                   ;HL = y2*x0, DE = y1*x0
    ld a,(t0+1) \ add a,e \ ld (t0+1),a                 ;byte 1 += low byte of y1*x0
    ld e,d \ ld d,0 \ adc hl,de                         ;HL += high byte of y1*x0 + carry (cannot overflow)
    ld de,(t1) \ add hl,de \ ld (t1),hl                 ;bytes 2-3 += HL
    ld hl,(t2) \ ld de,0 \ adc hl,de \ ld (t2),hl       ;propagate the carry into bytes 4-5
    ret

mltBC_HL:
;;Assume 16-bit Z80-mode
;Returns BC*HL in DEBC (DE = upper 16 bits, BC = lower 16 bits)
;Destroys: A, HL
;BC*HL = C*L + 2^8*(H*C + B*L) + 2^16*(H*B)
;54 or 56 t-states.
;30 bytes
    ld d,c \ ld e,l \ mlt de \ push de ;11  C*L, kept on the stack
    ld d,h \ ld e,b \ mlt de           ;8   DE = H*B (bytes 3:2 of the result)
    ld a,l \ ld l,c \ ld c,a           ;3   swap L and C: HL = H:C, BC = B:L
    mlt hl \ mlt bc \ add hl,bc        ;13  HL = H*C + B*L, carry = bit 16 of that sum
;HL is added in at byte 1, so its carry-out has weight 2^24 and belongs in D
;(byte 3), not in E. $+3 skips this 2-byte jr and the 1-byte inc.
    jr nc,$+3 \ inc d \ pop bc         ;6   BC = C*L
    ld a,b \ add a,l \ ld b,a          ;3   byte 1 = high(C*L) + L
    ld a,e \ adc a,h \ ld e,a          ;3   byte 2 = low(H*B) + H + carry
    ret nc \ inc d \ ret               ;7 (+2 for carry)  byte 3 += carry