Advertisement
Zeda

distance

Feb 3rd, 2015
639
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. distance:
  2. ;;Inputs: B is the difference between X coordinates, C is the difference between Y coordinates (both are squared as unsigned bytes -- NOTE(review): confirm callers pass magnitudes, not signed differences)
  3. ;;Outputs: A is the square root of B*B+C*C as computed by sqrt16 below, or 255 when that sum does not fit in 16 bits. HL, DE and the flags are overwritten.
  4.     ld h,b \ ld e,b \ call H_Times_E \ push hl     ;HL returned for H=E=B is taken as B*B; park it on the stack
  5.     ld h,c \ ld e,c \ call H_Times_E \ pop de      ;HL is taken as C*C (H_Times_E never writes C or B); DE = the saved B*B
  6.     add hl,de \ jr nc,$+5 \ ld a,255 \ ret         ;HL = B*B+C*C. Carry means the sum needs 17 bits, so its root exceeds 8 bits: return the largest byte instead of the root of a truncated sum. Otherwise $+5 skips the 3 bytes of LD A,255 \ RET and execution falls into sqrt16.
  7. sqrt16:
  8. ;;Inputs: HL is the number to find the square root of
  9. ;;Outputs: A is the square root, rounded down to an integer. B is never written; C, D, E, H, L and the flags are overwritten.
  10. ;;111 bytes
  11. ;;555 t-states worst case
  12. ;zero some registers (A, C and D)
  13.    xor a
  14.    ld c,a
  15.    ld d,a
  16.  
  17. ;save the LSB of the input in E for later use, then move the MSB of the input into L and load H with 0.
  18. ;H will be a carry register, where the bits in L are rotated in
  19.    ld e,l
  20.    ld l,h
  21.    ld h,c
  22.  
  23. ;Iteration 1 is optimised
  24. ; C is treated as the accumulator
  25.    add hl,hl
  26.    add hl,hl
  27.    sub h             ; A was 0, so carry is set exactly when H (the top 2 input bits) is nonzero
  28.    jr nc,$+5         ; H = 0: skip the three 1-byte instructions below, the first result bit stays 0
  29.    inc c             ; first result bit is 1
  30.    cpl               ; A = 0-H becomes H-1 (the CPL trick is explained in iteration 2)
  31.    ld h,a
  32.  
  33. ;Iteration 2
  34. ; rotate in 2 more bits from the MSB of the input into H
  35.    add hl,hl
  36.    add hl,hl
  37. ; shift the accumulator
  38.    rl c
  39.    ld a,c
  40.    rla
  41. ; A is now double the shifted accumulator
  42.    sub h
  43. ; doubles as a comparison of the carry register (H) to double the accumulator
  44.    jr nc,$+5
  45. ; If the carry is > 2*accumulator, the bit in the accumulator needs to be 1:
  46.    inc c
  47. ; We need to perform H-(2C+1), but A=2C-H.
  48. ; We could do NEG to get A=H-2C, then DEC A, but NEG = CPL \ INC A
  49. ; NEG \ DEC A  =  CPL \ INC A \ DEC A
  50. ; So just use CPL, saving 8 t-states, 1 byte
  51.    cpl
  52.    ld h,a
  53.  
  54. ;Iteration 3 (same instruction sequence as iteration 2)
  55.    add hl,hl
  56.    add hl,hl
  57.    rl c
  58.    ld a,c
  59.    rla
  60.    sub h
  61.    jr nc,$+5
  62.    inc c
  63.    cpl
  64.    ld h,a
  65.  
  66. ;Iteration 4 (same instruction sequence as iteration 2)
  67.    add hl,hl
  68.    add hl,hl
  69.    rl c
  70.    ld a,c
  71.    rla
  72.    sub h
  73.    jr nc,$+5
  74.    inc c
  75.    cpl
  76.    ld h,a
  77.  
  78. ;L is 0 (all 8 bits of the input MSB have been shifted out of it), H is the current carry
  79. ;E is the lower 8 bits
  80. ; Load the next set of bits (LSB of input) into L so that they can be rotated into H
  81.    ld l,e
  82.  
  83. ;Iteration 5 (same instruction sequence as iteration 2)
  84.    add hl,hl
  85.    add hl,hl
  86.    rl c
  87.    ld a,c
  88.    rla
  89.    sub h
  90.    jr nc,$+5
  91.    inc c
  92.    cpl
  93.    ld h,a
  94.  
  95. ;Iteration 6 (same instruction sequence as iteration 2)
  96.    add hl,hl
  97.    add hl,hl
  98.    rl c
  99.    ld a,c
  100.    rla
  101.    sub h
  102.    jr nc,$+5
  103.    inc c
  104.    cpl
  105.    ld h,a
  106.  
  107. ;Iteration 7
  108. ; Now we need to start worrying about 8 bit overflow.
  109. ; In particular, the carry register, H should be ideally 9 bits for this iteration, 10 for the last.
  110. ; The accumulator, C, is 8 bits, but we need to compare H to 2*C, and 2*C is up to 9 bits on the last iteration.
  111. ;l has 4 more bits to rotate into h
  112.  
  113.    sla c \ ld a,c \ add a,a     ; C is shifted before the ADD HL,HL pair this time, so the carry tested below is the one from that pair; A = 2*C
  114.    add hl,hl
  115.    add hl,hl                    ; carry set here means H overflowed past 8 bits
  116.    jr nc,$+6                    ; no overflow: skip the 4 bytes of the next line and do the usual compare
  117.    sub h \ jp $+6               ; overflow: the carry register is certainly the larger one, so subtract and jump straight to INC C
  118.    sub h
  119.    jr nc,$+5
  120.    inc c
  121.    cpl
  122.    ld h,a
  123.  
  124. ;Iteration 8
  125. ; A lot of fancy stuff here
  126. ; D is 0, from way back at the beginning
  127. ; now I put H->E so that DE can hold the potentially 10-bit number
  128. ; Now L->H, C->L, and a copy of C goes to A
  129. ; H thus has the last two bits of the input that need to be rotated into DE
  130. ; L has the value of the accumulator which needs to be multiplied by 4 for a comparison to DE
  131. ; So 2 shifts of HL into DE results in DE holding the carry, HL holding 4*accumulated result!    
  132.    ld e,h
  133.    ld h,l
  134.    ld l,c
  135.       ld a,l
  136.    add hl,hl \ rl e \ rl d
  137.    add hl,hl \ rl e \ rl d      ; D started at 0 and has taken in only two bits, so this RL D leaves carry clear for the SBC
  138.    sbc hl,de                    ; carry set exactly when DE (the carry value) is greater than HL (4*accumulator)
  139. ;the c flag now has the state of the last bit of the result, HL does not need to be restored.
  140.    rla                          ; A = 2*accumulator + that bit, which completes the 8-bit result
  141.    ret
  142. H_Times_E:
  143. ;;Inputs: H and E are the two unsigned 8-bit factors
  144. ;;Outputs: HL is the 16-bit product H*E
  145. ;;         D is 0
  146. ;;         A, B, C and E are not written; flags are overwritten
  147. ;Shift-and-add multiply, unrolled 8 times. DE holds E as a 16-bit value.
  148. ;Each step shifts HL left by one: the next multiplier bit leaves the top of H in the carry,
  149. ;and when that bit is 1, DE is added to the partial product growing in the low end of HL.
  150. ;The shift must be ADD HL,HL. With ADD HL,DE in its place, H=1, E=1 returns $0207 instead of 1.
  151.     ld l,0 \ ld d,l                                ;L = 0 and D = 0, so DE = E
  152.     sla h \ jr nc,$+3 \ ld l,e                     ;bit 7 of H: the partial product is still 0, so "add E" is just L = E
  153.     add hl,hl \ jr nc,$+3 \ add hl,de              ;bit 6: $+3 skips the 1-byte ADD HL,DE when the bit is 0
  154.     add hl,hl \ jr nc,$+3 \ add hl,de              ;bit 5
  155.     add hl,hl \ jr nc,$+3 \ add hl,de              ;bit 4
  156.     add hl,hl \ jr nc,$+3 \ add hl,de              ;bit 3
  157.     add hl,hl \ jr nc,$+3 \ add hl,de              ;bit 2
  158.     add hl,hl \ jr nc,$+3 \ add hl,de              ;bit 1
  159.     add hl,hl \ ret nc \ add hl,de \ ret           ;bit 0: return right away when it is 0
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement