Advertisement
Guest User

Untitled

a guest
Dec 17th, 2017
470
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. ; ------------------------------------------------------------------------------
  2. ; Nemesis decompression routine
  3. ; ------------------------------------------------------------------------------
  4. ; Optimized by vladikcomper, further optimizations & comments by carljr17
  5. ; ------------------------------------------------------------------------------
  6.  
  7. NemDec_RAM:
  8.         ; Entry point: decompress Nemesis data to RAM (selects the RAM row writer, then shares NemDec_Main).
  9.         ; INPUT:
  10.         ;   a0 -> number of patterns + mode, followed by compressed data
  11.         ;   a4 -> RAM to output patterns to
  12.         ; Every register in the movem list below (a0 and a4 included) is restored by NemDec_Main before it returns.
  13.         movem.l d0-d6/a0-a1/a3-a5,-(sp)     ; 8 + 8*12 = 104
  14.  
  15.         lea     NemDec_WriteRowToRAM(pc),a3 ; 8 a3  = jump address for pixel row writes (RAM variant)
  16.  
  17.         bra.s   NemDec_Main                 ; 10 continue in the code shared with NemDec
  18.  
  19. ; ------------------------------------------------------------------------------
  20.  
  21. NemDec:
  22.         ; Entry point: decompress Nemesis data to the VDP data port; falls through into NemDec_Main.
  23.         ; INPUT:
  24.         ;   a0 -> number of patterns + mode, followed by compressed data
  25.  
  26.         movem.l d0-d6/a0-a1/a3-a5,-(sp)     ; same register list is restored before the rts at the end of NemDec_Main
  27.  
  28.         lea     NemDec_WriteRowToVDP(pc),a3 ;  8 a3  = jump address for pixel row writes
  29.         lea     $C00000,a4                  ; 12 a4 -> [vdp_data_port]
  30. NemDec_Main:
  31.         ; INPUT:
  32.         ;   a3  = NemDec_WriteRowToRAM/VDP
  33.         ;   a4 -> [$C00000] (vdp_data_port) (or anywhere in RAM)
  34.         ; The caller's registers must already be on the stack (see the movem in NemDec / NemDec_RAM).
  35.         lea     $FFFFAA00.w,a1          ;  8 a1 -> [$FFAA00] (Nemesis decompression buffer)
  36.  
  37.         move.w  (a0)+,d3                ;  8 d3 <- #/patterns (MSB set for Mode 1)
  38.         bpl.s   @0                      ; 10/8 d3.15 == 1 ? no, so in Mode 0 (not based on changes between rows)
  39.         ; The VDP-pair offset below is added to whichever base a3 holds, so the RAM pair must be spaced identically.
  40.         lea     NemDec_WriteRowToVDP_XOR-NemDec_WriteRowToVDP(a3),a3 ;  8 yes, so in Mode 1 (each row XOR'd with last, only changes recorded)
  41. @0:
  42.         lsl.w   #3,d3                   ; 6 + 2*3 = 12 -. (the Mode bit, bit #15, is shifted out here)
  43.         subq.w  #1,d3                   ;            4 -' d3 = #/patterns * 8 (= number of rows to plot), minus 1
  44.  
  45.         bsr.w   NemDec4                 ; 18 build opcode-to-count/color lookup table at (a1); trashes d0-d2/d5/a5, advances a0
  46.  
  47.         moveq   #0,d2                   ; for use with Mode 1 only (XOR with first row)
  48.         moveq   #1,d4                   ; set stop bit (nybble counter) -- 8 pixels per row
  49.  
  50.         move.b  (a0)+,d5                ; -. get first two bytes of compressed data, high byte first
  51.         asl.w   #8,d5                   ;  : NOTE(review): move.w (a0)+,d5 would need a0 to be even here;
  52.         move.b  (a0)+,d5                ; -' nothing visible guarantees that after the code table -- confirm before changing
  53.  
  54.         moveq   #16,d6                  ; 16 bits ready, set initial shift value
  55.  
  56.         bsr.s   NemDec2                 ; read in from bit stream of opcodes/in-line data, output rows (returns via the row writer's rts)
  57.  
  58.         movem.l (sp)+,d0-d6/a0-a1/a3-a5 ; restore everything saved at entry
  59.  
  60.         rts
  61.  
  62. ; ---------------------------------------------------------------------------
  63. ; Part of the Nemesis decompressor, processes the actual compressed data
  64. ; ---------------------------------------------------------------------------
  65.  
  66. NemDec3: ; this is an entry point for compatibility with code in sonic.asm
  67.         ; INPUT: a5 = number of rows to plot; d0 (repeat count) and d1 (pixel) are used below before any opcode is read
  68.         move.w  a5,d3                   ;  4 -. number of rows to plot from a5 (for compatibility)
  69.         subq.w  #1,d3                   ;  4 -'
  70.         moveq   #1,d4                   ;  4 reset 8-pixel row, set stop bit/nybble counter
  71.  
  72.         eor.b   d1,d4                   ;  4 pre-cancels the eor below (x ^ d1 ^ d1 = x); cheaper than a branch around next instruction
  73. NemDec_WritePixelLoop:
  74.         eor.b   d1,d4                   ;  4 put pixel in the empty low nibble: d4.l = ........ ........ .......! 00001111 etc.
  75. NemDec3_1:
  76.         dbra    d0,NemDec_WritePixel    ; 10/12 --repeat count == -1 ? no, so plot another pixel; yes, so fall through and decode next opcode
  77. NemDec2:
  78.         ; INPUT:
  79.  
  80.         ;   a0 -> stream of bits -- opcodes and/or in-line data
  81.         ;   a1 -> [$FFAA00] (LUT: opcode to palette, repeat count, & opcode width, 512 bytes)
  82.         ;   a3  = NemDec_WriteRowToRAM/VDP/_XOR
  83.         ;   a4 -> [$C00000] (vdp_data_port) (or anywhere in RAM)
  84.  
  85.         ;   d2.l  = previous row of 8 pixels, or 0 if first row; XOR'd with new row (mode 1 only)
  86.         ;   d3.w  = number of patterns * 8 (= number of rows to plot), minus 1
  87.         ;   d4.l  = holds 8 pixel nibbles, initially 1 (used as a stop bit or nybble counter)
  88.         ;   d5.w  = [a0 - 2] (first 16 bits of stream, read left to right)
  89.         ;   d6.b  = #/bits remaining to process, initially 16
  90.  
  91.         ; TRASHES: d0-d1
  92.  
  93.         sub.b   #9,d6                   ;  8 get right shift value to peek ahead at next 9 bits
  94.         move.w  d5,d0                   ;  4    -. left-justify opcode (high bit in bit #8),
  95.         lsr.w   d6,d0                   ;  6+2n -' followed by 1 or more unrelated, unprocessed bits
  96.  
  97.         andi.w  #$01FE,d0               ;  8 isolate 8 bits for lookup: opcode and 1 or more unrelated bits (bit #0 clear = word index)
  98.         sub.b   0(a1,d0.w),d6           ; 14 subtract opcode width, minus 9, from #/bits remaining (net effect with the sub above: d6 -= width)
  99.         move.b  1(a1,d0.w),d0           ; 14 d0.w <- .......? irrrpppp
  100.         bpl.s   NemDec_NotInline        ; if i == 1, inline: next 7 bits are palette + count (rrrpppp)
  101.  
  102.         cmpi.b  #9,d6                   ; 9 or more bits still available ?
  103.         bcc.s   @0                      ; yes, so skip the refill (no room in d5.w for another byte)
  104.  
  105.         addq.b  #8,d6                   ; 8 new bits, about to be read in below
  106.         asl.w   #8,d5                   ; shift all remaining bits into high byte
  107.         move.b  (a0)+,d5                ; get next 8 bits into low of d5
  108. @0:
  109.         subq.b  #7,d6                   ; 7 bits needed for inline data itself (palette + count) * (could be combined in above sub)
  110.         move.w  d5,d0
  111.         lsr.w   d6,d0                   ; shift so that low bit rrrpppp is in bit position 0
  112. NemDec_NotInline:
  113.         cmpi.b  #9,d6                   ; 9 or more bits still available ?
  114.         bcc.s   @1                      ; yes, so skip the refill (no room in d5.w for another byte)
  115.  
  116.         addq.b  #8,d6                   ; 8 new bits, about to be read in below
  117.         asl.w   #8,d5                   ; shift all remaining bits into high byte
  118.         move.b  (a0)+,d5                ; get next 8 bits into low of d5
  119. @1:
  120.         move.b  d0,d1                   ; d1.w  = ???????? ?rrrpppp
  121.         andi.b  #$0F,d1                 ;       = ???????? ....pppp (palette index)
  122.         andi.w  #$0070,d0               ; d0.w  = ........ .rrr....
  123.         lsr.w   #4,d0                   ;       = ........ .....rrr (repeat count, minus 1; clear bits 8-15)
  124. NemDec_WritePixel:
  125.         lsl.l   #4,d4                   ; d4.l = ........ ........ .......! 0000.... (carry = stop bit, shifted out on the 8th pixel)
  126.         bcc.s   NemDec_WritePixelLoop   ; 10/8 stop bit still in d4 ? yes, so row not full yet
  127.  
  128.         or.b    d1,d4                   ; 8th pixel completes the row: d4.l = 00001111 22223333 44445555 66667777
  129.  
  130.         jmp     (a3)                    ; 8 hand the finished row to the selected NemDec_WriteRowTo* routine
  131.  
  132. ; ---------------------------------------------------------------------------
  133.  
  134. NemDec_WriteRowToVDP:
  135.         ; Mode 0 row writer for the VDP: reached via jmp (a3) with the finished row in d4.
  136. loc_1502:
  137.  
  138.         move.l  d4,(a4)                 ; write 8-pixel row to VDP data port (a4 is not advanced)
  139.  
  140.         moveq   #1,d4                   ;  4 reset 8-pixel row, set stop bit/nybble counter
  141.  
  142.         dbra    d3,NemDec3_1            ; 10/12 --rows remaining == -1 ? no, so carry on with the current repeat run
  143.  
  144.         rts                             ; all rows written
  145.  
  146. NemDec_WriteRowToVDP_XOR:
  147.         ; Mode 1 row writer for the VDP: d4 holds the changes relative to the previous row (d2).
  148.         eor.l   d4,d2                   ; XOR the previous row by the current row
  149.         move.l  d2,(a4)                 ; write new current row to VDP data port (a4 is not advanced)
  150.  
  151.         moveq   #1,d4                   ;  4 reset 8-pixel row, set stop bit/nybble counter
  152.  
  153.         dbra    d3,NemDec3_1            ; 10/12 --rows remaining == -1 ? no, so carry on with the current repeat run
  154.  
  155.         rts                             ; all rows written
  156.  
  157. ; ---------------------------------------------------------------------------
  158.  
  159. NemDec_WriteRowToRAM:
  160.         ; Mode 0 row writer for RAM: reached via jmp (a3) with the finished row in d4.
  161.         move.l  d4,(a4)+                ; write 8-pixel row to RAM, with post-increment
  162.  
  163.         moveq   #1,d4                   ;  4 reset 8-pixel row, set stop bit/nybble counter
  164.  
  165.         dbra    d3,NemDec3_1            ; 10/12 --rows remaining == -1 ? no, so carry on with the current repeat run
  166.  
  167.         rts                             ; all rows written
  168.  
  169. NemDec_WriteRowToRAM_XOR:
  170.         ; Mode 1 row writer for RAM: d4 holds the changes relative to the previous row (d2).
  171.         eor.l   d4,d2                   ; XOR the previous row by the current row
  172.         move.l  d2,(a4)+                ; write new current row to RAM with post-increment
  173.  
  174.         moveq   #1,d4                   ;  4 reset 8-pixel row, set stop bit/nybble counter
  175.  
  176.         dbra    d3,NemDec3_1            ; 10/12 --rows remaining == -1 ? no, so carry on with the current repeat run
  177.  
  178.         rts                             ; all rows written
  179.  
  180. ; ---------------------------------------------------------------------------
  181. ; Part of the Nemesis decompressor, builds the code table (in RAM)
  182. ; ---------------------------------------------------------------------------
  183.  
  184. NemDec4:
  185.         ; Fills the lookup table at (a1) with one word per 8-bit opcode prefix: (width - 9) in the high byte, irrrpppp in the low byte.
  186.         ; INPUT:
  187.  
  188.         ;   a0 -> !...pppp (palette index -- 1st need not have bit #7 set)
  189.         ;         .rrrcccc (repeat count + code length)
  190.         ;         cccccccc (opcode, mysteriously right-justified)
  191.         ;         .rrrcccc (repeat count + code length)
  192.         ;         cccccccc (opcode, mysteriously right-justified)
  193.         ;         ...
  194.         ;         $FF      (end-of-table)
  195.         ;   a1 -> [$FFAA00] (nemesis decompression buffer -- actually 512 byte opcode-to-count/color LUT)
  196.  
  197.         ; TRASHES: d0-d2/d5/a5; a0 is left pointing just past the $FF end-of-table byte
  198.  
  199.         lea     $01F8(a1),a5            ;  8 point to last four entries of table (used to flag inline data)
  200.         move.l  #$FDFFFDFF,d5           ; 12 set cccc (opcode width) = 6 (minus 9), i (inline) = 1, rrrpppp = don't care
  201.         move.l  d5,(a5)+                ; 12 [$01F8 + 0/2].w <- !!!!cccc irrrpppp
  202.         move.l  d5,(a5)+                ; 12 [$01F8 + 4/6].w <- !!!!cccc irrrpppp
  203.  
  204.         move.b  (a0)+,d0                ;  8 read 1st byte (????pppp or $FF for end-of-table)
  205.  
  206.         bra.s   @ChkEnd                 ; 10 unconditional: 1st byte is handled as palette index or end-of-table ($FF), whatever its bit #7
  207.  
  208. @ItemLoop:
  209.  
  210.         move.b  d0,d5                   ;  4 d5.w = ???????? .rrrcccc (rrr = repeat count, cccc = opcode width)
  211.         andi.w  #$000F,d2               ;  8 d2.w = ........ ....pppp (also strips the previous entry's width/count bits)
  212.         andi.w  #$0070,d0               ;  8 d0.w = ........ .rrr.... (note bits 8-15 cleared for below)
  213.         or.b    d0,d2                   ;  4 d2.w = ........ .rrrpppp
  214.  
  215.         eor.b   d0,d5                   ;  4 d5.w = ???????? ....cccc
  216.  
  217.         moveq   #8,d1                   ;  4 -. d1.b = 8 - opcode width = shift left count (for left-justify)
  218.         sub.b   d5,d1                   ;  4 -' i.e., 12345678 -> 76543210
  219.  
  220.         sub.b   #9,d5                   ;  8 opcode width - 9 needed for NemDec2 *
  221.         lsl.w   #8,d5                   ; 6 + 2*8 = 22 d5.w = !!!!cccc ........
  222.         or.w    d5,d2                   ;  4           d2.w = !!!!cccc .rrrpppp (table entry)
  223.  
  224.         move.b  (a0)+,d0                ;  8 get actual opcode (mysteriously not shifted into position)
  225.         lsl.b   d1,d0                   ; 6 + 2*d1 d0.b = opcode << ( 8 - opcode width ) (= left justify opcode; but why isn't this just pre-shifted ?)
  226.         add.w   d0,d0                   ;  4 double to index into table of words
  227.  
  228.         moveq   #0,d5                   ;  4 -.
  229.         bset    d1,d5                   ;  6  : d5 = ( 1 << ( 8 - opcode width ) ) - 1
  230.         subq.b  #1,d5                   ;  4 -'    = #/table entries to fill, minus 1 (dbra count)
  231.  
  232.         lea     (a1,d0.w),a5            ; 12 (versus 4 move.w a1, a5 + 8 add.w d0, a5 = 12)
  233. @ItemShortCodeLoop:
  234.         move.w  d2,(a5)+                ;  8 store entry: !!!!cccc .rrrpppp
  235.         dbra    d5,@ItemShortCodeLoop   ; 10/14 repeat for required number of entries
  236. @ChkNext:
  237.         move.b  (a0)+,d0                ;  8 palette index, repeat count/opcode width, or end-of-table
  238.         bpl.s   @ItemLoop               ; 10/8 if d0.7 == 0, repeat count/opcode width
  239. @ChkEnd:
  240.         move.b  d0,d2                   ;  4 d2.w = ???????? ????pppp (pppp = palette index)
  241.  
  242.         not.b   d0                      ;  4 $FF -> 0, so Z is set only at end-of-table
  243.         bne.s   @ChkNext                ; 10/8 not end-of-table ? yes, so d2 holds a new palette index; fetch next byte
  244.  
  245.         rts                             ; return to caller (end-of-table $FF reached)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement