Advertisement
xiahanlu

GPU.inl

Jun 27th, 2019
580
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 128.32 KB | None | 0 0
  1. /* Copyright 2019 moecmks (agalis01@outlook.com)
  2.    This file is part of ImpulseBoy.
  3.  
  4.    This program is free software; you can redistribute it and/or modify
  5.    it under the terms of the GNU General Public License as published by
  6.    the Free Software Foundation; either version 2, or (at your option)
  7.    any later version.
  8.  
  9.    This program is distributed in the hope that it will be useful,
  10.    but WITHOUT ANY WARRANTY; without even the implied warranty of
  11.    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  12.    GNU General Public License for more details.
  13.  
  14.    You should have received a copy of the GNU General Public License
  15.    along with this program; if not, write to the Free Software Foundation,
  16.    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  17.  
  18. #ifndef GPU_INL_H
  19. #define GPU_INL_H 1
  20.  
  21. #ifdef __cplusplus
  22. extern "C" {        
  23. #endif
  24.  
  25. #ifdef _WIN32
  26. # define _CRT_SECURE_NO_DEPRECATE
  27. #endif
  28.  
  29. #ifdef _WIN32
  30. # include <intrin.h>
  31. #endif
  32.  
  33. #include "stdint.h"
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <string.h>
  37. #include <process.h>
  38. #include <assert.h>
  39. #include <string.h>
  40. #include <math.h>
  41.  
  42. /* 16-bit storage cell that can be viewed as two bytes or as one halfword,
         each in unsigned (blk8 / blk) or signed (sblk8 / sblk) form.
         XXX: This union requires memory continuity, i.e. every member must
         overlay the same two bytes.  Which byte of `blk` is blk8[0] depends
         on the byte order of the host. */
  43. union iopad_blk16 {
  44.   uint8_t blk8[2];
  45.   int8_t sblk8[2];
  46.   uint16_t blk;
  47.   int16_t sblk;
  48. };
  49. /* 32-bit storage cell that can be viewed as four bytes (blk8 / sblk8),
         two halfwords (blk16 / sblk16) or one word (blk / sblk), unsigned or
         signed.
         XXX: This union requires memory continuity, i.e. every member must
         overlay the same four bytes.  The mapping between the sub-views and
         the bits of `blk` depends on the byte order of the host. */
  50. union iopad_blk32 {
  51.   uint8_t blk8[4];
  52.   int8_t sblk8[4];
  53.   uint16_t blk16[2];
  54.   int16_t sblk16[2];
  55.   uint32_t blk;
  56.   int32_t sblk;
  57. };
  /* 8-bit storage cell with an unsigned (blk) and a signed (sblk) view. */
  58. union iopad_blk8 {
  59.   uint8_t blk;
  60.   int8_t sblk;
  61. };
  62.  
  /* Pointer reinterpretation helpers.
       fto_uintNp(x)    - address of the lvalue x, cast to uintN_t *.
       fto_uintNpv(x)   - the uintN_t value read through that pointer.
       ref_to_uintNp(x) - the pointer expression x, cast to uintN_t *.
     NOTE(review): these are plain casts.  Reading an object through a pointer
     of a different type is subject to C's aliasing and alignment rules;
     confirm each use site is safe. */
  63. #define fto_uint8p(x)((uint8_t *)(&(x)))
  64. #define fto_uint16p(x)((uint16_t *)(&(x)))
  65. #define fto_uint32p(x)((uint32_t *)(&(x)))
  66. #define fto_uint8pv(x)(*(uint8_t *)(&(x)))
  67. #define fto_uint16pv(x)(*(uint16_t *)(&(x)))
  68. #define fto_uint32pv(x)(*(uint32_t *)(&(x)))
  69. #define fto_uint64p(x)((uint64_t *)(&(x)))
  70. #define ref_to_uint8p(x)((uint8_t *)((x)))
  71. #define ref_to_uint16p(x)((uint16_t *)((x)))
  72. #define ref_to_uint32p(x)((uint32_t *)((x)))
  73. #define ref_to_uint64p(x)((uint64_t *)((x)))
  74.  
  /* `null`: null pointer constant usable from both C and C++ builds.  It is
     only defined here when the includer has not defined NULL_DEFINED. */
  75. #ifndef NULL_DEFINED
  76. # ifdef __cplusplus
  77. #  define null nullptr
  78. # else
  79. #  define null ((void *)0)
  80. # endif  
  81. #endif
  82.  
  /* `kable`: pointer-sized unsigned integer type. */
  83. typedef uintptr_t kable;
  /* true / false for plain C builds; skipped for C++ and when the includer
     has defined BOOL_DEFINED. */
  84. #if !defined (BOOL_DEFINED) \
  85.  && !defined (__cplusplus)
  86.   # define true 1
  87.   # define false 0
  88. #endif
  89.  
  /* Compiler abstraction layer.
       finline         - file-local function the compiler is told to always inline
       dinline         - function that must never be inlined
       callstd / callc - stdcall / cdecl calling convention
       calign(x)       - align the declared object to x bytes
       cimpl           - dllexport marker
       ctls            - thread-local storage class
     Any compiler that is neither MSVC/Intel nor GCC-compatible is rejected. */
  # define USR_IMPL
  #if defined (_MSC_VER) || defined (__ICC) || defined (__INTEL_COMPILER)
  # define finline static  __forceinline
  # define dinline         __declspec(noinline)
  # define callstd         __stdcall
  # define callc           __cdecl
  # define calign(x)      __declspec(align(x))
  # define cimpl          __declspec(dllexport)
  # define ctls                       __declspec(thread)
  #elif defined (__GNUC__) || defined (__GNUG__)
  /* always_inline is specified for functions that are also declared `inline`;
     without the keyword GCC warns that the function "might not be inlinable"
     and unused helpers trigger unused-function warnings. */
  # define finline static inline __attribute__((always_inline))
  # define dinline         __attribute__((noinline))
  # define callstd         __attribute__((stdcall))
  # define callc           __attribute__((cdecl))
  # define calign(x)      __attribute__((aligned(x)))
  # define cimpl          __attribute__((dllexport))
  # define ctls                       __thread
  #else
  # error unsupported compiler!
  #endif
  110.  
  /* DEBUG_BREAK(): breaks into the debugger in MSVC debug builds (_DEBUG) and
     expands to a no-op everywhere else.  The non-Windows fallback exists
     because code in this file uses the macro unconditionally; it is skipped
     if the includer already supplied its own definition. */
  #ifdef _WIN32
  # ifdef _MSC_VER
  #  ifdef _DEBUG
  #    define DEBUG_BREAK() __debugbreak()
  #  else
  #    define DEBUG_BREAK() ((void)0)
  #  endif
  # else
  #  define DEBUG_BREAK() ((void)0)
  # endif
  #elif !defined (DEBUG_BREAK)
  # define DEBUG_BREAK() ((void)0)
  #endif

  /* FORCE_BREAK(): unconditional breakpoint on MSVC, no-op elsewhere.
     __debugbreak() (from <intrin.h>) replaces the former `__asm { int 3 }`,
     because MSVC's inline assembler is not available for x64 targets. */
  #ifdef _WIN32
  # ifdef _MSC_VER
  #  define FORCE_BREAK() __debugbreak()
  # else
  #  define FORCE_BREAK() ((void)0)
  # endif
  #elif !defined (FORCE_BREAK)
  # define FORCE_BREAK() ((void)0)
  #endif

  /* DEBUG_OUT(...): printf-style trace, compiled in only when _DEBUG0 is
     defined; otherwise the arguments are discarded unevaluated. */
  #ifdef _DEBUG0
  # define DEBUG_OUT(...) printf (__VA_ARGS__)
  #else
  # define DEBUG_OUT(...) ((void)0)
  #endif
  136.  
  /* Colour layout identifiers, carried in a GPU_COL_MOLD.  The names spell
     the channel order and bit widths from the most significant bit down
     (X = unused bits). */
  137. #define GPU_COL32_X8B8G8R8 0
  138. #define GPU_COL32_X8R8G8B8 1
  139. #define GPU_COL16_X1B5G5R5 2
  140. #define GPU_COL16_X1R5G5B5 3
  141. #define GPU_COL_DEFAULT GPU_COL16_X1R5G5B5
  142. typedef intptr_t GPU_COL_MOLD;
  143.  
  144. /* for gba::wait */
  145. #define WAIT_204_INSTRUCTION_CACHE_OPEN_MASK 0x4000
  146.  
  147. /* for gpu::status: single-bit flags.  Judging by the names these are the
          V-blank / H-blank / line-compare flags and their IRQ enables -
          TODO confirm against the code that sets them. */
  148. #define GPU_STAT_VBL 0x01
  149. #define GPU_STAT_HBL 0x02
  150. #define GPU_STAT_LYCS 0x04
  151. #define GPU_STAT_VBL_IRQ 0x08
  152. #define GPU_STAT_HBL_IRQ 0x10
  153. #define GPU_STAT_LYC_IRQ 0x20
  154.  
  /* Access width selector for an I/O write (8, 16 or 32 bits). */
  155. typedef int IO_WRITE_BLOCK;
  156. #define IO_WRITE_8 0
  157. #define IO_WRITE_16 1
  158. #define IO_WRITE_32 2
  159.  
  /* ARM7 CPU core state: live register file, banked-register caches, a
     two-entry opcode pipeline, wait-state bookkeeping and a pointer back to
     the console.  ime / ie / ifs are presumably the interrupt master-enable,
     enable and request-flag registers - TODO confirm. */
  160. struct arm7 {
  161.   uint32_t regs[17];   /* Register r0-r12, SP, LR LINK, PC, CPSR */
  162.   uint32_t r812_t[10];/* Register Bank Cache r8-12 for FIQ mode*/
  163.   uint32_t r1314_t[12]; /* Register Bank Cache r13-r14*/
  164.   uint32_t spsr_t[6]; /* Register SPSR Bank*/
  165.   uint32_t opcode[2]; /*  Pipeline Opcode */
  166.   union iopad_blk32 ime;
  167.   union iopad_blk16 ie;
  168.   union iopad_blk16 ifs;
  169.   uint32_t waitState; /*  GamePAK's wait state */
  170.   uint32_t nextNoSeqFetch;  
  171.   struct gba *agb; /* console this core belongs to */
  172.   uint32_t unused;
  173. };
  174.  
  /* Register set of one rendering channel.
     ctl is the channel control word.  The background renderer reads the
     nametable base from bits 8-12, the nametable layout from bits 14-15, the
     character base from bits 2-3 and the 4bit/8bit tile switch from bit 7.
     loopy_x / loopy_y are scroll values; the renderer reads the *_shadow
     copies.  dx / dmx / dy / dmy are the affine parameters PA / PB / PC / PD
     described in the sprite-affine notes of this file, each with a 32-bit
     shadow.
     NOTE(review): ref_x / ref_y and loopy_dmx / loopy_dmy look like the affine
     reference point and its running per-line accumulators - confirm against
     the affine renderer. */
  175. struct gpu_channel {
  176.   union iopad_blk16 ctl;
  177.   union iopad_blk32 loopy_dmy;
  178.   union iopad_blk32 loopy_dmx;
  179.   union iopad_blk16 loopy_x_shadow;
  180.   union iopad_blk16 loopy_y_shadow;
  181.   union iopad_blk16 loopy_x;
  182.   union iopad_blk16 loopy_y;
  183.   union iopad_blk32 dx_shadow;
  184.   union iopad_blk32 dmx_shadow;
  185.   union iopad_blk32 dy_shadow;
  186.   union iopad_blk32 dmy_shadow;
  187.   union iopad_blk16 dx;
  188.   union iopad_blk16 dmx;
  189.   union iopad_blk16 dy;
  190.   union iopad_blk16 dmy;
  191.   union iopad_blk32 ref_x;
  192.   union iopad_blk32 ref_y;
  193. };
  194. /* host device callback for lcd video: describes the host-side pixel
          buffer.  w / h / pitch give its geometry (units are not stated here -
          presumably pixels for w / h and bytes for pitch, TODO confirm);
          clr_request holds a GPU_COL_* colour layout identifier. */
  195. struct ppu_framebuffer {
  196.   int32_t w;
  197.   int32_t h;
  198.   union { /* one buffer pointer, typed for 16-bit or 32-bit pixels */
  199.     int16_t *pixel_16; /* gb, gbc, gba use X1B5G5R5 pixel format */
  200.     int32_t *pixel_32;
  201.     void *pixel;
  202.   };
  203.   int32_t pitch;
  204.   GPU_COL_MOLD clr_request;
  205. };
  206. /* Graphic Processors: GPU register file, video memories, line buffers
          and per-channel state. */
  207. struct gpu {
  208.   union iopad_blk16 ctl;
  209.   union iopad_blk16 col_swap;
  210.   union iopad_blk16 status; /* GPU_STAT_* flags */
  211.   union iopad_blk16 line; /* scanline being rendered (used as row index by the renderer) */
  212.   union iopad_blk16 clip_x[2];
  213.   union iopad_blk16 clip_y[2];
  214.   union iopad_blk16 win_in;
  215.   union iopad_blk16 win_out;
  216.   union iopad_blk16 mosaic; /* bits 0-3 and 4-7 are read by bg_mosaic */
  217.   union iopad_blk16 bld_ctl;
  218.   union iopad_blk16 bld_args;
  219.   union iopad_blk16 bri_args;
  220.   union iopad_blk16 ugpio_4E;
  221.   union iopad_blk16 ugpio_56;
  222.  
  223.   uint8_t oam[0x400+32];/* 1K Object attr mem (OAM) */
  224.   uint8_t vram[0x20000+32]; /* 96K video memory (nametable and chr or bitmap data); the array itself is 0x20000+32 bytes */
  225.   uint8_t palette[0x400+32]; /* 1K bg/sp palette */
  226.   uint8_t palette2[0x400+32]; /* dummy palette */
  227.   uint8_t *palette16_b; /* bank for col16*/
  228.   uint16_t palette3[0x400+32]; /* dummy palette */
  229.   uint16_t palette4[0x400+32]; /* dummy palette */
  230.  
  231.   union {
  232.     uint16_t vbufb[1024*640];
  233.     uint32_t vbufb32[1024*640];
  234.   };
  235.   union { /* NOTE(review): "cahce" is a typo for "cache"; the names are part of the interface */
  236.     uint16_t vptr_cahce[1024];
  237.     uint32_t vptr_cahce32[1024];
  238.   };
  239.   union { /* NOTE(review): vbuf32 is an integer, not a uint32_t pointer - confirm this is intended */
  240.     uint16_t *vbuf;
  241.     uint32_t vbuf32;
  242.   };
  243.   union {
  244.     uint16_t vptr_pitch; /* the renderer uses vptr_pitch/2 as the row stride of vbuf in uint16_t elements */
  245.     uint32_t vptr_pitch32;
  246.   };
  247.   uint16_t *vattr;      
  248.   uint16_t *vptrcc;
  249.   uint16_t vattrb[4096+32];
  250.  
  251.   /*  d0:objwin
  252.        d1:win0
  253.        d2:win1
  254.        d3:sprite mask
  255.        d4:chan 0 mask
  256.        d5:chan 1 mask
  257.        d6:chan 2 mask
  258.        d7:chan 3 mask
  259.     */
  260. #define SOLID_MODE_OBJWIN_MASK 0x01
  261. #define SOLID_MODE_WIN0_MASK 0x02
  262. #define SOLID_MODE_WIN1_MASK 0x04
  263. #define SOLID_MODE_FULL_MASK (SOLID_MODE_OBJWIN_MASK |  SOLID_MODE_WIN0_MASK | SOLID_MODE_WIN1_MASK)
  264. #define SOLID_CHAN_SPRITE_MASK 0x08
  265. #define SOLID_CHAN_BG_BASE_MASK 0x10
  266. #define SOLID_CHAN_0_MASK 0x10
  267. #define SOLID_CHAN_1_MASK 0x20
  268. #define SOLID_CHAN_2_MASK 0x40
  269. #define SOLID_CHAN_3_MASK 0x80
  270. #define SOLID_CHAN_FULL_MASK (SOLID_CHAN_0_MASK | SOLID_CHAN_1_MASK | SOLID_CHAN_2_MASK | SOLID_CHAN_3_MASK | SOLID_CHAN_SPRITE_MASK)
  271.  
  272.   struct gpu_channel chan[5]; /* presumably the four backgrounds plus one more entry - TODO confirm the role of index 4 */
  273.   struct ppu_framebuffer vid_buf;
  274.   struct gba *agb; /* console this GPU belongs to */
  275.  
  276.   GPU_COL_MOLD mold;
  277. };
  /* The emulated console: one CPU core and one GPU, stored by value.  Both
     members carry a `struct gba *agb` pointer back to this object. */
  278. struct gba {
  279.   struct arm7 arm7;
  280.   struct gpu gpu;
  281. };
  282.  
  283.  
  284. /* Copyright 2019 moecmks (agalis01@outlook.com)
  285.    This file is part of ImpulseBoy.
  286.  
  287.    This program is free software; you can redistribute it and/or modify
  288.    it under the terms of the GNU General Public License as published by
  289.    the Free Software Foundation; either version 2, or (at your option)
  290.    any later version.
  291.  
  292.    This program is distributed in the hope that it will be useful,
  293.    but WITHOUT ANY WARRANTY; without even the implied warranty of
  294.    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  295.    GNU General Public License for more details.
  296.  
  297.    You should have received a copy of the GNU General Public License
  298.    along with this program; if not, write to the Free Software Foundation,
  299.    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
  300.  
  301. /* GPU Graphics Explanation
  302.  * The GPU of GBA is very complex.
  303.  * I'll try to explain something, if you can understand it.
  304.  *
  305.  * The display memory of GBA is 96KB, the first 64KB is used for map scrolling,
  306.  * and the latter 32K has been used for sprite rendering (OAM).
  307.  *
  308.  * GBA has four scroll (Bg0~Bg3) renderers and behaves differently in different modes
  309.  * There are six modes for map scanning
  310.  *
  311.  * Mode-0 Alive BG : Bg0 Bg1 Bg2 Bg3 - 2D Mapper Base NES/CGB Tile extend (4Bit/8Bit Switchable)  
  312.  * Mode-1 Alive BG : Bg0 Bg1 Bg2(2 is AffineBG), Bg3 unused
  313.  * Mode-2 Alive BG : Bg2 Bg3(2, 3 is AffineBG) Bg0, Bg1 unused
  314.  * Mode-3 240x160 pixels, 32768 colors 75KB (Only Bg2 alive)
  315.  * Mode-4 240x160 pixels, 256 colors 2 frame (Only Bg2 alive)
  316.  * Mode-5 160x128 pixels, 32768 colors 2 frame (Only Bg2 alive)
  317.  
  318.  * Imaging principle of map
  319.  * Maps can display four types of pixels
  320.  * PT 0: 16 PAL (Mode-0~Mode-2)
  321.  * PT 1: Common 256 PAL (Mode-0~Mode-2)
  322.  * PT 2: rgb15 pixel (Only for Mode-3 and Mode-5)
  323.  * PT 3: Special 256 PAL (Mode-4)
  324.  
  325.  * The map and the sprite each have a readable and writable rgb15 palette indexing 256 colors palette
  326.  * color format : d15                  d14       ....          d0
  327.  *                unused               bbbbb   ggggg rrrrr
  328.  *
  329.  
  330.  * PT0, PT1 Render (Not Affine Mode)
  331.  * In this mode, the minimum operating unit of a graph is an 8*8 set of small tile pixels,
  332.  * not a single pixel, as we often call it "Tile".
  333.  *
  334.  * a Tile := 8*8 Pixels set
  335.  * This Tile takes 32 bytes in 4Bit palette mode
  336.  * 64 bytes under 8Bit palette
  337.  *
  338.  *                             Pos 0  .........                Pos 7
  339.  * A 8Bit Mode's Tile Decode:= Line 0 Start:= Byte0  ... End Byte7
  340.                                  .
  341.                                  .
  342.                                  .
  343.                                Line 4 Start:= Byte32  ... End Byte39
  344.                                  .
  345.                                  .
  346.                                Line 7 Start:= Byte56  ... End Byte63
  347.                                
  348.                                Pos 0  .........                Pos 7
  349.  * A 4Bit Mode's Tile Decode:= Line 0 Start:= Byte0  ... End Byte3 (Per Byte's Low4Bit for left pos, other 4bit for right pos,
  350.                                  .                                     e.g. Byte 3, low4Bit for pos 6 high_4bit for pos7 )
  351.                                  .
  352.                                  .
  353.                                  .
  354.                                Line 4 Start:= Byte16  ... End Byte19
  355.                                  .
  356.                                  .
  357.                                Line 7 Start:= Byte28  ... End Byte31
  358.                        
  359.  * For 8Bit Mode, Directly indexed to the color value of the 15-bit palette, (No. 0 palette is always transparent, Even in 4bit mode)
  360.  * For 4Bit Mode, the palette color is indexed at base_16 (a 4-bit number) mul 16 add the 4-bit pixel index
  361.  * 4Bit Mode addressing e.g. base_16:= 5 4bit:= 3 result pixel := bg_palette[5*16+3]
  362.  *                  bg_palette[base_16]'s Pixel is always transparent (This base_16 will be explained in the next stage.)
  363.    
  364.  * the display memory has something called "Tile Id/attr table".
  365.  * We simply call it "NameTable" (Same as NES)
  366.  *
  367.  * NameTable a unit has two bytes
  368.  *
  369.  * NameTable's 2 Bytes attr (Not Affine Mode)
  370.  *
  371.  *     0-9   Tile ID    
  372.  *      10    Horizontal Flip
  373.  *      11    Vertical Flip  
  374.  *      12-15 Palette Number  base_16 4bit for 4bit mode render (8bit mode unused, Ignore It!)
  375.  *
  376.  * The GPU can address an 8*8 pixel tile block based on the Tile ID number inside the naming table,
  377.  * and then do a little flip according to the set of the other bit domains,
  378.  * or the programmer can rewrite base_16 to switch 16 bit palette slots (if some flickering effect is made), if it is in 4bit mode.
  379.  
  380.  * the display memory has something called "Character Table".
  381.  * We simply call it "ChrTable" (Same as NES)
  382.  *
  383.  * This is where the 8bit/4bit palette index is stored.
  384.  *
  385.  * for 8bit mode addressing char index := base_char_address + tid *64 (Per 8bit mode tile block have 64bytes for 8*8 pixel)
  386.  * for 4bit mode addressing char index := base_char_address + tid *32 (Per 4bit mode tile block have 32bytes for 8*8 pixel)  
  387.  * base_char_address are provided by GPU registers
  388.  
  389.  * Map Scroll and Nametable
  390.  * The unit size of a map in each map rendering channel is 256*256 display Pixels
  391.  * Maximum four units can be set (512*512 Pixels), only for 4 mode Selectable
  392.  
  393.  * Type 0 256 *256 pixels (Single Screen Mapper), 32*32 Tile (2K NameTable Bytes)
  394.    Type 1 512 *256 pixels (Horizontal Double Screen), 64*32 Tile (4K NameTable Bytes)
  395.    Type 2 256 *512 pixels (Vertical Double Screen), 32*64 Tile (4K NameTable Bytes)
  396.    Type 3 512 *512 pixels (Quadruple Screen), 64*64 Tile (8K NameTable Bytes)
  397.    
  398.    For Type 0 Scroll Exceed 256, Backup to pos 0 (both Horizontal Scroll, Vertical Scroll)
  399.    
  400.    For Type 1 Vertical Scroll Exceed 256, Backup to pos 0
  401.                Horizontal Scroll Exceed 512, Backup to pos 0
  402.                
  403.    For Type 2 Horizontal Scroll Exceed 256, Backup to pos 0
  404.                Vertical Scroll Exceed 512, Backup to pos 0
  405.                
  406.    For Type 3 Horizontal Scroll Exceed 512, Backup to pos 0
  407.                Vertical Scroll Exceed 512, Backup to pos 0  
  408.    
  409.    Address Mapper := BaseAddress + Unit ID *2K (Base addresses are provided by GPU registers)
  410.    
  411.    Type 0: Only 1 Unit
  412.    Type 1: 2 Units, left unit id:= 0 right unit id:= 1
  413.    Type 2: 2 Units, top unit id:= 0 bottom unit id:= 1
  414.    
  415.    Type 3: 4 Units, left-top unit id:= 0 right-top unit id:= 1  
  416.                      left-bottom unit id:= 2 right-bottom unit id:= 3
  417.  
  418.  * Sprite
  419.  * All 32K memory used is a character table
  420.  * Another 1K memory is used to describe the properties of the Sprites.
  421.  * Whether in 4bit/8bit mode, the Sprite's Tile * ID addressing is only * 32
  422.  * For the 8 bit palette, his lsb will be ignored (& - 2)
  423.  
  424.  * e.g. 8bit mode
  425.         char_base := 0x10000
  426.         tile id:= 126
  427.         char address := 0x10000 + 126*32
  428.    
  429.    
  430.  * In addition, Sprite's other feature is that many tiles can be glued together to
  431.     form a large collection of Tiles (that is, the complete console game character pixels).
  432.  
  433.     Tile id always represents the set's first Tile in the upper left corner
  434.      
  435.       Horizontal increase := 8Bit +2, 4Bit +1
  436.      
  437.       The remaining Tiles's ID can be addressed in two ways
  438.      
  439.       1: Fixed Pitch Mapper Addressing.
  440.          Pitch Always 32 in 4bit/8bit mode .
  441.          
  442.          e.g. 0 1 2 3 4 5 6 7 line 1
  443.               32 33 34 35 ..39 line 2 (this is 4bit mode, for 8bit Next incremental addressing in each horizontal direction+2)
  444.              
  445.          e.g. Tile Block Set := 2*4 Tiles (16 *32 Pixels)
  446.               Tile Id := 252
  447.               8Bit mode
  448.              
  449.               Pos (1, 2)'s ID := 2*32 (Y Pos 2 , Mul Pitch)
  450.                            +     1*2  (X Pos 1 , * 2 [8Bit per +2])
  451.                            +     252  (Base Tile ID)
  452.                                              
  453.          e.g. Tile Block Set := 4*4 Tiles (32 *32 Pixels)
  454.               Tile Id := 177
  455.               4Bit mode
  456.              
  457.               Pos (3, 3)'s ID := 3*32 (Y Pos 3 , Mul Pitch)
  458.                            +     3*1  (X Pos 3 , * 1 [4Bit per +1])
  459.                            +     177  (Base Tile ID)                                
  460.          
  461.       2: Linear  Mapper Addressing.
  462.          
  463.          Even on the next line, TileID is only +2, +1 each time.
  464.          
  465.          e.g. 1 2 3 4 5 6 7 line 1
  466.               8 9 10 ......15 line 2 (this is 4bit mode, for 8bit Next incremental addressing in each horizontal direction+2)
  467.  
  468.          e.g. Tile Block Set := 4*8 Tiles (32 *64 Pixels)
  469.               Tile Id := 66
  470.               8Bit mode
  471.              
  472.               Pos (0, 1)'s ID := 2*8 (Y Pos 1 , To Next line, 8+1*2)
  473.                            +     0  (X Pos 0 nodone)
  474.                            +     66  (Base Tile ID)          
  475.      
  476.  
  477.       * Sprite Affine
  478.       * The direction of the coordinate vector of the sprite affine is
  479.       *                                
  480.                        Y Negative
  481.                       /|\
  482.                      
  483.                        |
  484.                        |
  485.     X Negative <- -----|-----  -> X Positive
  486.                        |\
  487.                        | \_____________ Origin (0, 0) is Sprite's central coordinate
  488.                      
  489.                       \|/
  490.                        Y Positive
  491.                    
  492.     For affine, Sprite only needs these four parameters              
  493.     The direction of expansion of vectors increases from the center to the outside
  494.    
  495.     PA - dx Incremental position vector for each horizontal pixel (X)
  496.     PB - dmx Incrementally incremental position vectors for each switch to the next row of pixels (X)
  497.     PC - dy Incremental position vector for each horizontal pixel (Y)
  498.     PD - dmy Incrementally incremental position vectors for each switch to the next row of pixels (Y)
  499.  
  500.     e.g. dx := 1
  501.          dmy := 1
  502.          dy := 0
  503.          dmx := 0 <-------------------- Standard render
  504.          
  505.     e.g. dx := -1
  506.          dmy := 1
  507.          dy := 0
  508.          dmx := 0 <-------------------- Horizontal Flip render        
  509.          
  510.     e.g. dx := 1
  511.          dmy :=-1
  512.          dy := 0
  513.          dmx := 0 <-------------------- Vertical Flip render      
  514.          
  515.     (Note that the last 8 bits of the parameters in GBA are all non-integers (used to simulate decimals))
  516.    
  517.     The matrix expression is as follows (from Nintendo's official programming manual)
  518.    
  519.     0: Center pos
  520.     1: Origin Pos
  521.     2: New Pos
  522.     a: x direction pixel stretch
  523.     b: y direction pixel stretch
  524.    
  525.     | x2 |     | PA PB |   |x1 - x0|   |x0|
  526.     |    | :=  |       | * |       | + |  |
  527.     | y2 |     | PC PD |   |y1 - y0|   |y0|
  528.  
  529.           1
  530.     PA := cos(Angle)
  531.           a
  532.  
  533.           1
  534.     PB := sin(Angle)
  535.           a
  536.      
  537.          -1
  538.     PC :=  sin(Angle)
  539.           b
  540.    
  541.           1
  542.     PD :=  cos(Angle)
  543.           b
  544.  
  545.     Pixels that rotate beyond their original coordinate range are clipped.
  546.    
  547.      * Double Sprite
  548.      * X and Y are double canvas size
  549.      * Note that this adds only the area of the canvas and does not magnify the original sprite pixels.
  550.      * If in Double Sprite mode, position is unchanged, the coordinates of the center point will be added half the size of the X and Y vectors.
  551.    
  552.      * e.g.
  553.    
  554.       dx := 1
  555.       dmy := 1
  556.       dy := 0
  557.       dmx := 0 <-------------------- Standard render
  558.  
  559.       pos x:= 128
  560.       pos y:= 128  
  561.  
  562.       sprite size := 64, 64
  563.  
  564.       middle point := 160, 160
  565.       canvas range x:= 128~192
  566.       canvas range y:= 128~192  
  567.  
  568.       double it !
  569.  
  570.       pos x:= 128
  571.       pos y:= 128  
  572.  
  573.       sprite size := 64, 64
  574.       middle point := 192, 192
  575.       canvas range x:= 128~256
  576.       canvas range y:= 128~256
  577.  
  578.       BUG:
  579.       In Double Sprite Mode
  580.  
  581.       If the sprite is in the scanning line 128-160,
  582.       This part of the pixel sprite will not be displayed, of course,
  583.       rest pixel of the this a sprite is normal displayed.
  584.      
  585.      
  586.      */
  587.  
  588.      /* drawsolid status and pri
  589.          winobj -> winout, winobj draw
  590.          win0-> winout, winobj, win1, win0
  591.          win1-> winout, winobj, win1,
  592.          winout-> only winout.
  593.  
  594.          pri 0 win0
  595.          pri 1 win1
  596.          pri 2 winobj
  597.          pri 3 winout
  598.       */
  599.  
  600. /* TODO: mosaic and more mode */
  601. /* TODO: OAM 128*128 Pixel BUG, (for Bios Boot draw logo)*/
  602. /* TODO: Imm Write Pixel when pixel is backdrop */
  /* Working state handed to the per-scanline renderers for one channel.
     chan / gpu point at the channel being drawn and its GPU; chr_base and
     ntbank[] are filled in by the background renderer (character base and
     the VRAM offsets of the four nametable pages).  interp_y switches the
     renderer to snapping the source line down to a multiple of
     interp_yvec + 1.
     NOTE(review): order, sub_order, chanId, winId, solid, interp_x and
     interp_xvec are not used in the code visible here - their meaning
     should be confirmed at the call sites. */
  603. struct rasterizer_caps {
  604.   uintptr_t order;
  605.   uintptr_t sub_order;
  606.   uintptr_t chanId;
  607.   uintptr_t chr_base;
  608.   uintptr_t opca; /* 0: normal filter 1: alpha 2: bright_inc 3: bright_dec */
  609.   uintptr_t winId;
  610.   uint16_t solid;
  611.   uintptr_t interp_yvec;
  612.   uintptr_t interp_xvec;
  613.   kable interp_x;
  614.   kable interp_y;
  615.   struct gpu_channel *chan;
  616.   struct gpu *gpu;
  617.   uintptr_t ntbank[4]; /* indexed by (vertical page << 1) + horizontal page */
  618. };
  619.  
  620. finline void
  621. bg_mosaic (struct rasterizer_caps *rtc, uint16_t attr_mask) {
  622.   if (rtc->chan->ctl.blk >> 6 & 1) {
  623.     uintptr_t xvec = rtc->gpu->mosaic.blk & 15;
  624.     uintptr_t yvec = rtc->gpu->mosaic.blk >> 4 & 15;
  625.  
  626.  
  627.  
  628.  
  629.  
  630.  
  631.   }
  632.  
  633.  
  634.  
  635.  
  636. }
  637.  
  638. finline void
  639. sp_mosaic (uint16_t attr_mask, uint16_t x_pos, uint16_t vec) {
  640.  
  641.  
  642.  
  643.  
  644.  
  645.  
  646.  
  647.  
  648. }
  649.  
  650.  
  651.  
  652.  
  653.  
  654.  
  655.  
  656. finline
  657. void nt_bank_settings (struct rasterizer_caps *rtc) {
  658.   uintptr_t nt_base =  (rtc->chan->ctl.blk >> 8 & 31) * 0x800;
  659.   switch (rtc->chan->ctl.blk & 0xC000) {
  660.   case 0x0000:
  661.     /* Single nametable 256 * 256 */
  662.     rtc->ntbank[0] = 0x0000;
  663.     rtc->ntbank[1] = 0x0000;
  664.     rtc->ntbank[2] = 0x0000;
  665.     rtc->ntbank[3] = 0x0000;
  666.     break;
  667.   case 0x4000:
  668.     /* Double nametable 512 * 256 (hori) */
  669.     rtc->ntbank[0] = 0x0000;
  670.     rtc->ntbank[1] = 0x0800;
  671.     rtc->ntbank[2] = 0x0000;
  672.     rtc->ntbank[3] = 0x0800;
  673.     break;
  674.   case 0x8000:
  675.     /* Double nametable 256 * 512 (vert) */
  676.     rtc->ntbank[0] = 0x0000;
  677.     rtc->ntbank[1] = 0x0000;
  678.     rtc->ntbank[2] = 0x0800;
  679.     rtc->ntbank[3] = 0x0800;
  680.     break;
  681.   case 0xC000:
  682.     /* Quad nametable 512 * 512 (four screen) */
  683.     rtc->ntbank[0] = 0x0000;
  684.     rtc->ntbank[1] = 0x0800;
  685.     rtc->ntbank[2] = 0x1000;
  686.     rtc->ntbank[3] = 0x1800;
  687.     break;
  688.   default:
  689.     DEBUG_BREAK ();
  690.   }
  691.   rtc->ntbank[0] += nt_base;
  692.   rtc->ntbank[1] += nt_base;
  693.   rtc->ntbank[2] += nt_base;
  694.   rtc->ntbank[3] += nt_base;
  695. }
  696.  
  697. finline
  698. uint16_t GetAttrWord (struct rasterizer_caps *rtc, uintptr_t tile_x, uintptr_t tile_y) {
  699.   uintptr_t hori_bank = tile_x >> 5 & 1;
  700.   uintptr_t vert_bank = tile_y >> 5 & 1;
  701.   uintptr_t offx_page = tile_x & 31;
  702.   uintptr_t offy_page = tile_y & 31;
  703.   uintptr_t addr_base =  rtc->ntbank[(vert_bank<<1)+hori_bank];
  704.   uintptr_t addr_total = addr_base + (offy_page << 6) + (offx_page << 1);
  705.   return * (uint16_t *) &rtc->gpu->vram[addr_total & 0xFFFF];
  706. }
  707.  
  708. finline
  709. void *GetBG_Chr16 (struct rasterizer_caps *rtc, uint16_t attrWord, uintptr_t offsetY) {
  710.   uintptr_t chrAddr = (rtc->chr_base + (attrWord & 1023) *32);
  711.   chrAddr += offsetY <<  2;
  712.   return & rtc->gpu->vram[chrAddr&0xFFFF];
  713. }
  714.  
  715. finline
  716. void *GetBG_Chr256 (struct rasterizer_caps *rtc, uint16_t attrWord, uintptr_t offsetY) {
  717.   uintptr_t chrAddr = (rtc->chr_base + (attrWord & 1023) *64);
  718.   chrAddr += offsetY <<  3;
  719.   return & rtc->gpu->vram[chrAddr&0xFFFF];
  720. }
  721. finline
  722. void *GetSP_Chr16 (struct rasterizer_caps *rtc, uint16_t attrWord, uintptr_t offsetY) {
  723.   uintptr_t chrAddr = ((attrWord & 1023) *32);
  724.   chrAddr += offsetY <<  2;
  725.   return & rtc->gpu->vram[0x10000+chrAddr];
  726. }
  727. finline
  728. void *GetSP_Chr256 (struct rasterizer_caps *rtc, uint16_t attrWord, uintptr_t offsetY) {
  729.   uintptr_t chrAddr = ((attrWord & 1023) *32);
  730.   chrAddr += offsetY <<  3;
  731.   return & rtc->gpu->vram[0x10000+chrAddr];
  732. }
  733. finline
  734. uint16_t alpha16 (uint16_t tilePixel, uint16_t backdrop, uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map) {
  735.  
  736.   uint32_t u_out;
  737.   uint32_t v_out;
  738.   uint32_t   out;
  739.  
  740.   if (((back_attr & bld_map) == 0x00) || ((back_attr & SOLID_CHAN_FULL_MASK) == 0))
  741.     return tilePixel; /* TODO: BackDrop pixel ??*/
  742.  
  743.   u_out = tilePixel | tilePixel << 16;
  744.   u_out&= 0x3E07C1F;
  745.   u_out*= coeff1;
  746.   v_out = backdrop | backdrop << 16;
  747.   v_out&= 0x3E07C1F;
  748.   v_out*= coeff2;
  749.   /* 0000 0011 1110 0000 0111 1100 0001 1111*/
  750.   out = v_out +u_out >> 4;
  751.   /* Check pixel saturation */
  752. #if 0
  753.   if (out & 0x20)
  754.     out |= 0x1F; /* mod:1 */
  755.   if (out & 0x8000)
  756.     out |= 0x7C00;/* mod:4 */
  757.   if (out & 0x4000000)/* mod:2 */
  758.     out |=   0x3E00000;
  759. #else
  760.     out |= 0x4008020 - (out >> 5 & 0x200401);
  761. #endif
  762.   out&=    0x3E07C1F;
  763.   return out | out >> 16;
  764. }
  765. finline
  766. uint16_t brightness_inc16 (uint16_t tilePixel, uint16_t backdrop, uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map) {
  767.   uint32_t u_out;
  768.   uint32_t v_out;
  769.  
  770.   u_out = tilePixel | tilePixel << 16;
  771.   u_out&= 0x3E07C1F;
  772.   v_out = u_out ^ 0x3E07C1F;
  773.   v_out*= coeff1;
  774.   v_out += u_out << 4;
  775.   v_out>>= 4;
  776.   v_out&= 0x3E07C1F;
  777.   return v_out | v_out >> 16;
  778. }
  779. finline
  780. uint16_t brightness_dec16 (uint16_t tilePixel, uint16_t backdrop, uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map) {
  781.   int32_t out;
  782.   out = tilePixel | tilePixel << 16;
  783.   out&= 0x3E07C1F;
  784.   out*= 16 - coeff1;
  785.   out>>= 4;
  786.   out&= 0x3E07C1F;
  787.   return out | out >> 16;
  788. }
  789. finline
  790. uint16_t pixcpy (uint16_t tilePixel, uint16_t backdrop, uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map) {
  791.   return tilePixel;
  792. }
  793.  
  794. finline
  795. void CommonBG_Render (struct rasterizer_caps *rtc,        
  796.                             uint16_t ch_mode_mask,
  797.                             uint16_t bld_map,
  798.                             uint16_t coeff1,
  799.                                uint16_t coeff2,
  800.                              kable (*solid) (uint16_t back_attr),
  801.                               uint16_t (*filter) ( uint16_t tilePixel,
  802.                                                              uint16_t backdrop,
  803.                                                             uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map))
  804. {
  805.   const struct gpu *gpu= rtc->gpu;
  806.   const struct gpu_channel *chan = rtc->chan;
  807.            uint16_t *pal256 = (uint16_t *)& gpu->palette16_b[0];
  808.            uint8_t *vram = & rtc->gpu->vram[0];
  809.  
  810.   uintptr_t blk_x;
  811.   uintptr_t blk_y;
  812.   intptr_t vapos;
  813.   uint16_t *vptr;
  814.   uint16_t *vattr;                              
  815.   uintptr_t x_calc;
  816.   uintptr_t y_calc;
  817.   uintptr_t ymini_calc;                              
  818.   uintptr_t cnt;
  819.   uintptr_t id;
  820.   uint16_t vcache_bank[2];
  821.   uint16_t vchr_base;
  822.   uint16_t vchr_base_rev;
  823.  
  824.   if (rtc->interp_y != false) {
  825.     uintptr_t interp_mod;
  826.     blk_x = chan->loopy_x_shadow.blk & 511;
  827.     blk_y = chan->loopy_y_shadow.blk & 511;
  828.     vapos = - (intptr_t)(blk_x & 7);
  829.     vptr= & gpu->vbuf[rtc->gpu->vptr_pitch/2*gpu->line.blk + 8+vapos  ];    
  830.     vattr = & gpu->vattr[8+vapos ];                                        
  831.     x_calc = blk_x >> 3;
  832.     y_calc = gpu->line.blk + blk_y & 511;
  833.     interp_mod = y_calc % (rtc->interp_yvec + 1);
  834.     if (interp_mod != 0)
  835.       y_calc -= interp_mod;
  836.     y_calc &= 511;
  837.     ymini_calc = y_calc  & 7;    
  838.     y_calc = y_calc >> 3;
  839.   } else {
  840.     blk_x = chan->loopy_x_shadow.blk & 511;
  841.     blk_y = chan->loopy_y_shadow.blk & 511;
  842.     vapos = - (intptr_t)(blk_x & 7);
  843.     vptr= & gpu->vbuf[rtc->gpu->vptr_pitch/2*gpu->line.blk + 8+vapos  ];    
  844.     vattr = & gpu->vattr[8+vapos ];                                        
  845.     x_calc = blk_x >> 3;
  846.     y_calc = (gpu->line.blk + blk_y & 511) >> 3;
  847.     ymini_calc = blk_y +  gpu->line.blk & 7;                                  
  848.   }
  849.  
  850.   rtc->chr_base = (chan->ctl.blk >> 2 & 3) * 0x4000;
  851.   nt_bank_settings (rtc);
  852.  
  853.   cnt = (y_calc >> 5 & 1) <<1;
  854.   vcache_bank[0]= rtc->ntbank[cnt] + ((y_calc & 31) << 6);
  855.   vcache_bank[1]= rtc->ntbank[cnt + 1]+ ((y_calc & 31) << 6);
  856.  
  857.   if ((rtc->chan->ctl.blk & 0x80) == 0) {
  858.     vchr_base = rtc->chr_base + (ymini_calc << 2);
  859.     vchr_base_rev = rtc->chr_base + (7 - ymini_calc << 2);
  860.     for (cnt = 0; cnt != 248; cnt+= 8) {
  861.       uint16_t attr;
  862.       uint16_t *pal16;
  863.       uint8_t *pix16;
  864.       uint16_t *vpp;
  865.       uint16_t *app;
  866.  
  867.       /* get  attr word */
  868.       attr =* (uint16_t *) & vram [vcache_bank[x_calc >> 5 & 1] + ((x_calc & 31) << 1)& 0xFFFF];
  869.       /* get palette bank */
  870.       pal16 = & pal256[attr >> 12 << 4];
  871.       /* get chr pixel ptr */
  872.       if (attr & 0x800)
  873.         pix16 = & vram[vchr_base_rev + ((attr & 1023) << 5) & 0xFFFF];
  874.       else
  875.         pix16 = & vram[vchr_base + ((attr & 1023) << 5)& 0xFFFF];
  876.       vpp = & vptr[cnt];
  877.       app = & vattr[cnt];
  878.       x_calc++;
  879.  
  880.       if (attr & 0x400) {
  881. #if 0
  882.         for (id = 3; id!= (uintptr_t)-1; id--) {
  883.           uintptr_t hi = pix16[id] & 15;
  884.           uintptr_t lo = pix16[id] >> 4;
  885.           uintptr_t pos = 3-id  << 1;
  886.          
  887.           if (solid (app[pos])) {
  888.             if (lo) {
  889.               vpp[pos] = filter (pal16[lo], vpp[pos], coeff1, coeff2, app[pos], bld_map);  
  890.               app[pos] = ch_mode_mask;
  891.             }
  892.           }
  893.           if (solid (app[pos+1])) {
  894.             if (hi) {
  895.               vpp[pos+1] = filter (pal16[hi], vpp[pos+1], coeff1, coeff2, app[pos+1], bld_map);
  896.               app[pos+1] = ch_mode_mask;
  897.             }
  898.           }
  899.         }
  900. #else
  901. #undef T8SU_GU_
  902. #define T8SU_GU_(x, n)\
  903.   { \
  904.   uint8_t chr_mixer = pix16[x];\
  905.   uint8_t lo = chr_mixer >> 4;\
  906.   uint8_t hi = chr_mixer  &15;\
  907.   \
  908.   if (solid (app[n])) { \
  909.     if (lo) {\
  910.       vpp[n] = filter (pal16[lo], vpp[n], coeff1, coeff2, app[n], bld_map);  \
  911.       app[n] = ch_mode_mask;\
  912.     }\
  913.   } \
  914.   if (solid (app[n+1])) { \
  915.     if (hi) {\
  916.       vpp[n+1] = filter (pal16[hi], vpp[n+1], coeff1, coeff2, app[n+1], bld_map); \
  917.       app[n+1] = ch_mode_mask;\
  918.     }\
  919.   } \
  920. }
  921.         T8SU_GU_(3, 0)
  922.         T8SU_GU_(2, 2)
  923.         T8SU_GU_(1, 4)
  924.         T8SU_GU_(0, 6)
  925.  
  926. #endif
  927.       } else {
  928. #if   0
  929.         for (id = 0; id!= 4; id++) {
  930.           uintptr_t lo = pix16[id] & 15;
  931.           uintptr_t hi = pix16[id] >> 4;
  932.           uintptr_t pos = id << 1;
  933.  
  934.           if (solid (app[pos])) {
  935.             if (lo) {
  936.               vpp[pos] = filter (pal16[lo], vpp[pos], coeff1, coeff2, app[pos], bld_map);  
  937.               app[pos] = ch_mode_mask;
  938.             }
  939.           }
  940.           if (solid (app[pos+1])) {
  941.             if (hi) {
  942.               vpp[pos+1] = filter (pal16[hi], vpp[pos+1], coeff1, coeff2, app[pos+1], bld_map);
  943.               app[pos+1] = ch_mode_mask;
  944.             }
  945.           }
  946.         }
  947.  
  948. #else
  949. #undef T8SU_GU_
  950. #define T8SU_GU_(x, n)\
  951.   { \
  952.   uint8_t chr_mixer = pix16[x];\
  953.   uint8_t lo = chr_mixer & 15;\
  954.   uint8_t hi = chr_mixer >> 4;\
  955.   \
  956.   if (solid (app[n])) { \
  957.     if (lo) {\
  958.       vpp[n] = filter (pal16[lo], vpp[n], coeff1, coeff2, app[n], bld_map);  \
  959.       app[n] = ch_mode_mask;\
  960.     }\
  961.   } \
  962.   if (solid (app[n+1])) { \
  963.     if (hi) {\
  964.       vpp[n+1] = filter (pal16[hi], vpp[n+1], coeff1, coeff2, app[n+1], bld_map); \
  965.       app[n+1] = ch_mode_mask;\
  966.     }\
  967.   } \
  968. }
  969.         T8SU_GU_(0, 0)
  970.         T8SU_GU_(1, 2)
  971.         T8SU_GU_(2, 4)
  972.         T8SU_GU_(3, 6)
  973. #endif
  974.       }
  975.     }
  976.   } else {
  977.     for (cnt = 0; cnt != 248; cnt+= 8) {
  978.       uint16_t attr = GetAttrWord (rtc, x_calc++, y_calc);
  979.       uint8_t *pix256 = (uint8_t *) GetBG_Chr256 (rtc, attr, (attr & 0x800) ? (7 - ymini_calc) : ymini_calc);
  980.       uint16_t *vpp = & vptr[cnt];
  981.       uint16_t *app = & vattr[cnt];
  982.  
  983.       if (attr & 0x400) {
  984.         for (id = 7; id!= (uintptr_t)-1; id--) {
  985.           uintptr_t pmix = pix256[id];
  986.           uintptr_t pos = 7-id;
  987.           if (solid (app[pos])) {
  988.             if (pmix) {
  989.               vpp[pos] = filter (pal256[pmix], vpp[pos], coeff1, coeff2, app[pos], bld_map);  
  990.               app[pos] = ch_mode_mask;
  991.             }
  992.           }
  993.         }
  994.       } else {
  995.         for (id = 0; id!= 8; id++) {
  996.           uintptr_t pmix = pix256[id];
  997.        
  998.           if (solid (app[id])) {
  999.             if (pmix) {
  1000.               vpp[id] = filter (pal256[pmix], vpp[id], coeff1, coeff2, app[id], bld_map);  
  1001.               app[id] = ch_mode_mask;
  1002.             }
  1003.           }
  1004.         }
  1005.       }
  1006.     }
  1007.   }
  1008.   /* FIXME: compared with standard GBA game consoles, mosaic edge processing has errors. */
  1009.   if (rtc->interp_x != false) {
  1010.     intptr_t mosaic_blk  = rtc->interp_xvec + 1;
  1011.     vptr= & gpu->vbuf[rtc->gpu->vptr_pitch/2*gpu->line.blk + 8  ];    
  1012.     vattr = & gpu->vattr[8 ];  
  1013.  
  1014.     for (cnt = 0; cnt != 240; cnt ++){
  1015.       int mod = cnt % mosaic_blk;
  1016.       if (mod != 0) {
  1017.         int pos = cnt - mod;
  1018.         if (vattr[pos] == ch_mode_mask)
  1019.           vptr[cnt] = vptr[pos];
  1020.       }
  1021.     }
  1022.   }
  1023. }
  1024.  
  1025. finline
  1026. void CommonBG_Render_Rot (struct rasterizer_caps *rtc,
  1027.                             uint16_t ch_mode_mask,
  1028.                             uint16_t bld_map,
  1029.                             uint16_t coeff1, uint16_t coeff2,
  1030.                               kable (*solid) (uint16_t back_attr),
  1031.                               uint16_t (*filter) ( uint16_t tilePixel,
  1032.                                                              uint16_t backdrop,
  1033.                                                             uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map))
  1034. {
  1035.   struct gpu *gpu= rtc->gpu;
  1036.   struct gpu_channel *chan = rtc->chan;
  1037.   uintptr_t tile_x = chan->loopy_x_shadow.blk & 511;
  1038.   uintptr_t tile_y = chan->loopy_y_shadow.blk & 511;                            
  1039.   uint16_t *vattr_b = & gpu->vattr[ 8 ];                                          
  1040.   uint16_t *vptr_b= & gpu->vbuf[gpu->vptr_pitch/2*gpu->line.blk + 8 ];                  
  1041.   uintptr_t cnt;
  1042.   uintptr_t affi_bx;                              
  1043.   uintptr_t affi_by;                              
  1044.   uintptr_t affi_dx;                              
  1045.   uintptr_t affi_dy;                              
  1046.   uintptr_t affi_xp;                              
  1047.   uintptr_t affi_yp;          
  1048.   uintptr_t round_mask;                          
  1049.   uintptr_t affi_mask;                            
  1050.   uintptr_t affi_mask2;                                                                            
  1051.   uint8_t *chr_b = & gpu->vram[(chan->ctl.blk>> 2 & 3) * 0x4000];  
  1052.   uint8_t *te_b= & gpu->vram[0];                                
  1053.   uintptr_t te_ac = (chan->ctl .blk>> 8 & 31) * 0x800;              
  1054.   uint16_t *bg_pal = (uint16_t *)& gpu->palette16_b[0];                                                                                                                                                              
  1055.   uintptr_t te_sft;                                                        
  1056.   uintptr_t te_pi;                                                          
  1057.   uintptr_t te_blk_x;                
  1058.   uintptr_t te_blk_y;              
  1059.   uintptr_t te_mini_x;              
  1060.   uintptr_t te_mini_y;            
  1061.   uintptr_t te_addr;
  1062.   uintptr_t te_id;                                
  1063.          
  1064.   rtc->chr_base = (rtc->chan->ctl.blk>> 2 & 3) * 0x4000;
  1065.  
  1066.   switch (rtc->chan->ctl.blk & 0xC000) {                                            
  1067.   case 0x0000:                                                              
  1068.     affi_mask = 128 << 8;                        
  1069.     round_mask = 128;                            
  1070.     te_sft = 4;                                  
  1071.     break;                                        
  1072.   case 0x4000:                                    
  1073.     affi_mask = 256 << 8;                        
  1074.     round_mask = 256;                            
  1075.     te_sft = 5;                                  
  1076.     break;                                        
  1077.   case 0x8000:                                    
  1078.     affi_mask = 512 << 8;                        
  1079.     round_mask = 512;                            
  1080.     te_sft = 6;                                  
  1081.     break;                                        
  1082.   case 0xC000:              
  1083.     affi_mask = 1024 << 8;  
  1084.     round_mask = 1024;      
  1085.     te_sft = 7;            
  1086.     break;                  
  1087.   }                        
  1088.   round_mask--;            
  1089.   affi_mask--;              
  1090.   affi_mask = ~affi_mask;  
  1091.   affi_mask2= affi_mask >> 8;
  1092.                            
  1093.   affi_bx = rtc->chan->loopy_dmx.blk;
  1094.   affi_by = rtc->chan->loopy_dmy.blk;
  1095.   affi_dx = rtc->chan->dx_shadow.blk;      
  1096.   affi_dy = rtc->chan->dy_shadow.blk;      
  1097.  
  1098.   if (rtc->chan->ctl.blk & 0x2000) {
  1099.     /* 8Bit Wrapround */
  1100.     for (cnt = 0; cnt != 240; cnt++) {
  1101.                                                    
  1102.       affi_xp = (uintptr_t) affi_bx >> 8 & round_mask;            
  1103.       affi_yp = (uintptr_t) affi_by >> 8 & round_mask;      
  1104.      
  1105.       te_blk_x = affi_xp >> 3;                  
  1106.       te_blk_y = affi_yp >> 3;                  
  1107.       te_mini_x = affi_xp & 7;                  
  1108.       te_mini_y = affi_yp & 7;                  
  1109.       te_addr = te_ac + te_blk_x+ (te_blk_y << te_sft) & 0xFFFF;  
  1110.       te_id = te_b[te_addr];                                      
  1111.       te_pi = chr_b[(te_id<<6)+te_mini_x+(te_mini_y<<3)];  
  1112.                                                          
  1113.       if (solid (vattr_b[cnt]))  {                
  1114.         if (te_pi) {
  1115.           vptr_b[cnt] = filter (bg_pal[te_pi], vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1116.           vattr_b[cnt] = ch_mode_mask;
  1117.         }
  1118.       }              
  1119.       affi_bx += affi_dx;
  1120.       affi_by += affi_dy;
  1121.     }
  1122.   } else {
  1123.     for (cnt = 0; cnt != 240; cnt++) {                
  1124.      if (((affi_bx | affi_by) & affi_mask) == 0) {
  1125.     // if (1) {
  1126.         affi_xp = (uintptr_t) affi_bx >> 8 & round_mask;            
  1127.         affi_yp = (uintptr_t) affi_by >> 8 & round_mask;      
  1128.        
  1129.         te_blk_x = affi_xp >> 3;                  
  1130.         te_blk_y = affi_yp >> 3;                  
  1131.         te_mini_x = affi_xp & 7;                  
  1132.         te_mini_y = affi_yp & 7;                  
  1133.         te_addr = te_ac + te_blk_x+ (te_blk_y << te_sft) & 0xFFFF;  
  1134.         te_id = te_b[te_addr];                                      
  1135.         te_pi = chr_b[(te_id<<6)+te_mini_x+(te_mini_y<<3)];  
  1136.                                                            
  1137.         if (solid (vattr_b[cnt]))  {                
  1138.           if (te_pi) {
  1139.             vptr_b[cnt] = filter (bg_pal[te_pi], vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);                                      
  1140.             vattr_b[cnt] = ch_mode_mask;
  1141.           }
  1142.         }  
  1143.       }
  1144.       affi_bx += affi_dx;
  1145.       affi_by += affi_dy;
  1146.     }
  1147.   }
  1148. }
  1149.  
  1150. finline
  1151. void Mode3Bitmap8Full_BG_Render_Rot (struct rasterizer_caps *rtc,
  1152.                             uint16_t ch_mode_mask,
  1153.                             uint16_t bld_map,
  1154.                             uint16_t coeff1, uint16_t coeff2,
  1155.                               kable (*solid) (uint16_t back_attr),
  1156.                               uint16_t (*filter) ( uint16_t tilePixel,
  1157.                                                              uint16_t backdrop,
  1158.                                                             uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map))
  1159. {
  1160.   struct gpu *gpu= rtc->gpu;
  1161.   struct gpu_channel *chan = rtc->chan;                          
  1162.   uint16_t *vattr_b = & gpu->vattr[ 8 ];                                          
  1163.   uint16_t *vptr_b= & gpu->vbuf[gpu->vptr_pitch/2*gpu->line.blk + 8 ];                  
  1164.   uintptr_t cnt;
  1165.   intptr_t affi_bx;                              
  1166.   intptr_t affi_by;                              
  1167.   intptr_t affi_dx;                              
  1168.   intptr_t affi_dy;                              
  1169.   intptr_t affi_xp;                              
  1170.   intptr_t affi_yp;                              
  1171.   uint8_t *vram= & gpu->vram[0];                            
  1172.                    
  1173.   affi_bx = rtc->chan->loopy_dmx.sblk;
  1174.   affi_by = rtc->chan->loopy_dmy.sblk;
  1175.   affi_dx = rtc->chan->dx_shadow.sblk;      
  1176.   affi_dy = rtc->chan->dy_shadow.sblk;      
  1177.  
  1178.   if (gpu->palette16_b == & gpu->palette2[0]) {
  1179.     if (rtc->chan->ctl.blk & 0x2000) {
  1180.       /* 8Bit Wrapround */
  1181.       for (cnt = 0; cnt != 240; cnt++) {                                          
  1182.         if (solid (vattr_b[cnt]))  {    
  1183.           uint16_t pixel;
  1184.           affi_xp = affi_bx >> 8 % 240;
  1185.           affi_yp = affi_by >> 8 % 160;  
  1186.           if (affi_xp < 0)
  1187.             affi_xp = affi_xp + 240;
  1188.           if (affi_yp < 0)
  1189.              affi_yp = affi_yp + 160;
  1190.           pixel = * (uint16_t *)& vram[affi_yp*480+(affi_xp << 1)];
  1191.           pixel = pixel >> 10 & 31
  1192.         |          pixel & 31 << 5
  1193.         |          (pixel & 31) << 10;
  1194.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1195.           vattr_b[cnt] = ch_mode_mask;
  1196.         }              
  1197.         affi_bx += affi_dx;
  1198.         affi_by += affi_dy;
  1199.       }
  1200.     } else {
  1201.       for (cnt = 0; cnt != 240; cnt++) {        
  1202.         intptr_t pos_x = affi_bx >> 8;
  1203.         intptr_t pos_y = affi_by >> 8;
  1204.  
  1205.         if (pos_x >= 0
  1206.           && pos_x < 240
  1207.           && pos_y >= 0
  1208.           && pos_y < 160)
  1209.         {
  1210.           uint16_t pixel= * (uint16_t *)& vram[pos_y*480+(pos_x << 1)];
  1211.           pixel = pixel >> 10 & 31
  1212.         |          pixel & 31 << 5
  1213.         |          (pixel & 31) << 10;
  1214.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1215.           vattr_b[cnt] = ch_mode_mask;
  1216.         }
  1217.         affi_bx += affi_dx;
  1218.         affi_by += affi_dy;
  1219.       }
  1220.     }
  1221.   } else {
  1222.  
  1223.     if (rtc->chan->ctl.blk & 0x2000) {
  1224.       /* 8Bit Wrapround */
  1225.       for (cnt = 0; cnt != 240; cnt++) {                                          
  1226.         if (solid (vattr_b[cnt]))  {    
  1227.           uint16_t pixel;
  1228.           affi_xp = affi_bx >> 8 % 240;
  1229.           affi_yp = affi_by >> 8 % 160;  
  1230.           if (affi_xp < 0)
  1231.             affi_xp = affi_xp + 240;
  1232.           if (affi_yp < 0)
  1233.              affi_yp = affi_yp + 160;
  1234.           pixel = * (uint16_t *)& vram[affi_yp*480+(affi_xp << 1)];
  1235.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1236.           vattr_b[cnt] = ch_mode_mask;
  1237.         }              
  1238.         affi_bx += affi_dx;
  1239.         affi_by += affi_dy;
  1240.       }
  1241.     } else {
  1242.       for (cnt = 0; cnt != 240; cnt++) {        
  1243.         intptr_t pos_x = affi_bx >> 8;
  1244.         intptr_t pos_y = affi_by >> 8;
  1245.  
  1246.         if (pos_x >= 0
  1247.           && pos_x < 240
  1248.           && pos_y >= 0
  1249.           && pos_y < 160)
  1250.         {
  1251.           uint16_t pixel= * (uint16_t *)& vram[pos_y*480+(pos_x << 1)];
  1252.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1253.           vattr_b[cnt] = ch_mode_mask;
  1254.         }
  1255.         affi_bx += affi_dx;
  1256.         affi_by += affi_dy;
  1257.       }
  1258.     }
  1259.   }
  1260. }
  1261.  
  1262. finline
  1263. void Mode4Bitmap4SwapBuffer_BG_Render_Rot (struct rasterizer_caps *rtc,
  1264.                             uint16_t ch_mode_mask,
  1265.                             uint16_t bld_map,
  1266.                             uint16_t coeff1, uint16_t coeff2,
  1267.                               kable (*solid) (uint16_t back_attr),
  1268.                               uint16_t (*filter) ( uint16_t tilePixel,
  1269.                                                              uint16_t backdrop,
  1270.                                                             uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map))
  1271. {
  1272.   struct gpu *gpu= rtc->gpu;
  1273.   struct gpu_channel *chan = rtc->chan;                          
  1274.   uint16_t *vattr_b = & gpu->vattr[ 8 ];                                          
  1275.   uint16_t *vptr_b= & gpu->vbuf[gpu->vptr_pitch/2*gpu->line.blk + 8 ];                  
  1276.   uintptr_t cnt;
  1277.   intptr_t affi_bx;                              
  1278.   intptr_t affi_by;                              
  1279.   intptr_t affi_dx;                              
  1280.   intptr_t affi_dy;                              
  1281.   intptr_t affi_xp;                              
  1282.   intptr_t affi_yp;                                                      
  1283.   uint16_t *pal256 = (uint16_t *)& gpu->palette16_b[0];
  1284.   uint8_t *frame_bank = & gpu->vram[(gpu->ctl.blk >> 4 & 1) ? 0xA000 : 0];  
  1285.  
  1286.   affi_bx = rtc->chan->loopy_dmx.sblk;
  1287.   affi_by = rtc->chan->loopy_dmy.sblk;
  1288.   affi_dx = rtc->chan->dx_shadow.sblk;      
  1289.   affi_dy = rtc->chan->dy_shadow.sblk;      
  1290.  
  1291.   if (rtc->chan->ctl.blk & 0x2000) {
  1292.     /* 8Bit Wrapround */
  1293.     for (cnt = 0; cnt != 240; cnt++) {                                          
  1294.       if (solid (vattr_b[cnt]))  {    
  1295.         uint8_t pal;
  1296.         affi_xp = affi_bx >> 8 % 240;
  1297.         affi_yp = affi_by >> 8 % 160;  
  1298.         if (affi_xp < 0)
  1299.           affi_xp = affi_xp + 240;
  1300.         if (affi_yp < 0)
  1301.             affi_yp = affi_yp + 160;
  1302.         pal = frame_bank[affi_yp*240+affi_xp];
  1303.         if (pal ) {
  1304.           vptr_b[cnt] = filter (pal256[pal], vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1305.           vattr_b[cnt] = ch_mode_mask;
  1306.         }
  1307.       }              
  1308.       affi_bx += affi_dx;
  1309.       affi_by += affi_dy;
  1310.     }
  1311.   } else {
  1312.     for (cnt = 0; cnt != 240; cnt++) {        
  1313.       intptr_t pos_x = affi_bx >> 8;
  1314.       intptr_t pos_y = affi_by >> 8;
  1315.  
  1316.       if (pos_x >= 0
  1317.         && pos_x < 240
  1318.         && pos_y >= 0
  1319.         && pos_y < 160)
  1320.       {
  1321.         uint8_t pal = frame_bank[pos_y*240+pos_x];
  1322.         if (pal ) {
  1323.           vptr_b[cnt] = filter (pal256[pal], vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1324.           vattr_b[cnt] = ch_mode_mask;
  1325.         }
  1326.       }
  1327.       affi_bx += affi_dx;
  1328.       affi_by += affi_dy;
  1329.     }
  1330.   }
  1331. }
  1332.  
  1333. finline
  1334. void Mode5Bitmap8SwapBuffer_BG_Render_Rot (struct rasterizer_caps *rtc,
  1335.                             uint16_t ch_mode_mask,
  1336.                             uint16_t bld_map,
  1337.                             uint16_t coeff1, uint16_t coeff2,
  1338.                               kable (*solid) (uint16_t back_attr),
  1339.                               uint16_t (*filter) ( uint16_t tilePixel,
  1340.                                                              uint16_t backdrop,
  1341.                                                             uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map))
  1342. {
  1343.   struct gpu *gpu= rtc->gpu;
  1344.   struct gpu_channel *chan = rtc->chan;                          
  1345.   uint16_t *vattr_b = & gpu->vattr[ 8 ];                                          
  1346.   uint16_t *vptr_b= & gpu->vbuf[gpu->vptr_pitch/2*gpu->line.blk + 8 ];                  
  1347.   uintptr_t cnt;
  1348.   intptr_t affi_bx;                              
  1349.   intptr_t affi_by;                              
  1350.   intptr_t affi_dx;                              
  1351.   intptr_t affi_dy;                              
  1352.   intptr_t affi_xp;                              
  1353.   intptr_t affi_yp;                              
  1354.   uint8_t *vram= & gpu->vram[(gpu->ctl.blk >> 4 & 1) ? 0xA000 : 0];              
  1355.                    
  1356.   affi_bx = rtc->chan->loopy_dmx.sblk;
  1357.   affi_by = rtc->chan->loopy_dmy.sblk;
  1358.   affi_dx = rtc->chan->dx_shadow.sblk;      
  1359.   affi_dy = rtc->chan->dy_shadow.sblk;      
  1360.  
  1361.   if (gpu->palette16_b == & gpu->palette2[0]) {
  1362.     if (rtc->chan->ctl.blk & 0x2000) {
  1363.       /* 8Bit Wrapround */
  1364.       for (cnt = 0; cnt != 240; cnt++) {                                          
  1365.         if (solid (vattr_b[cnt]))  {    
  1366.           uint16_t pixel;
  1367.           affi_xp = affi_bx >> 8 % 160;
  1368.           affi_yp = affi_by >> 8 % 128;  
  1369.           if (affi_xp < 0)
  1370.             affi_xp = affi_xp + 160;
  1371.           if (affi_yp < 0)
  1372.              affi_yp = affi_yp + 128;
  1373.           pixel = * (uint16_t *)& vram[affi_yp*320+(affi_xp << 1)];
  1374.           pixel = pixel >> 10 & 31
  1375.         |          pixel & 31 << 5
  1376.         |          (pixel & 31) << 10;
  1377.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1378.           vattr_b[cnt] = ch_mode_mask;
  1379.         }              
  1380.         affi_bx += affi_dx;
  1381.         affi_by += affi_dy;
  1382.       }
  1383.     } else {
  1384.       for (cnt = 0; cnt != 240; cnt++) {        
  1385.         intptr_t pos_x = affi_bx >> 8;
  1386.         intptr_t pos_y = affi_by >> 8;
  1387.  
  1388.         if (pos_x >= 0
  1389.           && pos_x < 160
  1390.           && pos_y >= 0
  1391.           && pos_y < 128)
  1392.         {
  1393.           uint16_t pixel= * (uint16_t *)& vram[pos_y*320+(pos_x << 1)];
  1394.           pixel = pixel >> 10 & 31
  1395.         |          pixel & 31 << 5
  1396.         |          (pixel & 31) << 10;
  1397.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1398.           vattr_b[cnt] = ch_mode_mask;
  1399.         }
  1400.         affi_bx += affi_dx;
  1401.         affi_by += affi_dy;
  1402.       }
  1403.     }
  1404.   } else {
  1405.  
  1406.     if (rtc->chan->ctl.blk & 0x2000) {
  1407.       /* 8Bit Wrapround */
  1408.       for (cnt = 0; cnt != 240; cnt++) {                                          
  1409.         if (solid (vattr_b[cnt]))  {    
  1410.           uint16_t pixel;
  1411.           affi_xp = affi_bx >> 8 % 160;
  1412.           affi_yp = affi_by >> 8 % 128;  
  1413.           if (affi_xp < 0)
  1414.             affi_xp = affi_xp + 160;
  1415.           if (affi_yp < 0)
  1416.              affi_yp = affi_yp + 128;
  1417.           pixel = * (uint16_t *)& vram[affi_yp*320+(affi_xp << 1)];
  1418.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1419.           vattr_b[cnt] = ch_mode_mask;
  1420.         }              
  1421.         affi_bx += affi_dx;
  1422.         affi_by += affi_dy;
  1423.       }
  1424.     } else {
  1425.       for (cnt = 0; cnt != 240; cnt++) {        
  1426.         intptr_t pos_x = affi_bx >> 8;
  1427.         intptr_t pos_y = affi_by >> 8;
  1428.  
  1429.         if (pos_x >= 0
  1430.           && pos_x < 160
  1431.           && pos_y >= 0
  1432.           && pos_y < 128)
  1433.         {
  1434.           uint16_t pixel= * (uint16_t *)& vram[pos_y*320+(pos_x << 1)];
  1435.           vptr_b[cnt] = filter (pixel, vptr_b[cnt], coeff1, coeff2, vattr_b[cnt], bld_map);          
  1436.           vattr_b[cnt] = ch_mode_mask;
  1437.         }
  1438.         affi_bx += affi_dx;
  1439.         affi_by += affi_dy;
  1440.       }
  1441.     }
  1442.   }
  1443. }
  1444.  
  1445. finline
  1446. void CommonSP_Draw (
  1447.            struct rasterizer_caps *rtc,
  1448.              uint16_t ch_mode_mask,
  1449.              uint16_t bld_map,
  1450.      uint16_t *vptr_b,
  1451.      uint16_t *vattr_b,
  1452.      uint16_t *oam_entry,
  1453.      uint16_t *pal16,
  1454.      uint16_t *pal256,
  1455.      uint16_t coeff1,
  1456.      uint16_t coeff2,
  1457.      uintptr_t tbid_t,
  1458.      uintptr_t line,
  1459.      uintptr_t dim_x,
  1460.      uintptr_t dim_y,
  1461.      uintptr_t pos_x,
  1462.      uintptr_t pos_y,
  1463.      kable (*solid) (uint16_t back_attr),
  1464.     uint16_t (*filter) ( uint16_t tilePixel,
  1465.                                    uint16_t backdrop,
  1466.                                   uint16_t coeff1, uint16_t coeff2, uint16_t back_attr, uint16_t bld_map))
  1467. {
  1468.   intptr_t y_offset;
  1469.   intptr_t y_blockoffset;
  1470.   intptr_t y_miniffset;
  1471.   uintptr_t line2 = line;
  1472.  
  1473.   if ((pos_y + dim_y) >= 256)
  1474.     line2 += 256;
  1475.   if (line2 < pos_y) /* TODO : oam bug */
  1476.     return ;
  1477.   if (line2 >= (pos_y+dim_y))
  1478.     return ;
  1479.   if (pos_x >= 240 && (pos_x + dim_x) <= 512)
  1480.     return ;
  1481.   y_offset = line2 - pos_y;
  1482.   if (oam_entry[1] & 0x2000)
  1483.     y_offset = dim_y - 1 - y_offset;
  1484.   else ;
  1485.   y_blockoffset = y_offset >> 3;
  1486.   y_miniffset = y_offset & 7;
  1487.  
  1488.   if (oam_entry[0] & 0x2000) {
  1489.     if (oam_entry[1] & 0x1000) {
  1490.       /* std horz-swap sprite */
  1491.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1022) * 32 + (y_miniffset << 3)];
  1492.       uintptr_t x = pos_x + dim_x - 1;
  1493.       do
  1494.       {
  1495. #undef OAMS_
  1496. #define OAMS_(n)\
  1497.       x &= 511;\
  1498.       if (solid (vattr_b[x])) {\
  1499.         uint8_t chr_value = chrb[n];\
  1500.         if (chr_value) {\
  1501.           vptr_b[x] =  filter (pal256[chr_value], vptr_b[x], coeff1, coeff2, vattr_b[x], bld_map);\
  1502.           vattr_b[x] = ch_mode_mask;\
  1503.         }\
  1504.       }\
  1505.       x--;
  1506.  
  1507. #undef OAMZ_
  1508. #define OAMZ_(n)\
  1509.       OAMS_(64*n+0)\
  1510.       OAMS_(64*n+1)\
  1511.       OAMS_(64*n+2)\
  1512.       OAMS_(64*n+3)\
  1513.       OAMS_(64*n+4)\
  1514.       OAMS_(64*n+5)\
  1515.       OAMS_(64*n+6)\
  1516.       OAMS_(64*n+7)\
  1517.       if (dim_x == 8* (n+1))\
  1518.         break;
  1519.  
  1520. #undef OAMZ__
  1521. #define OAMZ__(n)\
  1522.       OAMS_(64*n+0)\
  1523.       OAMS_(64*n+1)\
  1524.       OAMS_(64*n+2)\
  1525.       OAMS_(64*n+3)\
  1526.       OAMS_(64*n+4)\
  1527.       OAMS_(64*n+5)\
  1528.       OAMS_(64*n+6)\
  1529.       OAMS_(64*n+7)
  1530.  
  1531.         OAMZ_(0)
  1532.         OAMZ_(1)
  1533.         OAMZ_(2)
  1534.         OAMZ_(3)
  1535.         OAMZ_(4)
  1536.         OAMZ_(5)
  1537.         OAMZ_(6)
  1538.         OAMZ__(7)
  1539.  
  1540.       } while (0);
  1541.     } else {
  1542.       /* std noswap sprite */
  1543.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1022) * 32 + (y_miniffset << 3)];
  1544.       uintptr_t x = pos_x;
  1545.  
  1546.       do
  1547.       {
  1548. #undef OAMS_
  1549. #define OAMS_(n)\
  1550.       x &= 511;\
  1551.       if (solid (vattr_b[x])) {\
  1552.         uint8_t chr_value = chrb[n];\
  1553.         if (chr_value) {\
  1554.           vptr_b[x] =  filter (pal256[chr_value], vptr_b[x], coeff1, coeff2, vattr_b[x], bld_map);\
  1555.           vattr_b[x] = ch_mode_mask;\
  1556.         }\
  1557.       }\
  1558.       x++;
  1559.         OAMZ_(0)
  1560.         OAMZ_(1)
  1561.         OAMZ_(2)
  1562.         OAMZ_(3)
  1563.         OAMZ_(4)
  1564.         OAMZ_(5)
  1565.         OAMZ_(6)
  1566.         OAMZ__(7)
  1567.  
  1568.       } while (0);
  1569.     }
  1570.   } else {
  1571.     if (oam_entry[1] & 0x1000) {
  1572.       /* std horz-swap sprite */
  1573.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1023) * 32 + (y_miniffset << 2)];
  1574.       uintptr_t x = pos_x + dim_x - 1;
  1575.       uint8_t chr_mixer;
  1576.  
  1577.       do
  1578.       {
  1579. #undef OAMS_
  1580. #define OAMS_(n)\
  1581.       x &= 511;\
  1582.       chr_mixer = chrb[n];\
  1583.       if (solid (vattr_b[x])) {\
  1584.         uint8_t chr_lo = chr_mixer & 15; \
  1585.         if (chr_lo) {\
  1586.           vptr_b[x] =  filter (pal16[chr_lo], vptr_b[x], coeff1, coeff2, vattr_b[x], bld_map);\
  1587.           vattr_b[x] = ch_mode_mask;\
  1588.         }\
  1589.       }\
  1590.       x--; \
  1591.       x &= 511;\
  1592.       if (solid (vattr_b[x])) {\
  1593.         uint8_t chr_hi = chr_mixer >> 4; \
  1594.         if (chr_hi) {\
  1595.           vptr_b[x] =  filter (pal16[chr_hi], vptr_b[x], coeff1, coeff2, vattr_b[x], bld_map);\
  1596.           vattr_b[x] = ch_mode_mask;\
  1597.         }\
  1598.       }\
  1599.       x--;
  1600.  
  1601. #undef OAMZ_
  1602. #define OAMZ_(n)\
  1603.       OAMS_(32*n+0)\
  1604.       OAMS_(32*n+1)\
  1605.       OAMS_(32*n+2)\
  1606.       OAMS_(32*n+3)\
  1607.       if (dim_x == 8* (n+1))\
  1608.         break;
  1609.  
  1610. #undef OAMZ__
  1611. #define OAMZ__(n)\
  1612.       OAMS_(32*n+0)\
  1613.       OAMS_(32*n+1)\
  1614.       OAMS_(32*n+2)\
  1615.       OAMS_(32*n+3)
  1616.  
  1617.         OAMZ_(0)
  1618.         OAMZ_(1)
  1619.         OAMZ_(2)
  1620.         OAMZ_(3)
  1621.         OAMZ_(4)
  1622.         OAMZ_(5)
  1623.         OAMZ_(6)
  1624.         OAMZ__(7)
  1625.  
  1626.       } while (0);
  1627.     } else {
  1628.       /* std noswap sprite */
  1629.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1023) * 32 + (y_miniffset << 2)];
  1630.       uintptr_t x = pos_x;
  1631.       uint8_t chr_mixer;
  1632.  
  1633.       do
  1634.       {
  1635. #undef OAMS_
  1636. #define OAMS_(n)\
  1637.       x &= 511;\
  1638.       chr_mixer = chrb[n];\
  1639.       if (solid (vattr_b[x])) {\
  1640.         uint8_t chr_lo = chr_mixer & 15; \
  1641.         if (chr_lo) {\
  1642.           vptr_b[x] =  filter (pal16[chr_lo], vptr_b[x], coeff1, coeff2, vattr_b[x], bld_map);\
  1643.           vattr_b[x] = ch_mode_mask;\
  1644.         }\
  1645.       }\
  1646.       x++; \
  1647.       x &= 511;\
  1648.       if (solid (vattr_b[x])) {\
  1649.         uint8_t chr_hi = chr_mixer >> 4; \
  1650.         if (chr_hi) {\
  1651.           vptr_b[x] =  filter (pal16[chr_hi], vptr_b[x], coeff1, coeff2, vattr_b[x], bld_map);\
  1652.           vattr_b[x] = ch_mode_mask;\
  1653.         }\
  1654.       }\
  1655.       x++;
  1656.  
  1657. #undef OAMZ_
  1658. #define OAMZ_(n)\
  1659.       OAMS_(32*n+0)\
  1660.       OAMS_(32*n+1)\
  1661.       OAMS_(32*n+2)\
  1662.       OAMS_(32*n+3)\
  1663.       if (dim_x == 8* (n+1))\
  1664.         break;
  1665.  
  1666. #undef OAMZ__
  1667. #define OAMZ__(n)\
  1668.       OAMS_(32*n+0)\
  1669.       OAMS_(32*n+1)\
  1670.       OAMS_(32*n+2)\
  1671.       OAMS_(32*n+3)
  1672.  
  1673.         OAMZ_(0)
  1674.         OAMZ_(1)
  1675.         OAMZ_(2)
  1676.         OAMZ_(3)
  1677.         OAMZ_(4)
  1678.         OAMZ_(5)
  1679.         OAMZ_(6)
  1680.         OAMZ__(7)
  1681.  
  1682.       } while (0);
  1683.     }
  1684.   }
  1685. }
  1686.  
  1687. finline
  1688. void CommonSP_Draw_Shadow (
  1689.            struct rasterizer_caps *rtc,
  1690.      uint16_t *vattr_b,
  1691.      uint16_t *oam_entry,
  1692.      uintptr_t tbid_t,
  1693.      uintptr_t line,
  1694.      uintptr_t dim_x,
  1695.      uintptr_t dim_y,
  1696.      uintptr_t pos_x,
  1697.      uintptr_t pos_y)
  1698. {
  1699.   int y_offset;
  1700.   int y_blockoffset;
  1701.   int y_miniffset;
  1702.   unsigned int line2 = line;
  1703.  
  1704.   if ((pos_y + dim_y) >= 256)
  1705.     line2 += 256;
  1706.   if (line2 < pos_y) /* TODO : oam bug */
  1707.     return ;
  1708.   if (line2 >= (pos_y+dim_y))
  1709.     return ;
  1710.   if (pos_x >= 240 && (pos_x + dim_x) <= 512)
  1711.     return ;
  1712.   y_offset = line2 - pos_y;
  1713.   if (oam_entry[1] & 0x2000)
  1714.     y_offset = dim_y - 1 - y_offset;
  1715.   else ;
  1716.   y_blockoffset = y_offset >> 3;
  1717.   y_miniffset = y_offset & 7;
  1718.  
  1719.   if (oam_entry[0] & 0x2000) {
  1720.     if (oam_entry[1] & 0x1000) {
  1721.       /* std horz-swap sprite */
  1722.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1022) * 32 + (y_miniffset << 3)];
  1723.       uintptr_t x = pos_x + dim_x - 1;
  1724.       do
  1725.       {
  1726. #undef OAMS_
  1727. #define OAMS_(n)\
  1728.       if (chrb[n]) \
  1729.         vattr_b[x & 511] |= SOLID_MODE_OBJWIN_MASK;\
  1730.       x--;
  1731.  
  1732. #undef OAMZ_
  1733. #define OAMZ_(n)\
  1734.       OAMS_(64*n+0)\
  1735.       OAMS_(64*n+1)\
  1736.       OAMS_(64*n+2)\
  1737.       OAMS_(64*n+3)\
  1738.       OAMS_(64*n+4)\
  1739.       OAMS_(64*n+5)\
  1740.       OAMS_(64*n+6)\
  1741.       OAMS_(64*n+7)\
  1742.       if (dim_x == 8* (n+1))\
  1743.         break;
  1744.  
  1745. #undef OAMZ__
  1746. #define OAMZ__(n)\
  1747.       OAMS_(64*n+0)\
  1748.       OAMS_(64*n+1)\
  1749.       OAMS_(64*n+2)\
  1750.       OAMS_(64*n+3)\
  1751.       OAMS_(64*n+4)\
  1752.       OAMS_(64*n+5)\
  1753.       OAMS_(64*n+6)\
  1754.       OAMS_(64*n+7)
  1755.  
  1756.         OAMZ_(0)
  1757.         OAMZ_(1)
  1758.         OAMZ_(2)
  1759.         OAMZ_(3)
  1760.         OAMZ_(4)
  1761.         OAMZ_(5)
  1762.         OAMZ_(6)
  1763.         OAMZ__(7)
  1764.  
  1765.       } while (0);
  1766.     } else {
  1767.       /* std noswap sprite */
  1768.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1022) * 32 + (y_miniffset << 3)];
  1769.       uintptr_t x = pos_x;
  1770.  
  1771.       do
  1772.       {
  1773. #undef OAMS_
  1774. #define OAMS_(n)\
  1775.       if (chrb[n]) \
  1776.         vattr_b[x & 511] |= SOLID_MODE_OBJWIN_MASK;\
  1777.       x++;
  1778.         OAMZ_(0)
  1779.         OAMZ_(1)
  1780.         OAMZ_(2)
  1781.         OAMZ_(3)
  1782.         OAMZ_(4)
  1783.         OAMZ_(5)
  1784.         OAMZ_(6)
  1785.         OAMZ__(7)
  1786.  
  1787.       } while (0);
  1788.     }
  1789.   } else {
  1790.     if (oam_entry[1] & 0x1000) {
  1791.       /* std horz-swap sprite */
  1792.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1023) * 32 + (y_miniffset << 2)];
  1793.       uintptr_t x = pos_x + dim_x - 1;
  1794.       uint8_t chr_mixer;
  1795.  
  1796.       do
  1797.       {
  1798. #undef OAMS_
  1799. #define OAMS_(n)\
  1800.       chr_mixer = chrb[n];\
  1801.       if (chr_mixer & 0x0F) \
  1802.         vattr_b[x & 511] |= SOLID_MODE_OBJWIN_MASK;\
  1803.       x--; \
  1804.       if (chr_mixer & 0xF0) \
  1805.         vattr_b[x & 511] |= SOLID_MODE_OBJWIN_MASK;\
  1806.       x--;
  1807.  
  1808. #undef OAMZ_
  1809. #define OAMZ_(n)\
  1810.       OAMS_(32*n+0)\
  1811.       OAMS_(32*n+1)\
  1812.       OAMS_(32*n+2)\
  1813.       OAMS_(32*n+3)\
  1814.       if (dim_x == 8* (n+1))\
  1815.         break;
  1816.  
  1817. #undef OAMZ__
  1818. #define OAMZ__(n)\
  1819.       OAMS_(32*n+0)\
  1820.       OAMS_(32*n+1)\
  1821.       OAMS_(32*n+2)\
  1822.       OAMS_(32*n+3)
  1823.  
  1824.         OAMZ_(0)
  1825.         OAMZ_(1)
  1826.         OAMZ_(2)
  1827.         OAMZ_(3)
  1828.         OAMZ_(4)
  1829.         OAMZ_(5)
  1830.         OAMZ_(6)
  1831.         OAMZ__(7)
  1832.  
  1833.       } while (0);
  1834.     } else {
  1835.       /* std noswap sprite */
  1836.       const uint8_t *chrb = & rtc->gpu->vram [0x10000 + (tbid_t & 1023) * 32 + (y_miniffset << 2)];
  1837.       uintptr_t x = pos_x;
  1838.       uint8_t chr_mixer;
  1839.  
  1840.       do
  1841.       {
  1842. #undef OAMS_
  1843. #define OAMS_(n)\
  1844.       chr_mixer = chrb[n];\
  1845.       if (chr_mixer & 0x0F) \
  1846.         vattr_b[x & 511] |= SOLID_MODE_OBJWIN_MASK;\
  1847.       x++; \
  1848.       if (chr_mixer & 0xF0) \
  1849.         vattr_b[x & 511] |= SOLID_MODE_OBJWIN_MASK;\
  1850.       x++;
  1851.  
  1852. #undef OAMZ_
  1853. #define OAMZ_(n)\
  1854.       OAMS_(32*n+0)\
  1855.       OAMS_(32*n+1)\
  1856.       OAMS_(32*n+2)\
  1857.       OAMS_(32*n+3)\
  1858.       if (dim_x == 8* (n+1))\
  1859.         break;
  1860.  
  1861. #undef OAMZ__
  1862. #define OAMZ__(n)\
  1863.       OAMS_(32*n+0)\
  1864.       OAMS_(32*n+1)\
  1865.       OAMS_(32*n+2)\
  1866.       OAMS_(32*n+3)
  1867.  
  1868.         OAMZ_(0)
  1869.         OAMZ_(1)
  1870.         OAMZ_(2)
  1871.         OAMZ_(3)
  1872.         OAMZ_(4)
  1873.         OAMZ_(5)
  1874.         OAMZ_(6)
  1875.         OAMZ__(7)
  1876.  
  1877.       } while (0);
  1878.     }
  1879.   }
  1880. }
  1881.  
  1882. finline
  1883. void AffineMartixSP_Draw (
  1884.            struct rasterizer_caps *rtc,
  1885.              uint16_t ch_mode_mask,
  1886.              uint16_t bld_map,
  1887.              uint16_t *oamb,
  1888.              uint16_t slot,
  1889.              uint16_t tile_pitch,
  1890.      uint16_t *vptr_b,
  1891.      uint16_t *vattr_b,
  1892.      uint16_t *pal16,
  1893.      uint16_t *pal256,
  1894.      uint16_t coeff1,
  1895.      uint16_t coeff2,
  1896.      uintptr_t line,
  1897.      uintptr_t dim_x,
  1898.      uintptr_t dim_y,
  1899.      uintptr_t pos_x,
  1900.      uintptr_t pos_y,
  1901.      kable (*solid) (uint16_t back_attr),
  1902.         uint16_t (*filter) ( uint16_t tilePixel,
  1903.                                         uint16_t backdrop,
  1904.                                       uint16_t coeff1, uint16_t coeff2, uint16_t back_att, uint16_t bld_map))
  1905. {
  1906.   uint16_t *oam;
  1907.   uintptr_t virtual_pos_x;
  1908.   uintptr_t virtual_pos_y;
  1909.   uintptr_t virtual_dim_x;
  1910.   uintptr_t virtual_dim_y;
  1911.   uintptr_t tbid;
  1912.   intptr_t calc_left;
  1913.   intptr_t calc_right;
  1914.   intptr_t calc_top;
  1915.   intptr_t calc_bottom;
  1916.   intptr_t check_left;
  1917.   intptr_t check_right;
  1918.   intptr_t check_top;
  1919.   intptr_t check_bottom;
  1920.   intptr_t affine_bx;
  1921.   intptr_t affine_by;
  1922.   intptr_t affine_dx;
  1923.   intptr_t affine_dy;
  1924.   intptr_t affine_dmx;
  1925.   intptr_t affine_dmy;
  1926.   intptr_t cnt;
  1927.   int16_t *affbank;
  1928.  
  1929.   oam = & oamb[slot <<2];
  1930.  
  1931.   /* Get affine params */
  1932.   affbank = (int16_t *)& oamb[(oam[1] >> 9 & 31) << 4];
  1933.   affine_dx =affbank[3] ;
  1934.   affine_dmx =affbank[3+4] ;
  1935.   affine_dy =affbank[3+8] ;
  1936.   affine_dmy =affbank[3+12] ;
  1937.  
  1938.   if (line >= 160 && line <= 255)
  1939.     return ;
  1940.  
  1941.   if (oam[0] & 0x200) {
  1942.     /* TODO: Scanline 128-160 BUG. */
  1943.     virtual_pos_x = pos_x;
  1944.     virtual_pos_y = pos_y;
  1945.     virtual_dim_x = dim_x << 1;
  1946.     virtual_dim_y = dim_y << 1;
  1947.     check_left = pos_x + (dim_x >> 1);
  1948.     check_top = dim_y >> 1;
  1949.   } else {
  1950.     virtual_pos_x = pos_x;
  1951.     virtual_pos_y = pos_y;
  1952.     virtual_dim_x = dim_x;
  1953.     virtual_dim_y = dim_y;
  1954.     check_left = pos_x;
  1955.     check_top = 0;
  1956.   }
  1957.   virtual_pos_x &= 511;
  1958.   virtual_pos_y &= 255;
  1959.  
  1960.   if (virtual_pos_x >= 240 && (virtual_pos_x + virtual_dim_x) <= 512)
  1961.     return ;
  1962.   if (virtual_pos_y >= 160 && (virtual_pos_y + virtual_dim_y) <= 256)
  1963.     return ;
  1964.  
  1965.   tbid = oam[2] & 1023;
  1966.   calc_left = virtual_pos_x;
  1967.   calc_right = virtual_pos_x + virtual_dim_x;
  1968.  
  1969.   if ((virtual_pos_y + virtual_dim_y) > 256) {
  1970.     /* Cross screen bottom to up.*/
  1971.     uintptr_t phase = virtual_pos_y + virtual_dim_y - 1 & 255;
  1972.     if (!(line >= 0 && line <= phase))
  1973.       return ;
  1974.     /* Make affine guard negtive */
  1975.     calc_top = virtual_pos_y - 256;
  1976.     check_top = check_top + pos_y - 256;
  1977.     /* Get Y Offset in tile
  1978.       x init vector always -dim_x/2 *affine param.
  1979.     */
  1980.   } else {
  1981.     if (line < virtual_pos_y) /* TODO : oam bug */
  1982.       return ;
  1983.     if (line > (virtual_pos_y + virtual_dim_y -1)) /* TODO : oam bug */
  1984.       return ;
  1985.  
  1986.     check_top += pos_y;
  1987.     calc_top = virtual_pos_y;
  1988.   }
  1989.   check_bottom = check_top+ dim_y;
  1990.   check_right = check_left + dim_x;
  1991.   calc_bottom = calc_top + virtual_dim_y;
  1992.  
  1993.   /* Calc init affine params */
  1994.   if (1) {
  1995.     /* Pos - Middle Point */
  1996.     intptr_t mid_x = (calc_left + calc_right) / 2;
  1997.     intptr_t mid_y = (calc_top + calc_bottom) / 2;
  1998.     intptr_t affoff_x = -(virtual_dim_x/2);
  1999.     intptr_t affoff_y = (intptr_t) line - mid_y;
  2000.  
  2001.     affine_bx = affine_dx*affoff_x+affine_dmx*affoff_y+mid_x*256;
  2002.     affine_by  = affine_dy*affoff_x+affine_dmy*affoff_y+mid_y*256;
  2003.   }
  2004.  
  2005.   if (oam[0] & 0x2000) {
  2006.     /* 256 PAL */
  2007.     for (cnt = virtual_pos_x; cnt != virtual_pos_x+virtual_dim_x; cnt++) {
  2008.       /* Check Range */
  2009.       intptr_t xpos = affine_bx >> 8;
  2010.       intptr_t ypos = affine_by >> 8;
  2011.  
  2012.       if (xpos >= check_left
  2013.         && xpos < check_right
  2014.          && ypos >= check_top
  2015.         && ypos < check_bottom)
  2016.       {
  2017.         intptr_t tile_x = xpos - check_left;
  2018.         intptr_t tile_y = ypos - check_top;
  2019.         intptr_t tile_mod8_x = tile_x & 7;
  2020.         intptr_t tile_mod8_y = tile_y & 7;
  2021.         intptr_t tid;
  2022.         uint8_t *chr;
  2023.         uint8_t memb;
  2024.         const uintptr_t ii = cnt&511;
  2025.  
  2026.         tile_x >>= 3;
  2027.         tile_y >>= 3;
  2028.  
  2029.         tid = tbid + tile_y * tile_pitch + tile_x *2;
  2030.         /* fetch tile mem */
  2031.         chr = (uint8_t *)GetSP_Chr256 (rtc, tid, tile_mod8_y);
  2032.         memb = chr[tile_mod8_x];
  2033.  
  2034.         if (solid (vattr_b[ii])) {
  2035.           if (memb) {
  2036.             vptr_b[ii] =  filter (pal256[memb], vptr_b[ii], coeff1, coeff2, vattr_b[ii], bld_map);
  2037.             vattr_b[ii] = ch_mode_mask;
  2038.           }
  2039.         }
  2040.       }
  2041.       affine_bx += affine_dx;
  2042.       affine_by += affine_dy;
  2043.     }
  2044.   } else {
  2045.     /* 16 PAL */
  2046.     for (cnt = virtual_pos_x; cnt != virtual_pos_x+virtual_dim_x; cnt++) {
  2047.       /* Check Range */
  2048.       intptr_t xpos = affine_bx >> 8;
  2049.       intptr_t ypos = affine_by >> 8;
  2050.  
  2051.       if (xpos >= check_left
  2052.         && xpos < check_right
  2053.          && ypos >= check_top
  2054.         && ypos < check_bottom)
  2055.       {
  2056.         intptr_t tile_x = xpos - check_left;
  2057.         intptr_t tile_y = ypos - check_top;
  2058.         intptr_t tile_mod8_x = tile_x & 7;
  2059.         intptr_t tile_mod8_y = tile_y & 7;
  2060.         intptr_t tid;
  2061.         uint8_t *chr;
  2062.         uint8_t memb;
  2063.         const uintptr_t ii = cnt&511;
  2064.  
  2065.         tile_x >>= 3;
  2066.         tile_y >>= 3;
  2067.  
  2068.         tid = tbid + tile_y * tile_pitch + tile_x;
  2069.         /* fetch tile mem */
  2070.         chr = (uint8_t *)GetSP_Chr16 (rtc, tid, tile_mod8_y);
  2071.         memb = chr[tile_mod8_x>>1];
  2072.         if (tile_mod8_x & 1)
  2073.           memb >>= 4;
  2074.         else
  2075.           memb &= 15;
  2076.  
  2077.         if (solid (vattr_b[ii])) {
  2078.           if (memb) {
  2079.             vptr_b[ii] =  filter (pal16[memb], vptr_b[ii], coeff1, coeff2, vattr_b[ii], bld_map);
  2080.             vattr_b[ii] = ch_mode_mask;
  2081.           }
  2082.         }
  2083.       }
  2084.       affine_bx += affine_dx;
  2085.       affine_by += affine_dy;
  2086.     }
  2087.   }
  2088. }
  2089.  
  2090. finline
  2091. void AffineMartixSP_Draw_Shadow (
  2092.            struct rasterizer_caps *rtc,
  2093.              uint16_t *oamb,
  2094.              uint16_t slot,
  2095.              uint16_t tile_pitch,
  2096.      uint16_t *vattr_b,
  2097.      uintptr_t line,
  2098.      uintptr_t dim_x,
  2099.      uintptr_t dim_y,
  2100.      uintptr_t pos_x,
  2101.      uintptr_t pos_y)
  2102. {
  2103.   uint16_t *oam;
  2104.   uintptr_t virtual_pos_x;
  2105.   uintptr_t virtual_pos_y;
  2106.   uintptr_t virtual_dim_x;
  2107.   uintptr_t virtual_dim_y;
  2108.   uintptr_t tbid;
  2109.   intptr_t calc_left;
  2110.   intptr_t calc_right;
  2111.   intptr_t calc_top;
  2112.   intptr_t calc_bottom;
  2113.   intptr_t check_left;
  2114.   intptr_t check_right;
  2115.   intptr_t check_top;
  2116.   intptr_t check_bottom;
  2117.   intptr_t affine_bx;
  2118.   intptr_t affine_by;
  2119.   intptr_t affine_dx;
  2120.   intptr_t affine_dy;
  2121.   intptr_t affine_dmx;
  2122.   intptr_t affine_dmy;
  2123.   intptr_t cnt;
  2124.   int16_t *affbank;
  2125.  
  2126.   oam = & oamb[slot <<2];
  2127.  
  2128.   /* Get affine params */
  2129.   affbank = (int16_t *)& oamb[(oam[1] >> 9 & 31) << 4];
  2130.   affine_dx =affbank[3] ;
  2131.   affine_dmx =affbank[3+4] ;
  2132.   affine_dy =affbank[3+8] ;
  2133.   affine_dmy =affbank[3+12] ;
  2134.  
  2135.   if (line >= 160 && line <= 255)
  2136.     return ;
  2137.  
  2138.   if (oam[0] & 0x200) {
  2139.     /* TODO: Scanline 128-160 BUG. */
  2140.     virtual_pos_x = pos_x;
  2141.     virtual_pos_y = pos_y;
  2142.     virtual_dim_x = dim_x << 1;
  2143.     virtual_dim_y = dim_y << 1;
  2144.     check_left = pos_x + (dim_x >> 1);
  2145.     check_top = dim_y >> 1;
  2146.   } else {
  2147.     virtual_pos_x = pos_x;
  2148.     virtual_pos_y = pos_y;
  2149.     virtual_dim_x = dim_x;
  2150.     virtual_dim_y = dim_y;
  2151.     check_left = pos_x;
  2152.     check_top = 0;
  2153.   }
  2154.   virtual_pos_x &= 511;
  2155.   virtual_pos_y &= 255;
  2156.  
  2157.   if (virtual_pos_x >= 240 && (virtual_pos_x + virtual_dim_x) <= 512)
  2158.     return ;
  2159.   if (virtual_pos_y >= 160 && (virtual_pos_y + virtual_dim_y) <= 256)
  2160.     return ;
  2161.  
  2162.   calc_left = virtual_pos_x;
  2163.   calc_right = virtual_pos_x + virtual_dim_x;
  2164.   tbid = oam[2] & 1023;
  2165.  
  2166.   if ((virtual_pos_y + virtual_dim_y) > 256) {
  2167.     /* Cross screen bottom to up.*/
  2168.     uintptr_t phase = virtual_pos_y + virtual_dim_y - 1 & 255;
  2169.     if (!(line >= 0 && line <= phase))
  2170.       return ;
  2171.     /* Make affine guard negtive */
  2172.     calc_top = virtual_pos_y - 256;
  2173.     check_top = check_top + pos_y - 256;
  2174.     /* Get Y Offset in tile
  2175.       x init vector always -dim_x/2 *affine param.
  2176.     */
  2177.   } else {
  2178.     if (line < virtual_pos_y) /* TODO : oam bug */
  2179.       return ;
  2180.     if (line > (virtual_pos_y + virtual_dim_y -1)) /* TODO : oam bug */
  2181.       return ;
  2182.  
  2183.     check_top += pos_y;
  2184.     calc_top = virtual_pos_y;
  2185.   }
  2186.   check_bottom = check_top+ dim_y;
  2187.   check_right = check_left + dim_x;
  2188.   calc_bottom = calc_top + virtual_dim_y;
  2189.  
  2190.   /* Calc init affine params */
  2191.   if (1) {
  2192.     /* Pos - Middle Point */
  2193.     intptr_t mid_x = (calc_left + calc_right) / 2;
  2194.     intptr_t mid_y = (calc_top + calc_bottom) / 2;
  2195.     intptr_t affoff_x = -(virtual_dim_x/2);
  2196.     intptr_t affoff_y = (intptr_t) line - mid_y;
  2197.  
  2198.     affine_bx = affine_dx*affoff_x+affine_dmx*affoff_y+mid_x*256;
  2199.     affine_by  = affine_dy*affoff_x+affine_dmy*affoff_y+mid_y*256;
  2200.   }
  2201.  
  2202.   if (oam[0] & 0x2000) {
  2203.     /* 256 PAL */
  2204.     for (cnt = virtual_pos_x; cnt != virtual_pos_x+virtual_dim_x; cnt++) {
  2205.       /* Check Range */
  2206.       intptr_t xpos = affine_bx >> 8;
  2207.       intptr_t ypos = affine_by >> 8;
  2208.  
  2209.       if (xpos >= check_left
  2210.         && xpos < check_right
  2211.          && ypos >= check_top
  2212.         && ypos < check_bottom)
  2213.       {
  2214.         intptr_t tile_x = xpos - check_left;
  2215.         intptr_t tile_y = ypos - check_top;
  2216.         intptr_t tile_mod8_x = tile_x & 7;
  2217.         intptr_t tile_mod8_y = tile_y & 7;
  2218.         intptr_t tid;
  2219.         uint8_t *chr;
  2220.         uint8_t memb;
  2221.         tile_x >>= 3;
  2222.         tile_y >>= 3;
  2223.  
  2224.         tid = tbid + tile_y * tile_pitch + tile_x *2;
  2225.         /* fetch tile mem */
  2226.         chr = (uint8_t *)GetSP_Chr256 (rtc, tid, tile_mod8_y);
  2227.         memb = chr[tile_mod8_x];
  2228.         if (memb) {
  2229.             vattr_b[cnt&255] |= SOLID_MODE_OBJWIN_MASK;
  2230.         }
  2231.       }
  2232.       affine_bx += affine_dx;
  2233.       affine_by += affine_dy;
  2234.     }
  2235.   } else {
  2236.     /* 16 PAL */
  2237.     for (cnt = virtual_pos_x; cnt != virtual_pos_x+virtual_dim_x; cnt++) {
  2238.       /* Check Range */
  2239.       intptr_t xpos = affine_bx >> 8;
  2240.       intptr_t ypos = affine_by >> 8;
  2241.  
  2242.       if (xpos >= check_left
  2243.         && xpos < check_right
  2244.          && ypos >= check_top
  2245.         && ypos < check_bottom)
  2246.       {
  2247.         intptr_t tile_x = xpos - check_left;
  2248.         intptr_t tile_y = ypos - check_top;
  2249.         intptr_t tile_mod8_x = tile_x & 7;
  2250.         intptr_t tile_mod8_y = tile_y & 7;
  2251.         intptr_t tid;
  2252.         uint8_t *chr;
  2253.         uint8_t memb;
  2254.         tile_x >>= 3;
  2255.         tile_y >>= 3;
  2256.  
  2257.         tid = tbid + tile_y * tile_pitch + tile_x;
  2258.         /* fetch tile mem */
  2259.         chr = (uint8_t *)GetSP_Chr16 (rtc, tid, tile_mod8_y);
  2260.         memb = chr[tile_mod8_x>>1];
  2261.         if (tile_mod8_x & 1)
  2262.           memb >>= 4;
  2263.         else
  2264.           memb &= 15;
  2265.  
  2266.         if (memb) {
  2267.           vattr_b[cnt&255] |= SOLID_MODE_OBJWIN_MASK;
  2268.         }
  2269.       }
  2270.       affine_bx += affine_dx;
  2271.       affine_by += affine_dy;
  2272.     }
  2273.   }
  2274. }
  2275.  
  2276. finline
  2277. void
  2278. StdSP_Render (struct rasterizer_caps *rtc,
  2279.                             const uint16_t mode_mask,
  2280.                             const uint16_t layerLevel, /* bit << 10 */
  2281.                             const kable en_effect,
  2282.                             const uint16_t bld_map,
  2283.                                  kable (*solid) (uint16_t back_attr),
  2284.                                  uint16_t coeff_b,
  2285.                             uint16_t coeff1,
  2286.                             uint16_t coeff2)
  2287. {
  2288.   intptr_t cnt;
  2289.   intptr_t line = rtc->gpu->line.blk;
  2290.   uint16_t *oam;
  2291.   intptr_t dim_x;
  2292.   intptr_t dim_y;
  2293.   int32_t x_coo;
  2294.   int32_t y_coo;
  2295.   intptr_t tile_id;
  2296.   int32_t tile_pitch;
  2297.   uint16_t *vattr_b = & rtc->gpu->vattr[ 8 ];
  2298.   uint16_t *vptr_b = & rtc->gpu->vbuf[rtc->gpu->vptr_pitch/2*rtc->gpu->line.blk + 8 ];
  2299.   uint16_t *vattr;
  2300.   uint16_t *vptr;
  2301.   uint16_t *vaptr;
  2302.   uint16_t *vaptr_b = & rtc->gpu->vptr_cahce[0];
  2303.   uintptr_t tbid_t;
  2304.   uint16_t *pal16;
  2305.   uint16_t *pal256 = (uint16_t *) & rtc->gpu->palette16_b[512];
  2306.   uint16_t ch_mode_mask = SOLID_CHAN_SPRITE_MASK |  mode_mask;
  2307.   void *oamb = & rtc->gpu->oam[0];
  2308.  
  2309.   /* Maybe useless ??*/
  2310.   // memcpy (vaptr_b, vptr_b, 512);
  2311.   /* check sprite mapper,  when tile switch to next line */
  2312.   if (rtc->gpu->ctl.blk & 0x40) {
  2313.     /* 1D Mappering. continuous mapping */
  2314.     tile_pitch = 0;
  2315.   } else {
  2316.     /* 2D Mappering. pitch mapping */
  2317.     tile_pitch = 32;
  2318.   }
  2319.  
  2320.   for (cnt = 127; cnt != (intptr_t) -1; cnt--) {  
  2321.  
  2322.     oam = (uint16_t *)& rtc->gpu->oam[cnt<<3];
  2323.     if (       ((oam[0] & 0x300 ) == 0x200)
  2324.       || (oam[0] & 0x800)
  2325.       || ((oam[2] & 0xC00) != layerLevel) )
  2326.       continue;
  2327.  
  2328.     switch ((oam[1] & 0xC000) | (oam[0] >> 2 & 0x3000)) {
  2329.     case 0x0000: dim_x = 8; dim_y = 8; break;
  2330.     case 0x1000: dim_x =16; dim_y = 8; break;
  2331.     case 0x2000: dim_x = 8; dim_y =16; break;
  2332.     case 0x4000: dim_x =16; dim_y =16; break;
  2333.     case 0x5000: dim_x =32; dim_y = 8; break;
  2334.     case 0x6000: dim_x = 8; dim_y =32; break;
  2335.     case 0x8000: dim_x =32; dim_y =32; break;
  2336.     case 0x9000: dim_x =32; dim_y =16; break;
  2337.     case 0xA000: dim_x =16; dim_y =32; break;
  2338.     case 0xC000: dim_x =64; dim_y =64; break;
  2339.     case 0xD000: dim_x =64; dim_y =32; break;
  2340.     case 0xE000: dim_x =32; dim_y =64; break;
  2341.     default:
  2342.       DEBUG_BREAK ();
  2343.     }
  2344.    
  2345.     x_coo = oam[1] & 511; /* X Wrapround ??*/
  2346.     y_coo = oam[0] & 255;
  2347.     tile_id = oam[2] & 1023;
  2348.     vattr = & vattr_b[x_coo];
  2349.     vptr = & vptr_b[x_coo];
  2350.     vaptr = & vaptr_b[x_coo];
  2351.     pal16 = & pal256[ (oam[2] >> 12) << 4];
  2352.  
  2353.     if (oam[0] & 0x2000) { /* 8bit pal~*/
  2354.        if ((tile_pitch & 32) == 0)
  2355.          tile_pitch = dim_x>> 2; /* XXX: mod 2^n~*/
  2356.        tile_id &= 1022;
  2357.     } else  if ((tile_pitch & 32) == 0)
  2358.          tile_pitch = dim_x>> 3;
  2359.  
  2360.     if (oam[1] & 0x2000) /* FIXME: maybe bug in complex attribute deal */
  2361.       tbid_t =tile_id + tile_pitch * (((line-(intptr_t) y_coo) & 0x80000000) ? ((dim_y - 1 - (  line-y_coo)) + 256) >> 3: (dim_y - 1 - (  line-y_coo)) >> 3);
  2362.     else
  2363.        tbid_t =tile_id + tile_pitch * (((line-(intptr_t) y_coo) & 0x80000000) ? (line+256-y_coo) >> 3: (line-y_coo) >> 3);
  2364.  
  2365.     if (oam[0] & 0x100) {
  2366.       if (oam[0] & 0x400) {
  2367.         /*  If alpha is specified, it always executes regardless of the rendering type */
  2368.         AffineMartixSP_Draw (rtc, ch_mode_mask, bld_map, (uint16_t *)oamb, cnt,
  2369.         tile_pitch,  
  2370.            vptr_b,
  2371.            vattr_b,
  2372.            pal16,
  2373.            pal256,
  2374.            coeff1,
  2375.            coeff2, line, dim_x, dim_y, x_coo, y_coo, solid, alpha16);
  2376.         continue;
  2377.       } else {
  2378.         switch (rtc->opca) {
  2379.         case 0:
  2380.         AffineMartixSP_Draw (rtc, ch_mode_mask, bld_map, (uint16_t *)oamb, cnt,
  2381.         tile_pitch,  
  2382.            vptr_b,
  2383.            vattr_b,
  2384.            pal16,
  2385.            pal256,
  2386.            coeff1,
  2387.            coeff2, line, dim_x, dim_y, x_coo, y_coo, solid, pixcpy);
  2388.           break;
  2389.         case 1:
  2390.         AffineMartixSP_Draw (rtc, ch_mode_mask, bld_map, (uint16_t *)oamb, cnt,
  2391.         tile_pitch,  
  2392.            vptr_b,
  2393.            vattr_b,
  2394.            pal16,
  2395.            pal256,
  2396.            coeff1,
  2397.            coeff2, line, dim_x, dim_y, x_coo, y_coo, solid, alpha16);
  2398.           break;
  2399.         case 2:
  2400.         AffineMartixSP_Draw (rtc, ch_mode_mask, bld_map, (uint16_t *)oamb, cnt,
  2401.         tile_pitch,  
  2402.            vptr_b,
  2403.            vattr_b,
  2404.            pal16,
  2405.            pal256,
  2406.            coeff_b,
  2407.            coeff_b, line, dim_x, dim_y, x_coo, y_coo, solid, brightness_inc16);
  2408.           break;
  2409.         case 3:
  2410.         AffineMartixSP_Draw (rtc, ch_mode_mask, bld_map, (uint16_t *)oamb, cnt,
  2411.         tile_pitch,
  2412.            vptr_b,
  2413.            vattr_b,
  2414.            pal16,
  2415.            pal256,
  2416.            coeff_b,
  2417.            coeff_b, line, dim_x, dim_y, x_coo, y_coo, solid, brightness_dec16);
  2418.           break;
  2419.         default:
  2420.           assert (0);
  2421.           break;
  2422.         }
  2423.       }
  2424.     } else {
  2425.       if (oam[0] & 0x400) {
  2426.         CommonSP_Draw (rtc, ch_mode_mask, bld_map,
  2427.            vptr_b,
  2428.            vattr_b,
  2429.            & oam[0],
  2430.            pal16,
  2431.            pal256,
  2432.            coeff1,
  2433.            coeff2, tbid_t, line, dim_x, dim_y, x_coo, y_coo, solid, alpha16);
  2434.         continue;
  2435.       } else {
  2436.         switch (rtc->opca) {
  2437.         case 0:
  2438.         CommonSP_Draw (rtc, ch_mode_mask, bld_map,
  2439.            vptr_b,
  2440.            vattr_b,
  2441.            & oam[0],
  2442.            pal16,
  2443.            pal256,
  2444.            coeff1,
  2445.            coeff2, tbid_t, line, dim_x, dim_y, x_coo, y_coo, solid, pixcpy);
  2446.           break;
  2447.         case 1:
  2448.         CommonSP_Draw (rtc, ch_mode_mask, bld_map,
  2449.            vptr_b,
  2450.            vattr_b,
  2451.            & oam[0],
  2452.            pal16,
  2453.            pal256,
  2454.            coeff1,
  2455.            coeff2, tbid_t, line, dim_x, dim_y, x_coo, y_coo, solid, alpha16);
  2456.           break;
  2457.         case 2:
  2458.         CommonSP_Draw (rtc, ch_mode_mask, bld_map,
  2459.            vptr_b,
  2460.            vattr_b,
  2461.            & oam[0],
  2462.            pal16,
  2463.            pal256,
  2464.            coeff_b,
  2465.            coeff_b, tbid_t, line, dim_x, dim_y, x_coo, y_coo, solid, brightness_inc16);
  2466.           break;
  2467.         case 3:
  2468.         CommonSP_Draw (rtc, ch_mode_mask, bld_map,
  2469.            vptr_b,
  2470.            vattr_b,
  2471.            & oam[0],
  2472.            pal16,
  2473.            pal256,
  2474.            coeff_b,
  2475.            coeff_b, tbid_t, line, dim_x, dim_y, x_coo, y_coo, solid, brightness_dec16);
  2476.           break;
  2477.         default:
  2478.           assert (0);
  2479.           break;
  2480.         }
  2481.       }
  2482.     }
  2483.   }
  2484. }
  2485.  
  2486. finline
  2487. void sp_render_shadow (struct rasterizer_caps *const rtc)
  2488. {
  2489.   intptr_t cnt;
  2490.   intptr_t line = rtc->gpu->line.blk;
  2491.   uint16_t *oam;
  2492.   intptr_t dim_x;
  2493.   intptr_t dim_y;
  2494.   int32_t x_coo;
  2495.   int32_t y_coo;
  2496.   intptr_t tile_id;
  2497.   int32_t tile_pitch;
  2498.   uint16_t *vattr_b = & rtc->gpu->vattr[ 8 ];
  2499.   uintptr_t tbid_t;
  2500.   void *oamb = & rtc->gpu->oam[0];
  2501.  
  2502.   /* check sprite mapper,  when tile switch to next line */
  2503.   if (rtc->gpu->ctl.blk & 0x40) {
  2504.     /* 1D Mappering. continuous mapping */
  2505.     tile_pitch = 0;
  2506.   } else {
  2507.     /* 2D Mappering. pitch mapping */
  2508.     tile_pitch = 32;
  2509.   }
  2510.  
  2511.   for (cnt = 127; cnt != (intptr_t) -1; cnt--) {
  2512.    
  2513.     oam = (uint16_t *)& rtc->gpu->oam[cnt<<3];
  2514.     if ((oam[0] & 0x300 ) == 0x200) {
  2515.       continue;
  2516.     } else if ((oam[0] & 0xC00) != 0x800)
  2517.       continue;
  2518.  
  2519.     switch ((oam[1] & 0xC000) | (oam[0] >> 2 & 0x3000)) {
  2520.     case 0x0000: dim_x = 8; dim_y = 8; break;
  2521.     case 0x1000: dim_x =16; dim_y = 8; break;
  2522.     case 0x2000: dim_x = 8; dim_y =16; break;
  2523.     case 0x4000: dim_x =16; dim_y =16; break;
  2524.     case 0x5000: dim_x =32; dim_y = 8; break;
  2525.     case 0x6000: dim_x = 8; dim_y =32; break;
  2526.     case 0x8000: dim_x =32; dim_y =32; break;
  2527.     case 0x9000: dim_x =32; dim_y =16; break;
  2528.     case 0xA000: dim_x =16; dim_y =32; break;
  2529.     case 0xC000: dim_x =64; dim_y =64; break;
  2530.     case 0xD000: dim_x =64; dim_y =32; break;
  2531.     case 0xE000: dim_x =32; dim_y =64; break;
  2532.     default:
  2533.       DEBUG_BREAK ();
  2534.     }
  2535.    
  2536.     x_coo = oam[1] & 511; /* X Wrapround ??*/
  2537.     y_coo = oam[0] & 255;
  2538.     tile_id = oam[2] & 1023;
  2539.  
  2540.     if (oam[0] & 0x2000) { /* 8bit pal~*/
  2541.        if ((tile_pitch & 32) == 0)
  2542.          tile_pitch = dim_x>> 2; /* XXX: mod 2^n~*/
  2543.     } else  if ((tile_pitch & 32) == 0)
  2544.          tile_pitch = dim_x>> 3;
  2545.  
  2546.     tbid_t = tile_id + tile_pitch * (((line-(intptr_t) y_coo) & 0x80000000) ? (line+256-y_coo) >> 3: (line-y_coo) >> 3); /* XXX:*/
  2547.  
  2548.     if (oam[0] & 0x100)
  2549.         AffineMartixSP_Draw_Shadow (rtc, (uint16_t *)oamb, cnt, tile_pitch,
  2550.            vattr_b, line, dim_x, dim_y, x_coo, y_coo);
  2551.     else
  2552.         CommonSP_Draw_Shadow (rtc, vattr_b, & oam[0],
  2553.               tbid_t, line, dim_x, dim_y, x_coo, y_coo);
  2554.   }
  2555. }
  2556. finline
  2557. kable solid_nowin (uint16_t attr)
  2558. {
  2559.   return true;
  2560. }
  2561. finline
  2562. kable solid_out (uint16_t attr)
  2563. {
  2564.   return (attr & SOLID_MODE_FULL_MASK) ? false : true;
  2565. }
  2566. finline
  2567. kable solid_win1 (uint16_t attr)
  2568. {
  2569.   return (attr & SOLID_MODE_WIN1_MASK) ? true : false;
  2570. }
  2571. finline
  2572. kable solid_win0 (uint16_t attr)
  2573. {
  2574.   return (attr & SOLID_MODE_WIN0_MASK) ? true : false;
  2575. }
  2576. finline
  2577. kable solid_shadow (uint16_t attr)
  2578. {
  2579.   return (attr & SOLID_MODE_OBJWIN_MASK) ? true : false;
  2580. }
  2581. static
  2582. void StdBG_Render_shadow
  2583.             (struct rasterizer_caps *rtc,
  2584.                    uint16_t chan_mask,
  2585.                    uint16_t bld_map,
  2586.                    uint16_t coeff1, uint16_t coeff2)
  2587. {
  2588.   switch (rtc->opca) {
  2589.   case 0: CommonBG_Render (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, pixcpy); break;
  2590.   case 1: CommonBG_Render (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, alpha16); break;
  2591.   case 2: CommonBG_Render (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_inc16); break;
  2592.   case 3: CommonBG_Render (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_dec16); break;
  2593.   default: DEBUG_BREAK ();
  2594.   }
  2595. }
  2596. static
  2597. void StdBG_Render_nowindow
  2598.             (struct rasterizer_caps *rtc,
  2599.                                uint16_t chan_mask,
  2600.                    uint16_t bld_map,
  2601.                             uint16_t coeff1, uint16_t coeff2)
  2602. {
  2603.   switch (rtc->opca) {
  2604.   case 0: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, pixcpy); break;
  2605.   case 1: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, alpha16); break;
  2606.   case 2: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_inc16); break;
  2607.   case 3: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_dec16); break;
  2608.   default: DEBUG_BREAK ();
  2609.   }
  2610. }
  2611. static
  2612. void StdBG_Render_win0
  2613.             (struct rasterizer_caps *rtc,
  2614.                                uint16_t chan_mask,
  2615.                    uint16_t bld_map,
  2616.                             uint16_t coeff1, uint16_t coeff2)
  2617. {
  2618.   switch (rtc->opca) {
  2619.   case 0: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, pixcpy); break;
  2620.   case 1: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, alpha16); break;
  2621.   case 2: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_inc16); break;
  2622.   case 3: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_dec16); break;
  2623.   default: DEBUG_BREAK ();
  2624.   }
  2625. }
  2626. static
  2627. void StdBG_Render_win1
  2628.             (struct rasterizer_caps *rtc,
  2629.                                uint16_t chan_mask,
  2630.                    uint16_t bld_map,
  2631.                             uint16_t coeff1, uint16_t coeff2)
  2632. {
  2633.   switch (rtc->opca) {
  2634.   case 0: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, pixcpy); break;
  2635.   case 1: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, alpha16); break;
  2636.   case 2: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_inc16); break;
  2637.   case 3: CommonBG_Render (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_dec16); break;
  2638.   default: DEBUG_BREAK ();
  2639.   }
  2640. }
  2641. static
  2642. void StdBG_Render_outwin
  2643.             (struct rasterizer_caps *rtc,
  2644.                                uint16_t chan_mask,
  2645.                    uint16_t bld_map,
  2646.                             uint16_t coeff1, uint16_t coeff2)
  2647. {
  2648.   switch (rtc->opca) {
  2649.   case 0: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, pixcpy); break;
  2650.   case 1: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, alpha16); break;
  2651.   case 2: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_inc16); break;
  2652.   case 3: CommonBG_Render (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_dec16); break;
  2653.   default: DEBUG_BREAK ();
  2654.   }
  2655. }
  2656. static
  2657. void StdBG_Render_Rot_shadow
  2658.             (struct rasterizer_caps *rtc,
  2659.                                uint16_t chan_mask,
  2660.                    uint16_t bld_map,
  2661.                             uint16_t coeff1, uint16_t coeff2)
  2662. {
  2663.   switch (rtc->opca) {
  2664.   case 0: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, pixcpy); break;
  2665.   case 1: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, alpha16); break;
  2666.   case 2: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_inc16); break;
  2667.   case 3: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_dec16); break;
  2668.   default: DEBUG_BREAK ();
  2669.   }
  2670. }
  2671. static
  2672. void StdBG_Render_Rot_win0
  2673.             (struct rasterizer_caps *rtc,
  2674.                                uint16_t chan_mask,
  2675.                    uint16_t bld_map,
  2676.                             uint16_t coeff1, uint16_t coeff2)
  2677. {
  2678.   switch (rtc->opca) {
  2679.   case 0: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, pixcpy); break;
  2680.   case 1: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, alpha16); break;
  2681.   case 2: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_inc16); break;
  2682.   case 3: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_dec16); break;
  2683.   default: DEBUG_BREAK ();
  2684.   }
  2685. }
  2686. static
  2687. void StdBG_Render_Rot_win1
  2688.             (struct rasterizer_caps *rtc,
  2689.                                uint16_t chan_mask,
  2690.                    uint16_t bld_map,
  2691.                             uint16_t coeff1, uint16_t coeff2)
  2692. {
  2693.   switch (rtc->opca) {
  2694.   case 0: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, pixcpy); break;
  2695.   case 1: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, alpha16); break;
  2696.   case 2: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_inc16); break;
  2697.   case 3: CommonBG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_dec16); break;
  2698.   default: DEBUG_BREAK ();
  2699.   }
  2700. }
  2701. static
  2702. void StdBG_Render_Rot_outwin
  2703.             (struct rasterizer_caps *rtc,
  2704.                                uint16_t chan_mask,
  2705.                    uint16_t bld_map,
  2706.                             uint16_t coeff1, uint16_t coeff2)
  2707. {
  2708.   switch (rtc->opca) {
  2709.   case 0: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, pixcpy); break;
  2710.   case 1: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, alpha16); break;
  2711.   case 2: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_inc16); break;
  2712.   case 3: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_dec16); break;
  2713.   default: DEBUG_BREAK ();
  2714.   }
  2715. }
  2716. static
  2717. void StdBG_Render_Rot_nowindow
  2718.             (struct rasterizer_caps *rtc,
  2719.                                uint16_t chan_mask,
  2720.                    uint16_t bld_map,
  2721.                             uint16_t coeff1, uint16_t coeff2)
  2722. {
  2723.   switch (rtc->opca) {
  2724.   case 0: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, pixcpy); break;
  2725.   case 1: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, alpha16); break;
  2726.   case 2: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_inc16); break;
  2727.   case 3: CommonBG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_dec16); break;
  2728.   default: DEBUG_BREAK ();
  2729.   }
  2730. }
  2731. static
  2732. void Mode3_Render_shadow
  2733.             (struct rasterizer_caps *rtc,
  2734.                    uint16_t chan_mask,
  2735.                    uint16_t bld_map,
  2736.                    uint16_t coeff1, uint16_t coeff2)
  2737. {
  2738.   switch (rtc->opca) {
  2739.   case 0: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, pixcpy); break;
  2740.   case 1: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, alpha16); break;
  2741.   case 2: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_inc16); break;
  2742.   case 3: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_dec16); break;
  2743.   default: DEBUG_BREAK ();
  2744.   }
  2745. }
  2746. static
  2747. void Mode3_Render_nowindow
  2748.             (struct rasterizer_caps *rtc,
  2749.                                uint16_t chan_mask,
  2750.                    uint16_t bld_map,
  2751.                             uint16_t coeff1, uint16_t coeff2)
  2752. {
  2753.   switch (rtc->opca) {
  2754.   case 0: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, pixcpy); break;
  2755.   case 1: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, alpha16); break;
  2756.   case 2: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_inc16); break;
  2757.   case 3: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_dec16); break;
  2758.   default: DEBUG_BREAK ();
  2759.   }
  2760. }
  2761. static
  2762. void Mode3_Render_win0
  2763.             (struct rasterizer_caps *rtc,
  2764.                                uint16_t chan_mask,
  2765.                    uint16_t bld_map,
  2766.                             uint16_t coeff1, uint16_t coeff2)
  2767. {
  2768.   switch (rtc->opca) {
  2769.   case 0: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, pixcpy); break;
  2770.   case 1: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, alpha16); break;
  2771.   case 2: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_inc16); break;
  2772.   case 3: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_dec16); break;
  2773.   default: DEBUG_BREAK ();
  2774.   }
  2775. }
  2776. static
  2777. void Mode3_Render_win1
  2778.             (struct rasterizer_caps *rtc,
  2779.                                uint16_t chan_mask,
  2780.                    uint16_t bld_map,
  2781.                             uint16_t coeff1, uint16_t coeff2)
  2782. {
  2783.   switch (rtc->opca) {
  2784.   case 0: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, pixcpy); break;
  2785.   case 1: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, alpha16); break;
  2786.   case 2: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_inc16); break;
  2787.   case 3: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_dec16); break;
  2788.   default: DEBUG_BREAK ();
  2789.   }
  2790. }
  2791. static
  2792. void Mode3_Render_outwin
  2793.             (struct rasterizer_caps *rtc,
  2794.                                uint16_t chan_mask,
  2795.                    uint16_t bld_map,
  2796.                             uint16_t coeff1, uint16_t coeff2)
  2797. {
  2798.   switch (rtc->opca) {
  2799.   case 0: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, pixcpy); break;
  2800.   case 1: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, alpha16); break;
  2801.   case 2: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_inc16); break;
  2802.   case 3: Mode3Bitmap8Full_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_dec16); break;
  2803.   default: DEBUG_BREAK ();
  2804.   }
  2805. }
  2806. static
  2807. void Mode4_Render_shadow
  2808.             (struct rasterizer_caps *rtc,
  2809.                    uint16_t chan_mask,
  2810.                    uint16_t bld_map,
  2811.                    uint16_t coeff1, uint16_t coeff2)
  2812. {
  2813.   switch (rtc->opca) {
  2814.   case 0: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, pixcpy); break;
  2815.   case 1: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, alpha16); break;
  2816.   case 2: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_inc16); break;
  2817.   case 3: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_dec16); break;
  2818.   default: DEBUG_BREAK ();
  2819.   }
  2820. }
  2821. static
  2822. void Mode4_Render_nowindow
  2823.             (struct rasterizer_caps *rtc,
  2824.                                uint16_t chan_mask,
  2825.                    uint16_t bld_map,
  2826.                             uint16_t coeff1, uint16_t coeff2)
  2827. {
  2828.   switch (rtc->opca) {
  2829.   case 0: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, pixcpy); break;
  2830.   case 1: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, alpha16); break;
  2831.   case 2: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_inc16); break;
  2832.   case 3: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_dec16); break;
  2833.   default: DEBUG_BREAK ();
  2834.   }
  2835. }
  2836. static
  2837. void Mode4_Render_win0
  2838.             (struct rasterizer_caps *rtc,
  2839.                                uint16_t chan_mask,
  2840.                    uint16_t bld_map,
  2841.                             uint16_t coeff1, uint16_t coeff2)
  2842. {
  2843.   switch (rtc->opca) {
  2844.   case 0: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, pixcpy); break;
  2845.   case 1: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, alpha16); break;
  2846.   case 2: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_inc16); break;
  2847.   case 3: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_dec16); break;
  2848.   default: DEBUG_BREAK ();
  2849.   }
  2850. }
  2851. static
  2852. void Mode4_Render_win1
  2853.             (struct rasterizer_caps *rtc,
  2854.                                uint16_t chan_mask,
  2855.                    uint16_t bld_map,
  2856.                             uint16_t coeff1, uint16_t coeff2)
  2857. {
  2858.   switch (rtc->opca) {
  2859.   case 0: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, pixcpy); break;
  2860.   case 1: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, alpha16); break;
  2861.   case 2: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_inc16); break;
  2862.   case 3: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_dec16); break;
  2863.   default: DEBUG_BREAK ();
  2864.   }
  2865. }
  2866. static
  2867. void Mode4_Render_outwin
  2868.             (struct rasterizer_caps *rtc,
  2869.                                uint16_t chan_mask,
  2870.                    uint16_t bld_map,
  2871.                             uint16_t coeff1, uint16_t coeff2)
  2872. {
  2873.   switch (rtc->opca) {
  2874.   case 0: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, pixcpy); break;
  2875.   case 1: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, alpha16); break;
  2876.   case 2: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_inc16); break;
  2877.   case 3: Mode4Bitmap4SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_dec16); break;
  2878.   default: DEBUG_BREAK ();
  2879.   }
  2880. }
  2881. static
  2882. void Mode5_Render_shadow
  2883.             (struct rasterizer_caps *rtc,
  2884.                    uint16_t chan_mask,
  2885.                    uint16_t bld_map,
  2886.                    uint16_t coeff1, uint16_t coeff2)
  2887. {
  2888.   switch (rtc->opca) {
  2889.   case 0: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, pixcpy); break;
  2890.   case 1: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, alpha16); break;
  2891.   case 2: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_inc16); break;
  2892.   case 3: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_OBJWIN_MASK, bld_map, coeff1, coeff2, solid_shadow, brightness_dec16); break;
  2893.   default: DEBUG_BREAK ();
  2894.   }
  2895. }
  2896. static
  2897. void Mode5_Render_nowindow
  2898.             (struct rasterizer_caps *rtc,
  2899.                                uint16_t chan_mask,
  2900.                    uint16_t bld_map,
  2901.                             uint16_t coeff1, uint16_t coeff2)
  2902. {
  2903.   switch (rtc->opca) {
  2904.   case 0: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, pixcpy); break;
  2905.   case 1: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, alpha16); break;
  2906.   case 2: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_inc16); break;
  2907.   case 3: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_nowin, brightness_dec16); break;
  2908.   default: DEBUG_BREAK ();
  2909.   }
  2910. }
  2911. static
  2912. void Mode5_Render_win0
  2913.             (struct rasterizer_caps *rtc,
  2914.                                uint16_t chan_mask,
  2915.                    uint16_t bld_map,
  2916.                             uint16_t coeff1, uint16_t coeff2)
  2917. {
  2918.   switch (rtc->opca) {
  2919.   case 0: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, pixcpy); break;
  2920.   case 1: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, alpha16); break;
  2921.   case 2: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_inc16); break;
  2922.   case 3: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN0_MASK, bld_map, coeff1, coeff2, solid_win0, brightness_dec16); break;
  2923.   default: DEBUG_BREAK ();
  2924.   }
  2925. }
  2926. static
  2927. void Mode5_Render_win1
  2928.             (struct rasterizer_caps *rtc,
  2929.                                uint16_t chan_mask,
  2930.                    uint16_t bld_map,
  2931.                             uint16_t coeff1, uint16_t coeff2)
  2932. {
  2933.   switch (rtc->opca) {
  2934.   case 0: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, pixcpy); break;
  2935.   case 1: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, alpha16); break;
  2936.   case 2: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_inc16); break;
  2937.   case 3: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask | SOLID_MODE_WIN1_MASK, bld_map, coeff1, coeff2, solid_win1, brightness_dec16); break;
  2938.   default: DEBUG_BREAK ();
  2939.   }
  2940. }
  2941. static
  2942. void Mode5_Render_outwin
  2943.             (struct rasterizer_caps *rtc,
  2944.                                uint16_t chan_mask,
  2945.                    uint16_t bld_map,
  2946.                             uint16_t coeff1, uint16_t coeff2)
  2947. {
  2948.   switch (rtc->opca) {
  2949.   case 0: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, pixcpy); break;
  2950.   case 1: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, alpha16); break;
  2951.   case 2: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_inc16); break;
  2952.   case 3: Mode5Bitmap8SwapBuffer_BG_Render_Rot (rtc, chan_mask, bld_map, coeff1, coeff2, solid_out, brightness_dec16); break;
  2953.   default: DEBUG_BREAK ();
  2954.   }
  2955. }
  2956.  
  2957. finline
  2958. void vec_memset16a512b (void *const mem, const uint16_t mask) {
  2959.   __m128i maskvec = _mm_shufflelo_epi16 (*(__m128i *)& mask, 0); /* TODO: pshufb done */
  2960.   __m128i *smemptr = (__m128i *) mem;
  2961.   maskvec = _mm_unpacklo_epi16 (maskvec, maskvec);
  2962.  
  2963. #undef SIMD_GUP_
  2964. #define SIMD_GUP_(x)\
  2965.   _mm_store_si128 (& smemptr[(x)+0], maskvec);\
  2966.   _mm_store_si128 (& smemptr[(x)+1], maskvec);\
  2967.   _mm_store_si128 (& smemptr[(x)+2], maskvec);\
  2968.   _mm_store_si128 (& smemptr[(x)+3], maskvec);\
  2969.   _mm_store_si128 (& smemptr[(x)+4], maskvec);\
  2970.   _mm_store_si128 (& smemptr[(x)+5], maskvec);\
  2971.   _mm_store_si128 (& smemptr[(x)+6], maskvec);\
  2972.   _mm_store_si128 (& smemptr[(x)+7], maskvec);
  2973.  
  2974.   SIMD_GUP_(0)
  2975.   SIMD_GUP_(8)
  2976.   SIMD_GUP_(16)
  2977.   SIMD_GUP_(24)
  2978. }
  2979.  
  2980. void win_render (struct gpu *gpu, uintptr_t *sort4_t,
  2981.                     struct rasterizer_caps *rtinfos_t,
  2982.                       const uint16_t mode_mask,
  2983.                       const uint16_t winIo_Shift, /* require: must shift to high for win0/outwin io register */
  2984.                       const uint16_t coeff_b,
  2985.                       const uint16_t coeff_a1,
  2986.                       const uint16_t coeff_a2,
  2987.                       const uint16_t mode,
  2988.                                uint16_t en_bgmask,
  2989.                                kable (*sp_solid) (uint16_t back_attr),
  2990.                                void (*bg_render_std) (struct rasterizer_caps *rtc,  uint16_t chan_mask, uint16_t bld_map, uint16_t coeff1, uint16_t coeff2),
  2991.                                void (*bg_render_rot) (struct rasterizer_caps *rtc,  uint16_t chan_mask, uint16_t bld_map, uint16_t coeff1, uint16_t coeff2),
  2992.                                void (*mode3_render_rot) (struct rasterizer_caps *rtc,  uint16_t chan_mask, uint16_t bld_map, uint16_t coeff1, uint16_t coeff2),
  2993.                                void (*mode4_render_rot) (struct rasterizer_caps *rtc,  uint16_t chan_mask, uint16_t bld_map, uint16_t coeff1, uint16_t coeff2),                            
  2994.                                void (*mode5_render_rot) (struct rasterizer_caps *rtc,  uint16_t chan_mask, uint16_t bld_map, uint16_t coeff1, uint16_t coeff2) )
  2995. {
  2996.   struct rasterizer_caps rtinfos[5];
  2997.   uintptr_t sort[4];
  2998.   uintptr_t layer_cur;
  2999.   uintptr_t layer_van;
  3000.   uintptr_t cnt;
  3001.   uint16_t bld_map;
  3002.   uint16_t spbld_mask;
  3003.   const uintptr_t enb_mask = 0x100;
  3004.   const uintptr_t sp_mask =  enb_mask << 4;
  3005.   const uintptr_t eff_mask = 0x2000;
  3006.   const kable en_sprite = (en_bgmask & sp_mask) && (winIo_Shift & sp_mask);
  3007.   const kable en_effect = !! (winIo_Shift & eff_mask);
  3008.  
  3009.   memcpy (rtinfos, rtinfos_t, sizeof (rtinfos));
  3010.   memcpy (sort, sort4_t, sizeof (sort));
  3011.  
  3012.   layer_cur = 3;
  3013.   layer_van = 3;
  3014.   spbld_mask = 0;
  3015.  
  3016.   if (gpu->bld_ctl.blk & sp_mask)
  3017.     spbld_mask |= SOLID_CHAN_SPRITE_MASK;
  3018.  
  3019.   if (en_effect == false) {
  3020.     rtinfos[0].opca =
  3021.     rtinfos[1].opca =
  3022.     rtinfos[2].opca =
  3023.     rtinfos[3].opca =
  3024.     rtinfos[4].opca = 0;
  3025.   }
  3026.  
  3027. #define CALC_PREV_PRI_BLD()                   \
  3028.   do {                                        \
  3029.     uintptr_t id;                             \
  3030.     bld_map = 0;                              \
  3031.     for (  id = 0; id != cnt; id++) {         \
  3032.          const uintptr_t ch_ac = sort[id] & 3; \
  3033.          if (gpu->bld_ctl.blk & enb_mask << ch_ac)   \
  3034.            bld_map |= SOLID_CHAN_BG_BASE_MASK << ch_ac; \
  3035.     }                                                   \
  3036.   } while (0)
  3037.   for (cnt = 0; cnt != 4; cnt++) {
  3038.     const uintptr_t bga_isa = sort[cnt];
  3039.     const uintptr_t ch = bga_isa & 3;
  3040.     const uintptr_t en_mask = enb_mask << ch;
  3041.  
  3042.     if (en_bgmask & winIo_Shift & en_mask) {
  3043.  
  3044.       if (  en_sprite != false ) {
  3045.  
  3046.         layer_van = layer_cur;
  3047.         layer_cur = bga_isa >> 8 & 3;
  3048.        
  3049.         /* Shadow window enable sprite ??*/
  3050.         if (layer_van != layer_cur) {
  3051.           uintptr_t layer_start;
  3052.          
  3053.           for ( layer_start = layer_van;
  3054.                 layer_start != layer_cur;
  3055.                 layer_start--){
  3056.           /* Collect target 2 blend/alpha mask  
  3057.                 It's important to assume that sprite doesn't mix with itself.
  3058.           */
  3059.                 CALC_PREV_PRI_BLD();
  3060.                
  3061.                 StdSP_Render (& rtinfos[4], mode_mask,
  3062.                   layer_start << 10,
  3063.                         en_effect,   bld_map,
  3064.                  sp_solid,
  3065.                   coeff_b,  coeff_a1,  coeff_a2);
  3066.           }
  3067.         }
  3068.       }
  3069.       CALC_PREV_PRI_BLD();
  3070.       bld_map |= spbld_mask;
  3071.      // if (ch == 0)
  3072.      //   continue;
  3073.       switch (mode) {
  3074.       case 1:
  3075.         if (ch != 2)
  3076.       case 0:
  3077.           bg_render_std (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3078.         else
  3079.       case 2:
  3080.           bg_render_rot (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3081.         break;
  3082.       case 3:
  3083.         if (ch == 2)
  3084.           mode3_render_rot (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3085.         break;
  3086.       case 4:
  3087.         if (ch == 2)
  3088.           mode4_render_rot (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3089.         break;
  3090.       case 5:
  3091.         if (ch == 2)
  3092.           mode5_render_rot (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3093.       default:
  3094.         break;
  3095.       }
  3096.     }
  3097.   }
  3098.   if (en_sprite != false) {
  3099.     CALC_PREV_PRI_BLD();
  3100.    
  3101.     for ( ; layer_cur != (uintptr_t) -1;
  3102.              layer_cur--){
  3103.  
  3104.           StdSP_Render (& rtinfos[4], mode_mask,
  3105.             layer_cur << 10,
  3106.                   en_effect,   bld_map,
  3107.             sp_solid,
  3108.             coeff_b,  coeff_a1,  coeff_a2);
  3109.     }
  3110.   }
  3111. }
  3112.  
  3113. finline
  3114. void nowin_render (struct gpu * gpu, uintptr_t * sort4_t,
  3115.                     struct rasterizer_caps * rtinfos_t,
  3116.                       const uint16_t coeff_b,
  3117.                       const uint16_t coeff_a1,
  3118.                       const uint16_t coeff_a2,
  3119.                       const uint16_t mode,
  3120.                                uint16_t en_bgmask )
  3121. {
  3122.   struct rasterizer_caps rtinfos[5];
  3123.   uintptr_t sort[4];
  3124.   uintptr_t layer_cur;
  3125.   uintptr_t layer_van;
  3126.   uintptr_t cnt;
  3127.   uint16_t bld_map;
  3128.   uint16_t spbld_mask;
  3129.   const uintptr_t enb_mask = 0x100;
  3130.   const uintptr_t sp_mask =  enb_mask << 4;
  3131.   const kable en_sprite = !! (en_bgmask & sp_mask);
  3132.  
  3133.   memcpy (rtinfos, rtinfos_t, sizeof (rtinfos));
  3134.   memcpy (sort, sort4_t, sizeof (sort));
  3135.  
  3136.   layer_cur = 3;
  3137.   layer_van = 3;
  3138.   spbld_mask = 0;
  3139.  
  3140.   if (gpu->bld_ctl.blk & sp_mask)
  3141.     spbld_mask |= SOLID_CHAN_SPRITE_MASK;
  3142.  
  3143.   for (cnt = 0; cnt != 4; cnt++) {
  3144.     const uintptr_t bga_isa = sort[cnt];
  3145.     const uintptr_t ch = bga_isa & 3;
  3146.     const uintptr_t en_mask = enb_mask << ch;
  3147.  
  3148.     if (en_bgmask & en_mask) {
  3149.  
  3150.       if (  en_sprite != false ) {
  3151.  
  3152.         layer_van = layer_cur;
  3153.         layer_cur = bga_isa >> 8 & 3;
  3154.        
  3155.         /* Shadow window enable sprite ??*/
  3156.         if (layer_van != layer_cur) {
  3157.           uintptr_t layer_start;
  3158.          
  3159.           for ( layer_start = layer_van;
  3160.                 layer_start != layer_cur;
  3161.                 layer_start--){
  3162.           /* Collect target 2 blend/alpha mask  
  3163.                 It's important to assume that sprite doesn't mix with itself.
  3164.           */
  3165.                 CALC_PREV_PRI_BLD();
  3166.                
  3167.                 StdSP_Render (& rtinfos[4], 0,
  3168.                   layer_start << 10,
  3169.                         true,   bld_map,
  3170.                  solid_nowin,
  3171.                   coeff_b,  coeff_a1,  coeff_a2);
  3172.           }
  3173.         }
  3174.       }
  3175.       CALC_PREV_PRI_BLD();
  3176.       bld_map |= spbld_mask;
  3177.       // if (ch != 0)
  3178.      //   continue;
  3179.       switch (mode) {
  3180.       case 1:
  3181.         if (ch != 2)
  3182.       case 0:
  3183.           StdBG_Render_nowindow (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3184.         else
  3185.       case 2:
  3186.           StdBG_Render_Rot_nowindow (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3187.         break;
  3188.       case 3:
  3189.         if (ch == 2)
  3190.           Mode3_Render_nowindow (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3191.         break;
  3192.       case 4:
  3193.         if (ch == 2)
  3194.           Mode4_Render_nowindow (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3195.         break;
  3196.       case 5:
  3197.         if (ch == 2)
  3198.           Mode5_Render_nowindow (& rtinfos[ch], SOLID_CHAN_BG_BASE_MASK << ch, bld_map,  rtinfos[ch].opca == 1 ? coeff_a1 : coeff_b, coeff_a2);
  3199.       default:
  3200.         break;
  3201.       }
  3202.     }
  3203.   }
  3204.   if (en_sprite != false) {
  3205.     CALC_PREV_PRI_BLD();
  3206.    
  3207.     for ( ; layer_cur != (uintptr_t) -1;
  3208.              layer_cur--){
  3209.  
  3210.           StdSP_Render (& rtinfos[4], 0,
  3211.             layer_cur << 10,
  3212.                   true,   bld_map,
  3213.             solid_nowin,
  3214.             coeff_b,  coeff_a1,  coeff_a2);
  3215.     }
  3216.   }
  3217. }
  3218.  
  3219. void gpu_scanline (struct gpu *const gpu) {
  3220.  
  3221.   struct rasterizer_caps rtinfos[5];
  3222.  
  3223.   kable win_done = false;
  3224.   kable win0_done = false;
  3225.   kable win1_done = false;
  3226.   kable winobj_done = false;
  3227.   uintptr_t bg_enable_mask = gpu->ctl.blk;
  3228.   uintptr_t cnt;          
  3229.   uintptr_t sort[4] = { 0x0000 | gpu->chan[0].ctl.blk << 8 & 0x300,                    
  3230.                                  0x0001 | gpu->chan[1].ctl.blk << 8 & 0x300,
  3231.                                  0x0002 | gpu->chan[2].ctl.blk << 8 & 0x300,
  3232.                                  0x0003 | gpu->chan[3].ctl.blk << 8 & 0x300 };
  3233.   uintptr_t n;
  3234.  
  3235.   const uintptr_t gpu_mode = gpu->ctl.blk & 7;
  3236.   const uint16_t coeff_b_t =  gpu->bri_args.blk & 31;
  3237.   const uint16_t coeff_a1_t = gpu->bld_args.blk & 31;
  3238.   const uint16_t coeff_a2_t = gpu->bld_args.blk >> 8 & 31;
  3239.   const uint16_t coeff_b =  (coeff_b_t > 16) ? 16 : coeff_b_t;
  3240.   const uint16_t coeff_a1 = (coeff_a1_t > 16) ? 16 : coeff_a1_t;
  3241.   const uint16_t coeff_a2 = (coeff_a2_t > 16) ? 16 : coeff_a2_t;
  3242.            uint16_t *const vbptr = & gpu->vbuf[gpu->vptr_pitch/2*gpu->line.blk + 8];
  3243.  
  3244.   if (gpu->ctl.blk & 0x2000)
  3245.     win0_done = true;
  3246.   if (gpu->ctl.blk & 0x4000)
  3247.     win1_done = true;
  3248.   if (gpu->ctl.blk & 0x8000)
  3249.     winobj_done = true;
  3250.   if (winobj_done || win1_done ||  win0_done)
  3251.     win_done = true;
  3252.  
  3253.   /* TODO: More rigorous  testing, repeater code emmm... */
  3254.   switch (gpu_mode) {
  3255.   case 0:
  3256.     /* MODE-0 ALIVE BG : BG0 BG1 BG2 BG3 - 2D MAPPER BASE NES/CGB Tile extend (4Bit/8Bit Switchable) */
  3257.     break;
  3258.   case 1:
  3259.     /* MODE-1 ALIVE BG : BG0 BG1 BG2(2 is AffineBG) */
  3260.     bg_enable_mask &= ~(1 << 11);
  3261.     break;
  3262.   case 2:
  3263.     /* MODE-2 ALIVE BG : BG2 BG3(2, 3 is AffineBG) */
  3264.     bg_enable_mask &= ~(3 << 8);
  3265.     break;
  3266.   case 3:
  3267.     /* MODE-3 240x160 pixels, 32768 colors 75KB */
  3268.   case 4:
  3269.     /* MODE-4 240x160 pixels, 255 colors 2 frame */
  3270.   case 5:
  3271.     /* MODE-5 160*128 pixels, 32768 colors 2 frame */
  3272.     bg_enable_mask &= 0x400;
  3273.     break;
  3274.   case 6:
  3275.   case 7:
  3276.     DEBUG_BREAK ();
  3277.     break;
  3278.   }
  3279.  
  3280.   /* fill backdrop color
  3281.       see. GBA Programming Manual v1.1 :: 7 Color Palettes::3) Color 0 Transparency (page 73)
  3282.   */
  3283.   vec_memset16a512b (vbptr, * (uint16_t *)& gpu->palette16_b[0]);
  3284.   vec_memset16a512b (& gpu->vattr[8], 0);
  3285.  
  3286.   for (cnt = 0; cnt != sizeof (rtinfos)/ sizeof (rtinfos[0]); cnt++) {
  3287.  
  3288.     const uint8_t mosaic = gpu->mosaic.blk >> ((cnt == 4) ? 8 : 0) & 0xFF;
  3289.  
  3290.     rtinfos[cnt].chan = & gpu->chan[cnt];
  3291.     rtinfos[cnt].chanId = cnt;
  3292.     rtinfos[cnt].gpu = gpu;
  3293.     rtinfos[cnt].order = 0;
  3294.     rtinfos[cnt].sub_order = 0;
  3295.     rtinfos[cnt].chr_base = 0;
  3296.     rtinfos[cnt].opca = 0;
  3297.     rtinfos[cnt].winId = cnt & 1;
  3298.     rtinfos[cnt].solid = 0;
  3299.     rtinfos[cnt].interp_x = false;
  3300.     rtinfos[cnt].interp_y = false;
  3301.     rtinfos[cnt].interp_xvec = mosaic & 0x0F;
  3302.     rtinfos[cnt].interp_yvec = mosaic >> 4 & 0x0F;
  3303.  
  3304.     if ( (rtinfos[cnt].chan->ctl.blk & 64)) {
  3305.       if (rtinfos[cnt].interp_xvec != 0)
  3306.         rtinfos[cnt].interp_x = true;
  3307.       if (rtinfos[cnt].interp_yvec != 0)
  3308.         rtinfos[cnt].interp_y = true;
  3309.     }
  3310.     if (gpu->bld_ctl.blk & 1 << cnt)
  3311.       rtinfos[cnt].opca = (gpu->bld_ctl.blk >> 6) & 3;
  3312.     else ;
  3313.   }
  3314.  
  3315.   for (cnt = 0; cnt != 3; cnt++) {
  3316.     int c;
  3317.     for (c = cnt + 1; c != 4; c++)
  3318.       if (sort[cnt] < sort[c]) {
  3319.         uintptr_t temp = sort[c];
  3320.         sort[c] = sort[cnt];
  3321.         sort[cnt] = temp;
  3322.       }
  3323.   }
  3324. #define VISUAL_GBA_LOGIC_FORGOTTEN
  3325. #ifndef VISUAL_GBA_LOGIC_FORGOTTEN
  3326.   if (win_done != false) {
  3327.     if (winobj_done != false) {
  3328.       sp_render_shadow (& rtinfos[4]);
  3329.       win_render (gpu, & sort[0], & rtinfos[0], SOLID_MODE_OBJWIN_MASK, gpu->win_out.blk, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3330.         bg_enable_mask, solid_shadow, StdBG_Render_shadow, StdBG_Render_Rot_shadow);
  3331.     }
  3332.     if (win1_done != false) {
  3333.         uintptr_t left = gpu->clip_x[1].blk >> 8 & 255;
  3334.         uintptr_t right = (gpu->clip_x[1].blk & 255);
  3335.         uintptr_t top = gpu->clip_y[1].blk >> 8 & 255;
  3336.         uintptr_t bottom = (gpu->clip_y[1].blk & 255);
  3337.         uint16_t *vattr_b = & gpu->vattr[8];
  3338.  
  3339.         if (!(left | right))
  3340.           goto _out;
  3341.         if (!(top | bottom))
  3342.           goto _out;
  3343.         if (left == right)
  3344.           goto _out;
  3345.         if (bottom == top)
  3346.           goto _out;
  3347.  
  3348.         if (right >= 240 || left >= right)
  3349.           right = 240;
  3350.         if (bottom >= 160 || top >= bottom)
  3351.           bottom = 160;
  3352.         if (!(gpu->line.blk >top
  3353.           && gpu->line.blk < bottom))
  3354.           goto _out;
  3355.  
  3356.         for (n = left; n != right; n++) {
  3357.             vattr_b[n] = SOLID_MODE_WIN1_MASK;
  3358.         }
  3359.        win_render (gpu, & sort[0], & rtinfos[0], SOLID_MODE_WIN1_MASK, gpu->win_in.blk, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3360.         bg_enable_mask, solid_win1, StdBG_Render_win1, StdBG_Render_Rot_win1);
  3361.     }
  3362.     _out:
  3363.     if (win0_done != false) {
  3364.         uintptr_t left = gpu->clip_x[0].blk >> 8 & 255;
  3365.         uintptr_t right = (gpu->clip_x[0].blk & 255);
  3366.         uintptr_t top = gpu->clip_y[0].blk >> 8 & 255;
  3367.         uintptr_t bottom = (gpu->clip_y[0].blk & 255);
  3368.         uint16_t *vattr_b = & gpu->vattr[8];
  3369.  
  3370.         if (!(left | right))
  3371.           goto _out2;
  3372.         if (!(top | bottom))
  3373.           goto _out2;
  3374.         if (left == right)
  3375.           goto _out2;
  3376.         if (bottom == top)
  3377.           goto _out2;
  3378.         if (right >= 240 || left >= right)
  3379.           right = 240;
  3380.         if (bottom >= 160 || top >= bottom)
  3381.           bottom = 160;
  3382.         if (left >= right)
  3383.          goto _out2;
  3384.         if (top >= bottom)
  3385.          goto _out2;
  3386.         if (!(gpu->line.blk >top
  3387.           && gpu->line.blk < bottom))
  3388.           goto _out2;
  3389.  
  3390.         for (n = left; n != right; n++) {
  3391.             vattr_b[n] = SOLID_MODE_WIN0_MASK;
  3392.         }
  3393.        win_render (gpu, & sort[0], & rtinfos[0], SOLID_MODE_WIN0_MASK, gpu->win_in.blk << 8, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3394.         bg_enable_mask, solid_win0, StdBG_Render_win0, StdBG_Render_Rot_win0);
  3395.     }
  3396.     _out2:
  3397.        win_render (gpu, & sort[0], & rtinfos[0], 0, gpu->win_out.blk << 8, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3398.         bg_enable_mask, solid_out, StdBG_Render_outwin, StdBG_Render_Rot_outwin);
  3399.   } else  nowin_render (gpu, sort, rtinfos, coeff_b, coeff_a1, coeff_a2, gpu_mode, bg_enable_mask);
  3400. #else
  3401.   if (win_done != false) {
  3402.     if (winobj_done != false) {
  3403.       sp_render_shadow (& rtinfos[4]);
  3404.       win_render (gpu, & sort[0], & rtinfos[0], SOLID_MODE_OBJWIN_MASK, gpu->win_out.blk, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3405.         bg_enable_mask, solid_shadow, StdBG_Render_shadow, StdBG_Render_Rot_shadow, Mode3_Render_shadow, Mode4_Render_shadow, Mode5_Render_shadow);
  3406.     }
  3407.     if (win1_done != false) {
  3408.         uintptr_t left = gpu->clip_x[1].blk >> 8 & 255;
  3409.         uintptr_t right = (gpu->clip_x[1].blk & 255);
  3410.         uintptr_t top = gpu->clip_y[1].blk >> 8 & 255;
  3411.         uintptr_t bottom = (gpu->clip_y[1].blk & 255);
  3412.         uint16_t *vattr_b = & gpu->vattr[8];
  3413.  
  3414.         if (top > bottom)
  3415.           if (!(gpu->line.blk >= top || gpu->line.blk < bottom))
  3416.             goto _out;
  3417.           else {}
  3418.         else if (top < bottom)
  3419.           if (!(gpu->line.blk >= top && gpu->line.blk < bottom))
  3420.             goto _out;
  3421.           else {}
  3422.  
  3423.         if (left > right) {
  3424.           for (n = left; n < 256; n++) {
  3425.             vattr_b[n] = SOLID_MODE_WIN1_MASK;
  3426.             vbptr[n] = *(uint16_t *)& gpu->palette16_b[0];
  3427.           }
  3428.           for (n = right; n > 0; n--) {
  3429.             vattr_b[n] = SOLID_MODE_WIN1_MASK;
  3430.             vbptr[n] = *(uint16_t *)& gpu->palette16_b[0];
  3431.           }
  3432.         } else {
  3433.           for (n = left; n != right; n++) {
  3434.             vattr_b[n] = SOLID_MODE_WIN1_MASK;
  3435.             vbptr[n] = *(uint16_t *)& gpu->palette16_b[0];
  3436.           }
  3437.         }
  3438.        win_render (gpu, & sort[0], & rtinfos[0], SOLID_MODE_WIN1_MASK, gpu->win_in.blk, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3439.         bg_enable_mask, solid_win1, StdBG_Render_win1, StdBG_Render_Rot_win1, Mode3_Render_win1, Mode4_Render_win1, Mode5_Render_win1);
  3440.     }
  3441.     _out:
  3442.     if (win0_done != false) {
  3443.         uintptr_t left = gpu->clip_x[0].blk >> 8 & 255;
  3444.         uintptr_t right = (gpu->clip_x[0].blk & 255);
  3445.         uintptr_t top = gpu->clip_y[0].blk >> 8 & 255;
  3446.         uintptr_t bottom = (gpu->clip_y[0].blk & 255);
  3447.         uint16_t *vattr_b = & gpu->vattr[8];
  3448.  
  3449.         if (top > bottom)
  3450.           if (!(gpu->line.blk >= top || gpu->line.blk < bottom))
  3451.             goto _out2;
  3452.           else {}
  3453.         else if (top < bottom)
  3454.           if (!(gpu->line.blk >= top && gpu->line.blk < bottom))
  3455.             goto _out2;
  3456.           else {}
  3457.  
  3458.         if (left > right) {
  3459.           for (n = left; n < 256; n++) {
  3460.             vattr_b[n] = SOLID_MODE_WIN0_MASK;
  3461.             vbptr[n] = *(uint16_t *)& gpu->palette16_b[0];
  3462.           }
  3463.           for (n = right; n > 0; n--) {
  3464.             vattr_b[n] = SOLID_MODE_WIN0_MASK;
  3465.             vbptr[n] = *(uint16_t *)& gpu->palette16_b[0];
  3466.           }
  3467.         } else {
  3468.           for (n = left; n != right; n++) {
  3469.             vattr_b[n] = SOLID_MODE_WIN0_MASK;
  3470.             vbptr[n] = *(uint16_t *)& gpu->palette16_b[0];
  3471.           }
  3472.         }
  3473.        win_render (gpu, & sort[0], & rtinfos[0], SOLID_MODE_WIN0_MASK, gpu->win_in.blk << 8, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3474.         bg_enable_mask, solid_win0, StdBG_Render_win0, StdBG_Render_Rot_win0, Mode3_Render_win0, Mode4_Render_win0, Mode5_Render_win0);
  3475.     }
  3476.     _out2:
  3477.        win_render (gpu, & sort[0], & rtinfos[0], 0, gpu->win_out.blk << 8, coeff_b, coeff_a1, coeff_a2, gpu_mode,
  3478.         bg_enable_mask, solid_out, StdBG_Render_outwin, StdBG_Render_Rot_outwin, Mode3_Render_outwin, Mode4_Render_outwin, Mode5_Render_outwin);
  3479.   } else  nowin_render (gpu, sort, rtinfos, coeff_b, coeff_a1, coeff_a2, gpu_mode, bg_enable_mask);
  3480. #endif
  3481. }
  3482.  
  3483. finline
  3484. uint16_t gpu_read (struct gpu *gpu, uint32_t addr) {
  3485.   switch (addr & 0x3FF) {
  3486.   case 0x000: return gpu->ctl.blk;
  3487.   case 0x002: return gpu->col_swap.blk;
  3488.   case 0x004: return gpu->status.blk;
  3489.   case 0x006: return gpu->line.blk;
  3490.   case 0x008: return gpu->chan[0].ctl.blk;
  3491.   case 0x00A: return gpu->chan[1].ctl.blk;
  3492.   case 0x00C: return gpu->chan[2].ctl.blk;
  3493.   case 0x00E: return gpu->chan[3].ctl.blk;
  3494.   case 0x048: return gpu->win_in.blk & 0x3F3F;
  3495.   case 0x04A: return gpu->win_out.blk & 0x3F3F;
  3496.   case 0x050: return gpu->bld_ctl.blk & 0x3FFF;
  3497.   case 0x052: return gpu->bld_args.blk & 0x1F1F;
  3498.   case 0x04E: return gpu->ugpio_4E.blk;
  3499.   case 0x056: return gpu->ugpio_56.blk;
  3500.   case 0x010:
  3501.   case 0x012:
  3502.   case 0x014:
  3503.   case 0x016:
  3504.   case 0x018:
  3505.   case 0x01A:
  3506.   case 0x01C:
  3507.   case 0x01E:
  3508.   case 0x020:
  3509.   case 0x022:
  3510.   case 0x024:
  3511.   case 0x026:
  3512.   case 0x028:
  3513.   case 0x02A:
  3514.   case 0x02C: return gpu->agb->arm7.opcode[0];
  3515.   default:
  3516.     DEBUG_BREAK ();
  3517.     return 0;
  3518.   }
  3519. }
  3520.  
  3521. finline
  3522. void gpu_write (struct gpu *gpu, uint32_t addr, uint16_t value, const IO_WRITE_BLOCK block) {
  3523.  
  3524. #define blk_done16_8(io_blk) \
  3525.   (block == IO_WRITE_16) ? ((io_blk).blk = value) : ((io_blk).blk8[addr&1] = (uint8_t) (value))
  3526. #define blk_done16_8_ext(io_blk, value16, value_lo8, value_hi8) \
  3527.   (block == IO_WRITE_16) ? ((io_blk).blk = (value16)) \
  3528.                       : (   (addr&1)    ?((io_blk).blk8[addr&1] = (uint8_t) (value_hi8)) \
  3529.                                               : ((io_blk).blk8[addr&1] = (uint8_t) (value_lo8)))
  3530. #define blk_done16_8_x32(io_blk, blk16_pos) \
  3531.   (block == IO_WRITE_16) ? ((io_blk).blk16[blk16_pos] = value) : (  fto_uint8p ((io_blk).blk16[blk16_pos])[addr & 1] = (uint8_t) (value))
  3532.  
  3533.   switch (addr & 0x7FE) {
  3534.   case 0x000: blk_done16_8 (gpu->ctl); break;
  3535.   case 0x002: blk_done16_8 (gpu->col_swap); break;
  3536.   case 0x004: blk_done16_8_ext (gpu->status,gpu->status.blk & 0x47 | value & ~0x47, gpu->status.blk & 0x47 | value & ~0x47, value); break;
  3537.   case 0x008: blk_done16_8_ext (gpu->chan[0].ctl ,value & ~0x2000, value & ~0x2000, value); break;
  3538.   case 0x00A: blk_done16_8_ext (gpu->chan[1].ctl ,value & ~0x2000, value & ~0x2000, value); break;
  3539.   case 0x00C: blk_done16_8_ext (gpu->chan[2].ctl ,value, value, value); break;
  3540.   case 0x00E: blk_done16_8_ext (gpu->chan[3].ctl ,value, value, value); break;
  3541.   case 0x010: blk_done16_8 (gpu->chan[0].loopy_x); break;
  3542.   case 0x012: blk_done16_8 (gpu->chan[0].loopy_y); break;
  3543.   case 0x014: blk_done16_8 (gpu->chan[1].loopy_x); break;
  3544.   case 0x016: blk_done16_8 (gpu->chan[1].loopy_y); break;
  3545.   case 0x018: blk_done16_8 (gpu->chan[2].loopy_x); break;
  3546.   case 0x01A: blk_done16_8 (gpu->chan[2].loopy_y); break;
  3547.   case 0x01C: blk_done16_8 (gpu->chan[3].loopy_x); break;
  3548.   case 0x01E: blk_done16_8 (gpu->chan[3].loopy_y); break;
  3549.   case 0x020: blk_done16_8 (gpu->chan[2].dx); break;
  3550.   case 0x022: blk_done16_8 (gpu->chan[2].dmx);  break;
  3551.   case 0x024: blk_done16_8 (gpu->chan[2].dy); break;
  3552.   case 0x026: blk_done16_8 (gpu->chan[2].dmy); break;
  3553.   case 0x030: blk_done16_8 (gpu->chan[3].dx); break;
  3554.   case 0x032: blk_done16_8 (gpu->chan[3].dmx); break;
  3555.   case 0x034: blk_done16_8 (gpu->chan[3].dy); break;
  3556.   case 0x036: blk_done16_8 (gpu->chan[3].dmy); break;
  3557.   case 0x040: blk_done16_8 (gpu->clip_x[0]); break;
  3558.   case 0x042: blk_done16_8 (gpu->clip_x[1]); break;
  3559.   case 0x044: blk_done16_8 (gpu->clip_y[0]); break;
  3560.   case 0x046: blk_done16_8 (gpu->clip_y[1]); break;
  3561.   case 0x048: blk_done16_8 (gpu->win_in); break;
  3562.   case 0x04A: blk_done16_8 (gpu->win_out); break;
  3563.   case 0x04C: blk_done16_8 (gpu->mosaic); break;
  3564.   case 0x050: blk_done16_8 (gpu->bld_ctl); break;
  3565.   case 0x052: blk_done16_8 (gpu->bld_args); break;
  3566.   case 0x054: blk_done16_8 (gpu->bri_args); break;
  3567.   case 0x04E: blk_done16_8 (gpu->ugpio_4E); break;
  3568.   case 0x056: blk_done16_8 (gpu->ugpio_56); break;
  3569.   case 0x028:
  3570.     blk_done16_8_x32 (gpu->chan[2].ref_x, 0);
  3571.     gpu->chan[2].loopy_dmx.blk = gpu->chan[2].ref_x.blk;
  3572.     if (gpu->chan[2].loopy_dmx.blk & 0x8000000)
  3573.       gpu->chan[2].loopy_dmx.blk |= 0xF0000000;
  3574.     break;
  3575.   case 0x02A:
  3576.     blk_done16_8_x32 (gpu->chan[2].ref_x, 1);
  3577.     gpu->chan[2].loopy_dmx.blk = gpu->chan[2].ref_x.blk;
  3578.     if (gpu->chan[2].loopy_dmx.blk & 0x8000000)
  3579.       gpu->chan[2].loopy_dmx.blk |= 0xF0000000;
  3580.     break;
  3581.   case 0x02C:
  3582.     blk_done16_8_x32 (gpu->chan[2].ref_y, 0);
  3583.     gpu->chan[2].loopy_dmy.blk = gpu->chan[2].ref_y.blk;
  3584.     if (gpu->chan[2].loopy_dmy.blk & 0x8000000)
  3585.       gpu->chan[2].loopy_dmy.blk |= 0xF0000000;
  3586.     break;
  3587.   case 0x02E:
  3588.     blk_done16_8_x32 (gpu->chan[2].ref_y, 1);
  3589.     gpu->chan[2].loopy_dmy.blk = gpu->chan[2].ref_y.blk;
  3590.     if (gpu->chan[2].loopy_dmy.blk & 0x8000000)
  3591.       gpu->chan[2].loopy_dmy.blk |= 0xF0000000;
  3592.     break;
  3593.   case 0x038:
  3594.     blk_done16_8_x32 (gpu->chan[3].ref_x, 0);
  3595.     gpu->chan[3].loopy_dmx.blk = gpu->chan[3].ref_x.blk;
  3596.     if (gpu->chan[3].loopy_dmx.blk & 0x8000000)
  3597.       gpu->chan[3].loopy_dmx.blk |= 0xF0000000;
  3598.     break;
  3599.   case 0x03A:
  3600.     blk_done16_8_x32 (gpu->chan[3].ref_x, 1);
  3601.     gpu->chan[3].loopy_dmx.blk = gpu->chan[3].ref_x.blk;
  3602.     if (gpu->chan[3].loopy_dmx.blk & 0x8000000)
  3603.       gpu->chan[3].loopy_dmx.blk |= 0xF0000000;
  3604.     break;
  3605.   case 0x03C:
  3606.     blk_done16_8_x32 (gpu->chan[3].ref_y, 0);
  3607.     gpu->chan[3].loopy_dmy.blk = gpu->chan[3].ref_y.blk;
  3608.     if (gpu->chan[3].loopy_dmy.blk & 0x8000000)
  3609.       gpu->chan[3].loopy_dmy.blk |= 0xF0000000;
  3610.     break;
  3611.   case 0x03E:
  3612.     blk_done16_8_x32 (gpu->chan[3].ref_y, 1);
  3613.     gpu->chan[3].loopy_dmy.blk = gpu->chan[3].ref_y.blk;
  3614.     if (gpu->chan[3].loopy_dmy.blk & 0x8000000)
  3615.       gpu->chan[3].loopy_dmy.blk |= 0xF0000000;
  3616.     break;
  3617.   default:
  3618.     DEBUG_BREAK ();
  3619.     break;
  3620.   }
  3621. }
  3622.  
  3623. finline
  3624. void gpu_dispose_invblank (struct gpu *gpu) {
  3625.   /* deal srcoll, affine function */
  3626.   int id;
  3627.   for (id = 0; id != 4; id++) {
  3628.     struct gpu_channel *t = & gpu->chan[id];
  3629.  
  3630.     t->loopy_y_shadow.blk = t->loopy_y.blk;
  3631.     t->loopy_x_shadow.blk = t->loopy_x.blk;
  3632.     t->dx_shadow.sblk = t->dx.sblk;
  3633.     t->dy_shadow.sblk = t->dy.sblk;
  3634.     t->dmx_shadow.sblk = t->dmx.sblk;
  3635.     t->dmy_shadow.sblk = t->dmy.sblk;
  3636.     t->loopy_dmx.sblk = t->ref_x.sblk;
  3637.     t->loopy_dmy.sblk = t->ref_y.sblk;
  3638.   }
  3639. }
  3640.  
  3641. finline
  3642. uint16_t swap_rb (uint16_t value) {
  3643.   return  value >> 10 & 31
  3644.         |          value & 31 << 5
  3645.         |          (value & 31) << 10;
  3646. }
  3647.  
  3648. finline
  3649. uint32_t swap_rw (uint32_t value) {
  3650.   return  value >> 16 & 0xFF
  3651.         |          value & 0xFF << 8
  3652.         |          (value & 0xFF) << 16;
  3653. }
  3654.  
  3655. finline
  3656. uint32_t ext_rb (uint16_t value) {
  3657.   uint32_t a = value >> 0 & 0x1F;
  3658.   uint32_t b = value >> 5 & 0x1F;
  3659.   uint32_t c = value >>10 & 0x1F;
  3660.   a <<= 3;
  3661.   b <<= 3;
  3662.   c <<= 3;
  3663.   return a | b << 8 | c << 16;
  3664. }
  3665.  
  3666. finline
  3667. void gpu_dispose_inhblank (struct gpu *gpu, kable update_affine) {
  3668.   /* -------------------------------------------------- */
  3669.   int id;
  3670.   for (id = 0; id != 4; id++) {
  3671.     struct gpu_channel *t = & gpu->chan[id];
  3672.     t->loopy_y_shadow.blk = t->loopy_y.blk;
  3673.     t->loopy_x_shadow.blk = t->loopy_x.blk;
  3674.     t->dx_shadow.sblk = t->dx.sblk;
  3675.     t->dy_shadow.sblk = t->dy.sblk;
  3676.     t->dmx_shadow.sblk = t->dmx.sblk;
  3677.     t->dmy_shadow.sblk = t->dmy.sblk;
  3678.  
  3679.     if (update_affine) {
  3680.       t->loopy_dmx.blk += t->dmx_shadow.blk;
  3681.       t->loopy_dmy.blk += t->dmy_shadow.blk;
  3682.     }
  3683.   }
  3684. }
  3685.  
  3686. finline
  3687. void gpu_adjust_palette (struct gpu *gpu, uint16_t io_address) {
  3688.   int32_t id = io_address & 0x3FF;
  3689.   uint16_t *ptr16 = (uint16_t *) & gpu->palette[id & -2];
  3690.   * (uint16_t *)& gpu->palette2[id & -2] = swap_rb (* (uint16_t *)& gpu->palette[id & -2]);
  3691.   * (uint32_t *)& gpu->palette3[id & -2] = ext_rb (* (uint16_t *)& gpu->palette[id & -2]);
  3692.   * (uint32_t *)& gpu->palette4[id & -2] = swap_rw (* (uint32_t *)& gpu->palette3[id & -2]);
  3693. }
  3694. finline
  3695. void gpu_adjust_palette16 (struct gpu *gpu, uint16_t io_address) {
  3696.   const uint32_t ioaddr = io_address & -2;
  3697.   gpu_adjust_palette (gpu, ioaddr);
  3698.   gpu_adjust_palette (gpu, ioaddr + 1);
  3699. }
  3700. finline
  3701. void gpu_adjust_palette32 (struct gpu *gpu, uint16_t io_address) {
  3702.   const uint32_t ioaddr = io_address & -4;
  3703.   gpu_adjust_palette16 (gpu, ioaddr);
  3704.   gpu_adjust_palette16 (gpu, ioaddr + 2);
  3705. }
  3706. finline
  3707. void gpu_adjust_palette64 (struct gpu *gpu, uint16_t io_address) {
  3708.   const uint32_t ioaddr = io_address & -8;
  3709.   gpu_adjust_palette32 (gpu, ioaddr);
  3710.   gpu_adjust_palette32 (gpu, ioaddr + 4);
  3711. }
  3712.  
  3713. finline
  3714. void gpu_init (struct gpu *gpu) {
  3715.   /* -------------------------------------------------- */
  3716.   memset (gpu, 0, sizeof (struct gpu));
  3717.  
  3718.   gpu->vattr = (uint16_t *) (((uintptr_t)& gpu->vattrb[512]) & (uintptr_t) -128);
  3719.   gpu->vbuf = (uint16_t *) (((uintptr_t)& gpu->vbufb[512+1024*180]) & (uintptr_t) -128);
  3720.   gpu->vattr -= 8;
  3721.   gpu->vbuf -= 8;
  3722.   gpu->vptr_pitch = 2048;
  3723.   gpu->vptrcc = (uint16_t *) (((uintptr_t)& gpu->vptr_cahce[512]) & (uintptr_t) -128);
  3724.   gpu->vid_buf.pixel = (int16_t *) & gpu->vbuf[8];
  3725.   gpu->vid_buf.h = 160;
  3726.   gpu->vid_buf.w = 240;
  3727.   gpu->vid_buf.pitch = 2048;
  3728.   gpu->palette16_b = gpu->palette2;
  3729. }
  3730.  
  3731. finline
  3732. void gpu_reset (struct gpu *gpu) {
  3733.   /* -------------------------------------------------- */
  3734.   gpu->ctl.blk = 0;
  3735.   gpu->col_swap.blk = 0;
  3736.   gpu->status.blk = 0;
  3737.   gpu->line.blk = 0;
  3738.   gpu->clip_x[0].blk = 0;
  3739.   gpu->clip_y[0].blk = 0;
  3740.   gpu->clip_x[1].blk = 0;
  3741.   gpu->clip_y[1].blk = 0;
  3742.   gpu->win_in.blk = 0;
  3743.   gpu->win_out.blk = 0;
  3744.   gpu->mosaic.blk = 0;
  3745.   gpu->bld_ctl.blk = 0;
  3746.   gpu->bld_args.blk = 0;
  3747.   gpu->bri_args.blk = 0;
  3748.   gpu->ugpio_4E.blk = 0;
  3749.   gpu->ugpio_56.blk = 0;
  3750.  
  3751.   memset (gpu->oam, 0, sizeof (gpu->oam));
  3752.   memset (& gpu->chan[0], 0, sizeof (struct gpu_channel));
  3753.   memset (& gpu->chan[1], 0, sizeof (struct gpu_channel));
  3754.   memset (& gpu->chan[2], 0, sizeof (struct gpu_channel));
  3755.   memset (& gpu->chan[3], 0, sizeof (struct gpu_channel));
  3756.   memset (& gpu->chan[4], 0, sizeof (struct gpu_channel));
  3757.  
  3758.   gpu->vattr = (uint16_t *) (((uintptr_t)& gpu->vattrb[512]) & (uintptr_t) -128);
  3759.   gpu->vbuf = (uint16_t *) (((uintptr_t)& gpu->vbufb[512+1024*180]) & (uintptr_t) -128);
  3760.   gpu->vattr -= 8;
  3761.   gpu->vbuf -= 8;
  3762.   gpu->vptr_pitch = 2048;
  3763.   gpu->vptrcc = (uint16_t *) (((uintptr_t)& gpu->vptr_cahce[512]) & (uintptr_t) -128);
  3764.   gpu->vid_buf.pixel = (int16_t *) & gpu->vbuf[8];
  3765.   gpu->vid_buf.h = 160;
  3766.   gpu->vid_buf.w = 240;
  3767.   gpu->vid_buf.pitch = 2048;
  3768. }
  3769.  
  3770. #ifdef __cplusplus
  3771. }      
  3772. #endif
  3773.  
  3774. #endif
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement