SHARE
TWEET

Wii U swizzling code

AboodXD May 19th, 2017 (edited) 384 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Surface format IDs that swizzle() treats as block-compressed: for these it
# converts width and height from pixels to 4x4 blocks before addressing.
# NOTE(review): presumably the GX2 BC1..BC5 format IDs - confirm against the
# GX2 surface format enumeration.
BCn_formats = [0x31, 0x431, 0x32, 0x432, 0x33, 0x433, 0x34, 0x234, 0x35, 0x235]

# If swizzling, set to True.
# If deswizzling, set to False.
do_swizzle = False

# Usage template: `result` is the data after swizzling (or deswizzling when
# do_swizzle is False).
# NOTE(review): width, height, format_, tileMode, pitch and data are not
# defined anywhere in this file and must be supplied by the caller. As written,
# this call also runs before `swizzle` is defined below and passes the function
# object `swizzle` itself as the swizzle value - replace that argument with the
# surface's integer swizzle and move the call after the definitions.
result = swizzle(width, height, format_, tileMode, swizzle, pitch, data, do_swizzle)
  9.  
  10.  
  11. def swizzle(width, height, format_, tileMode, swizzle_, pitch, data, toGFD=False):
  12.     result_ = bytearray(data)
  13.  
  14.     if format_ in BCn_formats:
  15.         width = (width + 3) // 4
  16.         height = (height + 3)  // 4
  17.  
  18.     for y in range(height):
  19.         for x in range(width):
  20.             bpp = surfaceGetBitsPerPixel(format_)
  21.             pipeSwizzle = (swizzle_ >> 8) & 1
  22.             bankSwizzle = (swizzle_ >> 9) & 3
  23.  
  24.             if tileMode == 0 or tileMode == 1:
  25.                 pos = AddrLib_computeSurfaceAddrFromCoordLinear(x, y, bpp, pitch)
  26.             elif tileMode == 2 or tileMode == 3:
  27.                 pos = AddrLib_computeSurfaceAddrFromCoordMicroTiled(x, y, bpp, pitch, tileMode)
  28.             else:
  29.                 pos = AddrLib_computeSurfaceAddrFromCoordMacroTiled(x, y, bpp, pitch, height, tileMode, pipeSwizzle,
  30.                                                                     bankSwizzle)
  31.  
  32.             bpp //= 8
  33.  
  34.             pos_ = (y * width + x) * bpp
  35.  
  36.             if toGFD:
  37.                 if (pos < len(data)) and (pos_ < len(data)):
  38.                     result[pos:pos + bpp] = data[pos_:pos_ + bpp]
  39.             else:
  40.                 if (pos_ < len(data)) and (pos < len(data)):
  41.                     result[pos_:pos_ + bpp] = data[pos:pos + bpp]
  42.  
  43.     return result_
  44.  
  45.  
  46. # Credits:
  47. #  -AddrLib: actual code
  48. #  -Exzap: modifying code to apply to Wii U textures
  49. #  -AboodXD: porting, code improvements and cleaning up
  50.  
  51. m_banks = 4
  52. m_banksBitcount = 2
  53. m_pipes = 2
  54. m_pipesBitcount = 1
  55. m_pipeInterleaveBytes = 256
  56. m_pipeInterleaveBytesBitcount = 8
  57. m_rowSize = 2048
  58. m_swapSize = 256
  59. m_splitSize = 2048
  60.  
  61. m_chipFamily = 2
  62.  
  63. MicroTilePixels = 8 * 8
  64.  
# Per-format hardware info table: 256 bytes, i.e. 64 entries of 4 bytes each.
# surfaceGetBitsPerPixel() indexes it with `(surfaceFormat & 0x3F) * 4` and
# reads byte 0 of the entry as the bits per pixel. The remaining three bytes
# of each entry are not read by any code visible in this file.
formatHwInfo = b"\x00\x00\x00\x01\x08\x03\x00\x01\x08\x01\x00\x01\x00\x00\x00\x01" \
               b"\x00\x00\x00\x01\x10\x07\x00\x00\x10\x03\x00\x01\x10\x03\x00\x01" \
               b"\x10\x0B\x00\x01\x10\x01\x00\x01\x10\x03\x00\x01\x10\x03\x00\x01" \
               b"\x10\x03\x00\x01\x20\x03\x00\x00\x20\x07\x00\x00\x20\x03\x00\x00" \
               b"\x20\x03\x00\x01\x20\x05\x00\x00\x00\x00\x00\x00\x20\x03\x00\x00" \
               b"\x00\x00\x00\x00\x00\x00\x00\x01\x20\x03\x00\x01\x00\x00\x00\x01" \
               b"\x00\x00\x00\x01\x20\x0B\x00\x01\x20\x0B\x00\x01\x20\x0B\x00\x01" \
               b"\x40\x05\x00\x00\x40\x03\x00\x00\x40\x03\x00\x00\x40\x03\x00\x00" \
               b"\x40\x03\x00\x01\x00\x00\x00\x00\x80\x03\x00\x00\x80\x03\x00\x00" \
               b"\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x10\x01\x00\x00" \
               b"\x10\x01\x00\x00\x20\x01\x00\x00\x20\x01\x00\x00\x20\x01\x00\x00" \
               b"\x00\x01\x00\x01\x00\x01\x00\x00\x00\x01\x00\x00\x60\x01\x00\x00" \
               b"\x60\x01\x00\x00\x40\x01\x00\x01\x80\x01\x00\x01\x80\x01\x00\x01" \
               b"\x40\x01\x00\x01\x80\x01\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00" \
               b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" \
               b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  81.  
  82.  
  83. def surfaceGetBitsPerPixel(surfaceFormat):
  84.     hwFormat = surfaceFormat & 0x3F
  85.     bpp = formatHwInfo[hwFormat * 4 + 0]
  86.  
  87.     return bpp
  88.  
  89.  
  90. def computeSurfaceThickness(tileMode):
  91.     thickness = 1
  92.  
  93.     if tileMode == 3 or tileMode == 7 or tileMode == 11 or tileMode == 13 or tileMode == 15:
  94.         thickness = 4
  95.  
  96.     elif tileMode == 16 or tileMode == 17:
  97.         thickness = 8
  98.  
  99.     return thickness
  100.  
  101.  
  102. def computePixelIndexWithinMicroTile(x, y, bpp, tileMode, z=0):
  103.     pixelBit6 = 0
  104.     pixelBit7 = 0
  105.     pixelBit8 = 0
  106.     thickness = computeSurfaceThickness(tileMode)
  107.  
  108.     if bpp == 0x08:
  109.         pixelBit0 = x & 1
  110.         pixelBit1 = (x & 2) >> 1
  111.         pixelBit2 = (x & 4) >> 2
  112.         pixelBit3 = (y & 2) >> 1
  113.         pixelBit4 = y & 1
  114.         pixelBit5 = (y & 4) >> 2
  115.  
  116.     elif bpp == 0x10:
  117.         pixelBit0 = x & 1
  118.         pixelBit1 = (x & 2) >> 1
  119.         pixelBit2 = (x & 4) >> 2
  120.         pixelBit3 = y & 1
  121.         pixelBit4 = (y & 2) >> 1
  122.         pixelBit5 = (y & 4) >> 2
  123.  
  124.     elif bpp == 0x20 or bpp == 0x60:
  125.         pixelBit0 = x & 1
  126.         pixelBit1 = (x & 2) >> 1
  127.         pixelBit2 = y & 1
  128.         pixelBit3 = (x & 4) >> 2
  129.         pixelBit4 = (y & 2) >> 1
  130.         pixelBit5 = (y & 4) >> 2
  131.  
  132.     elif bpp == 0x40:
  133.         pixelBit0 = x & 1
  134.         pixelBit1 = y & 1
  135.         pixelBit2 = (x & 2) >> 1
  136.         pixelBit3 = (x & 4) >> 2
  137.         pixelBit4 = (y & 2) >> 1
  138.         pixelBit5 = (y & 4) >> 2
  139.  
  140.     elif bpp == 0x80:
  141.         pixelBit0 = y & 1
  142.         pixelBit1 = x & 1
  143.         pixelBit2 = (x & 2) >> 1
  144.         pixelBit3 = (x & 4) >> 2
  145.         pixelBit4 = (y & 2) >> 1
  146.         pixelBit5 = (y & 4) >> 2
  147.  
  148.     else:
  149.         pixelBit0 = x & 1
  150.         pixelBit1 = (x & 2) >> 1
  151.         pixelBit2 = y & 1
  152.         pixelBit3 = (x & 4) >> 2
  153.         pixelBit4 = (y & 2) >> 1
  154.         pixelBit5 = (y & 4) >> 2
  155.  
  156.     if thickness > 1:
  157.         pixelBit6 = z & 1
  158.         pixelBit7 = (z & 2) >> 1
  159.  
  160.     if thickness == 8:
  161.         pixelBit8 = (z & 4) >> 2
  162.  
  163.     return ((pixelBit8 << 8) | (pixelBit7 << 7) | (pixelBit6 << 6) |
  164.             32 * pixelBit5 | 16 * pixelBit4 | 8 * pixelBit3 |
  165.             4 * pixelBit2 | pixelBit0 | 2 * pixelBit1)
  166.  
  167.  
  168. def computePipeFromCoordWoRotation(x, y):
  169.     # hardcoded to assume 2 pipes
  170.     return ((y >> 3) ^ (x >> 3)) & 1
  171.  
  172.  
  173. def computeBankFromCoordWoRotation(x, y):
  174.     numPipes = m_pipes
  175.     numBanks = m_banks
  176.     bank = 0
  177.  
  178.     if numBanks == 4:
  179.         bankBit0 = ((y // (16 * numPipes)) ^ (x >> 3)) & 1
  180.         bank = bankBit0 | 2 * (((y // (8 * numPipes)) ^ (x >> 4)) & 1)
  181.  
  182.     elif numBanks == 8:
  183.         bankBit0a = ((y // (32 * numPipes)) ^ (x >> 3)) & 1
  184.         bank = (bankBit0a | 2 * (((y // (32 * numPipes)) ^ (y // (16 * numPipes) ^ (x >> 4))) & 1) |
  185.             4 * (((y // (8 * numPipes)) ^ (x >> 5)) & 1))
  186.  
  187.     return bank
  188.  
  189.  
  190. def isThickMacroTiled(tileMode):
  191.     thickMacroTiled = 0
  192.  
  193.     if tileMode == 7 or tileMode == 11 or tileMode == 13 or tileMode == 15:
  194.         thickMacroTiled = 1
  195.  
  196.     return thickMacroTiled
  197.  
  198.  
  199. def isBankSwappedTileMode(tileMode):
  200.     bankSwapped = 0
  201.  
  202.     if tileMode == 8 or tileMode == 9 or tileMode == 10 or tileMode == 11 or tileMode == 14 or tileMode == 15:
  203.         bankSwapped = 1
  204.  
  205.     return bankSwapped
  206.  
  207.  
  208. def computeMacroTileAspectRatio(tileMode):
  209.     ratio = 1
  210.  
  211.     if tileMode == 8 or tileMode == 12 or tileMode == 14:
  212.         ratio = 1
  213.  
  214.     elif tileMode == 5 or tileMode == 9:
  215.         ratio = 2
  216.  
  217.     elif tileMode == 6 or tileMode == 10:
  218.         ratio = 4
  219.  
  220.     return ratio
  221.  
  222.  
  223. def computeSurfaceBankSwappedWidth(tileMode, bpp, pitch, numSamples=1):
  224.     if isBankSwappedTileMode(tileMode) == 0: return 0
  225.  
  226.     numBanks = m_banks
  227.     numPipes = m_pipes
  228.     swapSize = m_swapSize
  229.     rowSize = m_rowSize
  230.     splitSize = m_splitSize
  231.     groupSize = m_pipeInterleaveBytes
  232.     bytesPerSample = 8 * bpp
  233.  
  234.     try:
  235.         samplesPerTile = splitSize // bytesPerSample
  236.         slicesPerTile = max(1, numSamples // samplesPerTile)
  237.     except ZeroDivisionError:
  238.         slicesPerTile = 1
  239.  
  240.     if isThickMacroTiled(tileMode) != 0:
  241.         numSamples = 4
  242.  
  243.     bytesPerTileSlice = numSamples * bytesPerSample // slicesPerTile
  244.  
  245.     factor = computeMacroTileAspectRatio(tileMode)
  246.     swapTiles = max(1, (swapSize >> 1) // bpp)
  247.  
  248.     swapWidth = swapTiles * 8 * numBanks
  249.     heightBytes = numSamples * factor * numPipes * bpp // slicesPerTile
  250.     swapMax = numPipes * numBanks * rowSize // heightBytes
  251.     swapMin = groupSize * 8 * numBanks // bytesPerTileSlice
  252.  
  253.     bankSwapWidth = min(swapMax, max(swapMin, swapWidth))
  254.  
  255.     while not bankSwapWidth < (2 * pitch):
  256.         bankSwapWidth >>= 1
  257.  
  258.     return bankSwapWidth
  259.  
  260.  
  261. def AddrLib_computeSurfaceAddrFromCoordLinear(x, y, bpp, pitch):
  262.     rowOffset = y * pitch
  263.     pixOffset = x
  264.  
  265.     addr = (rowOffset + pixOffset) * bpp
  266.     addr //= 8
  267.  
  268.     return addr
  269.  
  270.  
  271. def AddrLib_computeSurfaceAddrFromCoordMicroTiled(x, y, bpp, pitch, tileMode):
  272.     microTileThickness = 1
  273.  
  274.     if tileMode == 3:
  275.         microTileThickness = 4
  276.  
  277.     microTileBytes = (MicroTilePixels * microTileThickness * bpp + 7) // 8
  278.     microTilesPerRow = pitch >> 3
  279.     microTileIndexX = x >> 3
  280.     microTileIndexY = y >> 3
  281.  
  282.     microTileOffset = microTileBytes * (microTileIndexX + microTileIndexY * microTilesPerRow)
  283.  
  284.     pixelIndex = computePixelIndexWithinMicroTile(x, y, bpp, tileMode)
  285.  
  286.     pixelOffset = bpp * pixelIndex
  287.  
  288.     pixelOffset >>= 3
  289.  
  290.     return pixelOffset + microTileOffset
  291.  
  292.  
  293. def AddrLib_computeSurfaceAddrFromCoordMacroTiled(x, y, bpp, pitch, height, tileMode, pipeSwizzle, bankSwizzle):
  294.     numPipes = m_pipes
  295.     numBanks = m_banks
  296.     numGroupBits = m_pipeInterleaveBytesBitcount
  297.     numPipeBits = m_pipesBitcount
  298.     numBankBits = m_banksBitcount
  299.  
  300.     microTileThickness = computeSurfaceThickness(tileMode)
  301.  
  302.     microTileBits = bpp * (microTileThickness * MicroTilePixels)
  303.     microTileBytes = (microTileBits + 7) // 8
  304.  
  305.     pixelIndex = computePixelIndexWithinMicroTile(x, y, bpp, tileMode)
  306.  
  307.     pixelOffset = bpp * pixelIndex
  308.  
  309.     elemOffset = pixelOffset
  310.  
  311.     bytesPerSample = microTileBytes
  312.     if microTileBytes <= m_splitSize:
  313.         numSamples = 1
  314.         sampleSlice = 0
  315.     else:
  316.         samplesPerSlice = m_splitSize // bytesPerSample
  317.         numSampleSplits = max(1, 1 // samplesPerSlice)
  318.         numSamples = samplesPerSlice
  319.         sampleSlice = elemOffset // (microTileBits // numSampleSplits)
  320.         elemOffset %= microTileBits // numSampleSplits
  321.     elemOffset += 7
  322.     elemOffset //= 8
  323.  
  324.     pipe = computePipeFromCoordWoRotation(x, y)
  325.     bank = computeBankFromCoordWoRotation(x, y)
  326.  
  327.     bankPipe = pipe + numPipes * bank
  328.  
  329.     swizzle_ = pipeSwizzle + numPipes * bankSwizzle
  330.  
  331.     bankPipe ^= numPipes * sampleSlice * ((numBanks >> 1) + 1) ^ swizzle_
  332.     bankPipe %= numPipes * numBanks
  333.     pipe = bankPipe % numPipes
  334.     bank = bankPipe // numPipes
  335.  
  336.     sliceBytes = (height * pitch * microTileThickness * bpp * numSamples + 7) // 8
  337.     sliceOffset = sliceBytes * (sampleSlice // microTileThickness)
  338.  
  339.     macroTilePitch = 8 * m_banks
  340.     macroTileHeight = 8 * m_pipes
  341.  
  342.     if tileMode == 5 or tileMode == 9:  # GX2_TILE_MODE_2D_TILED_THIN4 and GX2_TILE_MODE_2B_TILED_THIN2
  343.         macroTilePitch >>= 1
  344.         macroTileHeight *= 2
  345.  
  346.     elif tileMode == 6 or tileMode == 10:  # GX2_TILE_MODE_2D_TILED_THIN4 and GX2_TILE_MODE_2B_TILED_THIN4
  347.         macroTilePitch >>= 2
  348.         macroTileHeight *= 4
  349.  
  350.     macroTilesPerRow = pitch // macroTilePitch
  351.     macroTileBytes = (numSamples * microTileThickness * bpp * macroTileHeight * macroTilePitch + 7) // 8
  352.     macroTileIndexX = x // macroTilePitch
  353.     macroTileIndexY = y // macroTileHeight
  354.     macroTileOffset = (macroTileIndexX + macroTilesPerRow * macroTileIndexY) * macroTileBytes
  355.  
  356.     if tileMode == 8 or tileMode == 9 or tileMode == 10 or tileMode == 11 or tileMode == 14 or tileMode == 15:
  357.         bankSwapOrder = [0, 1, 3, 2, 6, 7, 5, 4, 0, 0]
  358.         bankSwapWidth = computeSurfaceBankSwappedWidth(tileMode, bpp, pitch)
  359.         swapIndex = macroTilePitch * macroTileIndexX // bankSwapWidth
  360.         bank ^= bankSwapOrder[swapIndex & (m_banks - 1)]
  361.  
  362.     groupMask = ((1 << numGroupBits) - 1)
  363.  
  364.     numSwizzleBits = (numBankBits + numPipeBits)
  365.  
  366.     totalOffset = (elemOffset + ((macroTileOffset + sliceOffset) >> numSwizzleBits))
  367.  
  368.     offsetHigh = (totalOffset & ~groupMask) << numSwizzleBits
  369.     offsetLow = groupMask & totalOffset
  370.  
  371.     pipeBits = pipe << numGroupBits
  372.     bankBits = bank << (numPipeBits + numGroupBits)
  373.  
  374.     return bankBits | pipeBits | offsetLow | offsetHigh
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
 
Top