AboodXD

Wii U swizzling code

May 19th, 2017
473
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. BCn_formats = [0x31, 0x431, 0x32, 0x432, 0x33, 0x433, 0x34, 0x234, 0x35, 0x235]
  2.  
  3. # If swizzling, set to True.
  4. # If deswizzling, set to False.
  5. do_swizzle = False
  6.  
  7. # result is the data after swizzling
  8. result = swizzle(width, height, format_, tileMode, swizzle, pitch, data, do_swizzle)
  9.  
  10.  
  11. def swizzle(width, height, format_, tileMode, swizzle_, pitch, data, toGFD=False):
  12.     result_ = bytearray(data)
  13.  
  14.     if format_ in BCn_formats:
  15.         width = (width + 3) // 4
  16.         height = (height + 3)  // 4
  17.  
  18.     for y in range(height):
  19.         for x in range(width):
  20.             bpp = surfaceGetBitsPerPixel(format_)
  21.             pipeSwizzle = (swizzle_ >> 8) & 1
  22.             bankSwizzle = (swizzle_ >> 9) & 3
  23.  
  24.             if tileMode == 0 or tileMode == 1:
  25.                 pos = AddrLib_computeSurfaceAddrFromCoordLinear(x, y, bpp, pitch)
  26.             elif tileMode == 2 or tileMode == 3:
  27.                 pos = AddrLib_computeSurfaceAddrFromCoordMicroTiled(x, y, bpp, pitch, tileMode)
  28.             else:
  29.                 pos = AddrLib_computeSurfaceAddrFromCoordMacroTiled(x, y, bpp, pitch, height, tileMode, pipeSwizzle,
  30.                                                                     bankSwizzle)
  31.  
  32.             bpp //= 8
  33.  
  34.             pos_ = (y * width + x) * bpp
  35.  
  36.             if toGFD:
  37.                 if (pos < len(data)) and (pos_ < len(data)):
  38.                     result[pos:pos + bpp] = data[pos_:pos_ + bpp]
  39.             else:
  40.                 if (pos_ < len(data)) and (pos < len(data)):
  41.                     result[pos_:pos_ + bpp] = data[pos:pos + bpp]
  42.  
  43.     return result_
  44.  
  45.  
  46. # Credits:
  47. #  -AddrLib: actual code
  48. #  -Exzap: modifying code to apply to Wii U textures
  49. #  -AboodXD: porting, code improvements and cleaning up
  50.  
  51. m_banks = 4
  52. m_banksBitcount = 2
  53. m_pipes = 2
  54. m_pipesBitcount = 1
  55. m_pipeInterleaveBytes = 256
  56. m_pipeInterleaveBytesBitcount = 8
  57. m_rowSize = 2048
  58. m_swapSize = 256
  59. m_splitSize = 2048
  60.  
  61. m_chipFamily = 2
  62.  
  63. MicroTilePixels = 8 * 8
  64.  
  65. formatHwInfo = b"\x00\x00\x00\x01\x08\x03\x00\x01\x08\x01\x00\x01\x00\x00\x00\x01" \
  66.                b"\x00\x00\x00\x01\x10\x07\x00\x00\x10\x03\x00\x01\x10\x03\x00\x01" \
  67.                b"\x10\x0B\x00\x01\x10\x01\x00\x01\x10\x03\x00\x01\x10\x03\x00\x01" \
  68.                b"\x10\x03\x00\x01\x20\x03\x00\x00\x20\x07\x00\x00\x20\x03\x00\x00" \
  69.                b"\x20\x03\x00\x01\x20\x05\x00\x00\x00\x00\x00\x00\x20\x03\x00\x00" \
  70.                b"\x00\x00\x00\x00\x00\x00\x00\x01\x20\x03\x00\x01\x00\x00\x00\x01" \
  71.                b"\x00\x00\x00\x01\x20\x0B\x00\x01\x20\x0B\x00\x01\x20\x0B\x00\x01" \
  72.                b"\x40\x05\x00\x00\x40\x03\x00\x00\x40\x03\x00\x00\x40\x03\x00\x00" \
  73.                b"\x40\x03\x00\x01\x00\x00\x00\x00\x80\x03\x00\x00\x80\x03\x00\x00" \
  74.                b"\x00\x00\x00\x01\x00\x00\x00\x01\x00\x00\x00\x01\x10\x01\x00\x00" \
  75.                b"\x10\x01\x00\x00\x20\x01\x00\x00\x20\x01\x00\x00\x20\x01\x00\x00" \
  76.                b"\x00\x01\x00\x01\x00\x01\x00\x00\x00\x01\x00\x00\x60\x01\x00\x00" \
  77.                b"\x60\x01\x00\x00\x40\x01\x00\x01\x80\x01\x00\x01\x80\x01\x00\x01" \
  78.                b"\x40\x01\x00\x01\x80\x01\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00" \
  79.                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00" \
  80.                b"\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00"
  81.  
  82.  
  83. def surfaceGetBitsPerPixel(surfaceFormat):
  84.     hwFormat = surfaceFormat & 0x3F
  85.     bpp = formatHwInfo[hwFormat * 4 + 0]
  86.  
  87.     return bpp
  88.  
  89.  
  90. def computeSurfaceThickness(tileMode):
  91.     thickness = 1
  92.  
  93.     if tileMode == 3 or tileMode == 7 or tileMode == 11 or tileMode == 13 or tileMode == 15:
  94.         thickness = 4
  95.  
  96.     elif tileMode == 16 or tileMode == 17:
  97.         thickness = 8
  98.  
  99.     return thickness
  100.  
  101.  
  102. def computePixelIndexWithinMicroTile(x, y, bpp, tileMode, z=0):
  103.     pixelBit6 = 0
  104.     pixelBit7 = 0
  105.     pixelBit8 = 0
  106.     thickness = computeSurfaceThickness(tileMode)
  107.  
  108.     if bpp == 0x08:
  109.         pixelBit0 = x & 1
  110.         pixelBit1 = (x & 2) >> 1
  111.         pixelBit2 = (x & 4) >> 2
  112.         pixelBit3 = (y & 2) >> 1
  113.         pixelBit4 = y & 1
  114.         pixelBit5 = (y & 4) >> 2
  115.  
  116.     elif bpp == 0x10:
  117.         pixelBit0 = x & 1
  118.         pixelBit1 = (x & 2) >> 1
  119.         pixelBit2 = (x & 4) >> 2
  120.         pixelBit3 = y & 1
  121.         pixelBit4 = (y & 2) >> 1
  122.         pixelBit5 = (y & 4) >> 2
  123.  
  124.     elif bpp == 0x20 or bpp == 0x60:
  125.         pixelBit0 = x & 1
  126.         pixelBit1 = (x & 2) >> 1
  127.         pixelBit2 = y & 1
  128.         pixelBit3 = (x & 4) >> 2
  129.         pixelBit4 = (y & 2) >> 1
  130.         pixelBit5 = (y & 4) >> 2
  131.  
  132.     elif bpp == 0x40:
  133.         pixelBit0 = x & 1
  134.         pixelBit1 = y & 1
  135.         pixelBit2 = (x & 2) >> 1
  136.         pixelBit3 = (x & 4) >> 2
  137.         pixelBit4 = (y & 2) >> 1
  138.         pixelBit5 = (y & 4) >> 2
  139.  
  140.     elif bpp == 0x80:
  141.         pixelBit0 = y & 1
  142.         pixelBit1 = x & 1
  143.         pixelBit2 = (x & 2) >> 1
  144.         pixelBit3 = (x & 4) >> 2
  145.         pixelBit4 = (y & 2) >> 1
  146.         pixelBit5 = (y & 4) >> 2
  147.  
  148.     else:
  149.         pixelBit0 = x & 1
  150.         pixelBit1 = (x & 2) >> 1
  151.         pixelBit2 = y & 1
  152.         pixelBit3 = (x & 4) >> 2
  153.         pixelBit4 = (y & 2) >> 1
  154.         pixelBit5 = (y & 4) >> 2
  155.  
  156.     if thickness > 1:
  157.         pixelBit6 = z & 1
  158.         pixelBit7 = (z & 2) >> 1
  159.  
  160.     if thickness == 8:
  161.         pixelBit8 = (z & 4) >> 2
  162.  
  163.     return ((pixelBit8 << 8) | (pixelBit7 << 7) | (pixelBit6 << 6) |
  164.             32 * pixelBit5 | 16 * pixelBit4 | 8 * pixelBit3 |
  165.             4 * pixelBit2 | pixelBit0 | 2 * pixelBit1)
  166.  
  167.  
  168. def computePipeFromCoordWoRotation(x, y):
  169.     # hardcoded to assume 2 pipes
  170.     return ((y >> 3) ^ (x >> 3)) & 1
  171.  
  172.  
  173. def computeBankFromCoordWoRotation(x, y):
  174.     numPipes = m_pipes
  175.     numBanks = m_banks
  176.     bank = 0
  177.  
  178.     if numBanks == 4:
  179.         bankBit0 = ((y // (16 * numPipes)) ^ (x >> 3)) & 1
  180.         bank = bankBit0 | 2 * (((y // (8 * numPipes)) ^ (x >> 4)) & 1)
  181.  
  182.     elif numBanks == 8:
  183.         bankBit0a = ((y // (32 * numPipes)) ^ (x >> 3)) & 1
  184.         bank = (bankBit0a | 2 * (((y // (32 * numPipes)) ^ (y // (16 * numPipes) ^ (x >> 4))) & 1) |
  185.             4 * (((y // (8 * numPipes)) ^ (x >> 5)) & 1))
  186.  
  187.     return bank
  188.  
  189.  
  190. def isThickMacroTiled(tileMode):
  191.     thickMacroTiled = 0
  192.  
  193.     if tileMode == 7 or tileMode == 11 or tileMode == 13 or tileMode == 15:
  194.         thickMacroTiled = 1
  195.  
  196.     return thickMacroTiled
  197.  
  198.  
  199. def isBankSwappedTileMode(tileMode):
  200.     bankSwapped = 0
  201.  
  202.     if tileMode == 8 or tileMode == 9 or tileMode == 10 or tileMode == 11 or tileMode == 14 or tileMode == 15:
  203.         bankSwapped = 1
  204.  
  205.     return bankSwapped
  206.  
  207.  
  208. def computeMacroTileAspectRatio(tileMode):
  209.     ratio = 1
  210.  
  211.     if tileMode == 8 or tileMode == 12 or tileMode == 14:
  212.         ratio = 1
  213.  
  214.     elif tileMode == 5 or tileMode == 9:
  215.         ratio = 2
  216.  
  217.     elif tileMode == 6 or tileMode == 10:
  218.         ratio = 4
  219.  
  220.     return ratio
  221.  
  222.  
  223. def computeSurfaceBankSwappedWidth(tileMode, bpp, pitch, numSamples=1):
  224.     if isBankSwappedTileMode(tileMode) == 0: return 0
  225.  
  226.     numBanks = m_banks
  227.     numPipes = m_pipes
  228.     swapSize = m_swapSize
  229.     rowSize = m_rowSize
  230.     splitSize = m_splitSize
  231.     groupSize = m_pipeInterleaveBytes
  232.     bytesPerSample = 8 * bpp
  233.  
  234.     try:
  235.         samplesPerTile = splitSize // bytesPerSample
  236.         slicesPerTile = max(1, numSamples // samplesPerTile)
  237.     except ZeroDivisionError:
  238.         slicesPerTile = 1
  239.  
  240.     if isThickMacroTiled(tileMode) != 0:
  241.         numSamples = 4
  242.  
  243.     bytesPerTileSlice = numSamples * bytesPerSample // slicesPerTile
  244.  
  245.     factor = computeMacroTileAspectRatio(tileMode)
  246.     swapTiles = max(1, (swapSize >> 1) // bpp)
  247.  
  248.     swapWidth = swapTiles * 8 * numBanks
  249.     heightBytes = numSamples * factor * numPipes * bpp // slicesPerTile
  250.     swapMax = numPipes * numBanks * rowSize // heightBytes
  251.     swapMin = groupSize * 8 * numBanks // bytesPerTileSlice
  252.  
  253.     bankSwapWidth = min(swapMax, max(swapMin, swapWidth))
  254.  
  255.     while not bankSwapWidth < (2 * pitch):
  256.         bankSwapWidth >>= 1
  257.  
  258.     return bankSwapWidth
  259.  
  260.  
  261. def AddrLib_computeSurfaceAddrFromCoordLinear(x, y, bpp, pitch):
  262.     rowOffset = y * pitch
  263.     pixOffset = x
  264.  
  265.     addr = (rowOffset + pixOffset) * bpp
  266.     addr //= 8
  267.  
  268.     return addr
  269.  
  270.  
  271. def AddrLib_computeSurfaceAddrFromCoordMicroTiled(x, y, bpp, pitch, tileMode):
  272.     microTileThickness = 1
  273.  
  274.     if tileMode == 3:
  275.         microTileThickness = 4
  276.  
  277.     microTileBytes = (MicroTilePixels * microTileThickness * bpp + 7) // 8
  278.     microTilesPerRow = pitch >> 3
  279.     microTileIndexX = x >> 3
  280.     microTileIndexY = y >> 3
  281.  
  282.     microTileOffset = microTileBytes * (microTileIndexX + microTileIndexY * microTilesPerRow)
  283.  
  284.     pixelIndex = computePixelIndexWithinMicroTile(x, y, bpp, tileMode)
  285.  
  286.     pixelOffset = bpp * pixelIndex
  287.  
  288.     pixelOffset >>= 3
  289.  
  290.     return pixelOffset + microTileOffset
  291.  
  292.  
  293. def AddrLib_computeSurfaceAddrFromCoordMacroTiled(x, y, bpp, pitch, height, tileMode, pipeSwizzle, bankSwizzle):
  294.     numPipes = m_pipes
  295.     numBanks = m_banks
  296.     numGroupBits = m_pipeInterleaveBytesBitcount
  297.     numPipeBits = m_pipesBitcount
  298.     numBankBits = m_banksBitcount
  299.  
  300.     microTileThickness = computeSurfaceThickness(tileMode)
  301.  
  302.     microTileBits = bpp * (microTileThickness * MicroTilePixels)
  303.     microTileBytes = (microTileBits + 7) // 8
  304.  
  305.     pixelIndex = computePixelIndexWithinMicroTile(x, y, bpp, tileMode)
  306.  
  307.     pixelOffset = bpp * pixelIndex
  308.  
  309.     elemOffset = pixelOffset
  310.  
  311.     bytesPerSample = microTileBytes
  312.     if microTileBytes <= m_splitSize:
  313.         numSamples = 1
  314.         sampleSlice = 0
  315.     else:
  316.         samplesPerSlice = m_splitSize // bytesPerSample
  317.         numSampleSplits = max(1, 1 // samplesPerSlice)
  318.         numSamples = samplesPerSlice
  319.         sampleSlice = elemOffset // (microTileBits // numSampleSplits)
  320.         elemOffset %= microTileBits // numSampleSplits
  321.     elemOffset += 7
  322.     elemOffset //= 8
  323.  
  324.     pipe = computePipeFromCoordWoRotation(x, y)
  325.     bank = computeBankFromCoordWoRotation(x, y)
  326.  
  327.     bankPipe = pipe + numPipes * bank
  328.  
  329.     swizzle_ = pipeSwizzle + numPipes * bankSwizzle
  330.  
  331.     bankPipe ^= numPipes * sampleSlice * ((numBanks >> 1) + 1) ^ swizzle_
  332.     bankPipe %= numPipes * numBanks
  333.     pipe = bankPipe % numPipes
  334.     bank = bankPipe // numPipes
  335.  
  336.     sliceBytes = (height * pitch * microTileThickness * bpp * numSamples + 7) // 8
  337.     sliceOffset = sliceBytes * (sampleSlice // microTileThickness)
  338.  
  339.     macroTilePitch = 8 * m_banks
  340.     macroTileHeight = 8 * m_pipes
  341.  
  342.     if tileMode == 5 or tileMode == 9:  # GX2_TILE_MODE_2D_TILED_THIN4 and GX2_TILE_MODE_2B_TILED_THIN2
  343.         macroTilePitch >>= 1
  344.         macroTileHeight *= 2
  345.  
  346.     elif tileMode == 6 or tileMode == 10:  # GX2_TILE_MODE_2D_TILED_THIN4 and GX2_TILE_MODE_2B_TILED_THIN4
  347.         macroTilePitch >>= 2
  348.         macroTileHeight *= 4
  349.  
  350.     macroTilesPerRow = pitch // macroTilePitch
  351.     macroTileBytes = (numSamples * microTileThickness * bpp * macroTileHeight * macroTilePitch + 7) // 8
  352.     macroTileIndexX = x // macroTilePitch
  353.     macroTileIndexY = y // macroTileHeight
  354.     macroTileOffset = (macroTileIndexX + macroTilesPerRow * macroTileIndexY) * macroTileBytes
  355.  
  356.     if tileMode == 8 or tileMode == 9 or tileMode == 10 or tileMode == 11 or tileMode == 14 or tileMode == 15:
  357.         bankSwapOrder = [0, 1, 3, 2, 6, 7, 5, 4, 0, 0]
  358.         bankSwapWidth = computeSurfaceBankSwappedWidth(tileMode, bpp, pitch)
  359.         swapIndex = macroTilePitch * macroTileIndexX // bankSwapWidth
  360.         bank ^= bankSwapOrder[swapIndex & (m_banks - 1)]
  361.  
  362.     groupMask = ((1 << numGroupBits) - 1)
  363.  
  364.     numSwizzleBits = (numBankBits + numPipeBits)
  365.  
  366.     totalOffset = (elemOffset + ((macroTileOffset + sliceOffset) >> numSwizzleBits))
  367.  
  368.     offsetHigh = (totalOffset & ~groupMask) << numSwizzleBits
  369.     offsetLow = groupMask & totalOffset
  370.  
  371.     pipeBits = pipe << numGroupBits
  372.     bankBits = bank << (numPipeBits + numGroupBits)
  373.  
  374.     return bankBits | pipeBits | offsetLow | offsetHigh
RAW Paste Data