ryujinx/Ryujinx.Graphics.Texture/Astc/AstcDecoder.cs
Alex Barney d1ab9fb42c ASTC optimizations (#845)
* ASTC optimizations

* Move code to Ryujinx.Common

* Support 3D textures

* Address feedback

* Remove ASTC logging

* Use stackalloc instead of a Buffer20 struct

* Code style and cleanup

* Respond to feedback

* Rearrange public/private property ordering
2020-01-09 02:13:00 +01:00

1607 lines
54 KiB
C#

using Ryujinx.Common.Utilities;
using System;
using System.Diagnostics;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
namespace Ryujinx.Graphics.Texture.Astc
{
// https://github.com/GammaUNC/FasTC/blob/master/ASTCEncoder/src/Decompressor.cpp
public class AstcDecoder
{
/// <summary>Compressed input, read as consecutive 16-byte blocks.</summary>
private ReadOnlyMemory<byte> InputBuffer { get; }

/// <summary>Destination for the decoded pixels (4 bytes per pixel).</summary>
private Memory<byte> OutputBuffer { get; }

/// <summary>Block footprint width in texels.</summary>
private int BlockSizeX { get; }

/// <summary>Block footprint height in texels.</summary>
private int BlockSizeY { get; }

/// <summary>Per-mip-level layout information, indexed by level.</summary>
private AstcLevel[] Levels { get; }

/// <summary>
/// <c>true</c> until a block fails to decode; cleared by <see cref="ProcessBlock"/>.
/// </summary>
private bool Success { get; set; }

/// <summary>Total number of compressed blocks across all mip levels.</summary>
public int TotalBlockCount { get; }

/// <summary>
/// Creates a decoder and precomputes, for every mip level, its dimensions, block counts,
/// first input block index and output byte offset.
/// </summary>
/// <param name="inputBuffer">Compressed ASTC data.</param>
/// <param name="outputBuffer">Buffer that receives the decoded pixels.</param>
/// <param name="blockWidth">Block footprint width; must not exceed 12.</param>
/// <param name="blockHeight">Block footprint height; must not exceed 12.</param>
/// <param name="width">Width of level 0 in pixels.</param>
/// <param name="height">Height of level 0 in pixels.</param>
/// <param name="depth">Depth of level 0 in slices.</param>
/// <param name="levels">Number of mip levels.</param>
/// <exception cref="AstcDecoderException">Thrown when a block dimension is negative or larger than 12.</exception>
public AstcDecoder(
    ReadOnlyMemory<byte> inputBuffer,
    Memory<byte> outputBuffer,
    int blockWidth,
    int blockHeight,
    int width,
    int height,
    int depth,
    int levels)
{
    // The unsigned comparison rejects negative sizes as well as sizes above 12.
    if ((uint)blockWidth > 12 || (uint)blockHeight > 12)
    {
        throw new AstcDecoderException("Invalid block size.");
    }

    InputBuffer = inputBuffer;
    OutputBuffer = outputBuffer;

    BlockSizeX = blockWidth;
    BlockSizeY = blockHeight;

    Levels = new AstcLevel[levels];

    // Decoding is considered successful until a block proves otherwise. Without this,
    // Success keeps its default of false and every TryDecode* call reports failure.
    Success = true;

    int currentInputBlock = 0;
    int currentOutputOffset = 0;

    for (int i = 0; i < levels; i++)
    {
        ref AstcLevel level = ref Levels[i];

        // Each level halves the previous one, never dropping below one texel.
        level.ImageSizeX = Math.Max(1, width >> i);
        level.ImageSizeY = Math.Max(1, height >> i);
        level.ImageSizeZ = Math.Max(1, depth >> i);

        // Round up so partially covered blocks at the edges are counted.
        level.BlockCountX = (level.ImageSizeX + blockWidth - 1) / blockWidth;
        level.BlockCountY = (level.ImageSizeY + blockHeight - 1) / blockHeight;

        level.StartBlock = currentInputBlock;
        level.OutputByteOffset = currentOutputOffset;

        currentInputBlock += level.TotalBlockCount;
        currentOutputOffset += level.PixelCount * 4;
    }

    TotalBlockCount = currentInputBlock;
}
/// <summary>
/// Layout of a single mip level: its size in texels, its size in blocks, and where
/// its data starts in the input (block index) and output (byte offset) buffers.
/// </summary>
private struct AstcLevel
{
    public int ImageSizeX { get; set; }
    public int ImageSizeY { get; set; }
    public int ImageSizeZ { get; set; }

    public int BlockCountX { get; set; }
    public int BlockCountY { get; set; }

    public int StartBlock { get; set; }
    public int OutputByteOffset { get; set; }

    /// <summary>Number of compressed blocks in this level, over all depth slices.</summary>
    public int TotalBlockCount
    {
        get
        {
            int blocksPerSlice = BlockCountX * BlockCountY;

            return blocksPerSlice * ImageSizeZ;
        }
    }

    /// <summary>Number of texels in this level, over all depth slices.</summary>
    public int PixelCount
    {
        get
        {
            int texelsPerSlice = ImageSizeX * ImageSizeY;

            return texelsPerSlice * ImageSizeZ;
        }
    }
}
/// <summary>
/// Computes the number of bytes needed to hold a fully decoded texture, at 4 bytes per texel.
/// </summary>
/// <param name="sizeX">Width of level 0.</param>
/// <param name="sizeY">Height of level 0.</param>
/// <param name="sizeZ">Depth of level 0.</param>
/// <param name="levelCount">Number of mip levels to account for.</param>
/// <returns>Total size in bytes over all levels.</returns>
public static int QueryDecompressedSize(int sizeX, int sizeY, int sizeZ, int levelCount)
{
    int texelTotal = 0;
    int level = 0;

    while (level < levelCount)
    {
        // Every dimension is halved per level and clamped to at least one texel.
        int w = Math.Max(1, sizeX >> level);
        int h = Math.Max(1, sizeY >> level);
        int d = Math.Max(1, sizeZ >> level);

        texelTotal += w * h * d;
        level++;
    }

    return texelTotal * 4;
}
/// <summary>
/// Decodes the compressed block at <paramref name="index"/> and copies its texels
/// into the output buffer at the position belonging to that block.
/// </summary>
/// <param name="index">Index of the 16-byte block in the input buffer, counted across all mip levels.</param>
/// <remarks>
/// If decoding throws, <see cref="Success"/> is cleared and the block is written out
/// as a solid marker value instead of whatever had been decoded up to the failure.
/// </remarks>
/// <exception cref="AstcDecoderException">Thrown (via level lookup) when <paramref name="index"/> is past the last level.</exception>
public void ProcessBlock(int index)
{
    Buffer16 inputBlock = MemoryMarshal.Cast<byte, Buffer16>(InputBuffer.Span)[index];

    // Blocks are limited to 12x12 by the constructor, so 144 texels always suffice.
    Span<int> decompressedData = stackalloc int[144];

    try
    {
        DecompressBlock(inputBlock, decompressedData, BlockSizeX, BlockSizeY);
    }
    catch (Exception)
    {
        Success = false;

        // The decoder may have thrown part-way through, leaving the scratch buffer
        // partially written. Overwrite it so a failed block is deterministic and
        // easy to spot in the output.
        decompressedData.Fill(unchecked((int)0xFF00FFFF));
    }

    Span<byte> decompressedBytes = MemoryMarshal.Cast<int, byte>(decompressedData);

    AstcLevel levelInfo = GetLevelInfo(index);

    WriteDecompressedBlock(
        decompressedBytes,
        OutputBuffer.Span.Slice(levelInfo.OutputByteOffset),
        index - levelInfo.StartBlock,
        levelInfo);
}
/// <summary>
/// Finds the mip level that contains the given global block index.
/// </summary>
/// <param name="blockIndex">Block index counted across all levels.</param>
/// <returns>A copy of the first level whose block range ends after <paramref name="blockIndex"/>.</returns>
/// <exception cref="AstcDecoderException">Thrown when no level contains the index.</exception>
private AstcLevel GetLevelInfo(int blockIndex)
{
    for (int i = 0; i < Levels.Length; i++)
    {
        AstcLevel candidate = Levels[i];
        int endBlock = candidate.StartBlock + candidate.TotalBlockCount;

        if (blockIndex < endBlock)
        {
            return candidate;
        }
    }

    throw new AstcDecoderException("Invalid block index.");
}
/// <summary>
/// Copies the visible part of a decoded block into its place in the level's output image.
/// </summary>
/// <param name="block">Decoded texels, 4 bytes each, stored with a row pitch of <c>BlockSizeX</c> texels.</param>
/// <param name="outputBuffer">Output bytes starting at the first byte of the level.</param>
/// <param name="blockIndex">Index of the block relative to the start of the level.</param>
/// <param name="level">Layout of the level being written.</param>
private void WriteDecompressedBlock(ReadOnlySpan<byte> block, Span<byte> outputBuffer, int blockIndex, AstcLevel level)
{
    int rowPitch = level.ImageSizeX * 4;
    int blockPitch = BlockSizeX * 4;

    // Position of the block's top-left texel inside the level.
    int firstTexelX = (blockIndex % level.BlockCountX) * BlockSizeX;
    int firstTexelY = (blockIndex / level.BlockCountX) * BlockSizeY;

    // Blocks on the right/bottom edge may extend past the image; only copy what fits.
    int copyWidth = Math.Min(firstTexelX + BlockSizeX, level.ImageSizeX) - firstTexelX;
    int copyHeight = Math.Min(firstTexelY + BlockSizeY, level.ImageSizeY * level.ImageSizeZ) - firstTexelY;

    int firstByte = firstTexelX * 4 + firstTexelY * rowPitch;

    for (int row = 0; row < copyHeight; row++)
    {
        ReadOnlySpan<byte> source = block.Slice(row * blockPitch, copyWidth * 4);
        Span<byte> destination = outputBuffer.Slice(firstByte + row * rowPitch);

        source.CopyTo(destination);
    }
}
/// <summary>
/// Block-mode information: the weight grid size, the largest weight value,
/// whether two weight planes are present, and the error/void-extent flags.
/// </summary>
struct TexelWeightParams
{
    public int Width;
    public int Height;
    public int MaxWeight;
    public bool DualPlane;
    public bool Error;
    public bool VoidExtentLdr;
    public bool VoidExtentHdr;

    /// <summary>
    /// Returns the number of bits the encoded weights occupy for this grid and weight range.
    /// </summary>
    public int GetPackedBitSize()
    {
        // One index per grid cell, doubled when a second plane is present.
        int indexCount = DualPlane ? Height * Width * 2 : Height * Width;

        return IntegerEncoded.CreateEncoding(MaxWeight).GetBitLength(indexCount);
    }

    /// <summary>
    /// Returns the number of weight values stored in the block (grid cells times planes).
    /// </summary>
    public int GetNumWeightValues()
    {
        int cells = Width * Height;

        return DualPlane ? cells * 2 : cells;
    }
}
/// <summary>
/// Decodes an ASTC texture into a newly allocated buffer, one block at a time on the calling thread.
/// </summary>
/// <param name="data">Compressed ASTC data.</param>
/// <param name="blockWidth">Block footprint width.</param>
/// <param name="blockHeight">Block footprint height.</param>
/// <param name="width">Width of level 0.</param>
/// <param name="height">Height of level 0.</param>
/// <param name="depth">Depth of level 0.</param>
/// <param name="levels">Number of mip levels.</param>
/// <param name="decoded">Receives the decoded pixels; assigned even when decoding reports failure.</param>
/// <returns>The decoder's success flag after all blocks were processed.</returns>
public static bool TryDecodeToRgba8(
    ReadOnlyMemory<byte> data,
    int blockWidth,
    int blockHeight,
    int width,
    int height,
    int depth,
    int levels,
    out Span<byte> decoded)
{
    int outputSize = QueryDecompressedSize(width, height, depth, levels);
    byte[] pixels = new byte[outputSize];

    var decoder = new AstcDecoder(data, pixels, blockWidth, blockHeight, width, height, depth, levels);

    int blockIndex = 0;

    while (blockIndex < decoder.TotalBlockCount)
    {
        decoder.ProcessBlock(blockIndex);
        blockIndex++;
    }

    decoded = pixels;

    return decoder.Success;
}
/// <summary>
/// Decodes an ASTC texture into a caller-supplied buffer, one block at a time on the calling thread.
/// </summary>
/// <param name="data">Compressed ASTC data.</param>
/// <param name="outputBuffer">Buffer that receives the decoded pixels.</param>
/// <param name="blockWidth">Block footprint width.</param>
/// <param name="blockHeight">Block footprint height.</param>
/// <param name="width">Width of level 0.</param>
/// <param name="height">Height of level 0.</param>
/// <param name="depth">Depth of level 0.</param>
/// <param name="levels">Number of mip levels.</param>
/// <returns>The decoder's success flag after all blocks were processed.</returns>
public static bool TryDecodeToRgba8(
    ReadOnlyMemory<byte> data,
    Memory<byte> outputBuffer,
    int blockWidth,
    int blockHeight,
    int width,
    int height,
    int depth,
    int levels)
{
    var decoder = new AstcDecoder(data, outputBuffer, blockWidth, blockHeight, width, height, depth, levels);

    int blockIndex = 0;

    while (blockIndex < decoder.TotalBlockCount)
    {
        decoder.ProcessBlock(blockIndex);
        blockIndex++;
    }

    return decoder.Success;
}
/// <summary>
/// Decodes an ASTC texture into a caller-supplied buffer, processing block indices through a PLINQ query.
/// </summary>
/// <param name="data">Compressed ASTC data.</param>
/// <param name="outputBuffer">Buffer that receives the decoded pixels.</param>
/// <param name="blockWidth">Block footprint width.</param>
/// <param name="blockHeight">Block footprint height.</param>
/// <param name="width">Width of level 0.</param>
/// <param name="height">Height of level 0.</param>
/// <param name="depth">Depth of level 0.</param>
/// <param name="levels">Number of mip levels.</param>
/// <returns>The decoder's success flag after all blocks were processed.</returns>
public static bool TryDecodeToRgba8P(
    ReadOnlyMemory<byte> data,
    Memory<byte> outputBuffer,
    int blockWidth,
    int blockHeight,
    int width,
    int height,
    int depth,
    int levels)
{
    var decoder = new AstcDecoder(data, outputBuffer, blockWidth, blockHeight, width, height, depth, levels);

    // Lazy parallelism: hand every block index to the parallel query.
    var blockIndices = Enumerable.Range(0, decoder.TotalBlockCount).AsParallel();

    blockIndices.ForAll(blockIndex => decoder.ProcessBlock(blockIndex));

    return decoder.Success;
}
/// <summary>
/// Decodes an ASTC texture into a newly allocated buffer, processing block indices through a PLINQ query.
/// </summary>
/// <param name="data">Compressed ASTC data.</param>
/// <param name="blockWidth">Block footprint width.</param>
/// <param name="blockHeight">Block footprint height.</param>
/// <param name="width">Width of level 0.</param>
/// <param name="height">Height of level 0.</param>
/// <param name="depth">Depth of level 0.</param>
/// <param name="levels">Number of mip levels.</param>
/// <param name="decoded">Receives the decoded pixels; assigned even when decoding reports failure.</param>
/// <returns>The decoder's success flag after all blocks were processed.</returns>
public static bool TryDecodeToRgba8P(
    ReadOnlyMemory<byte> data,
    int blockWidth,
    int blockHeight,
    int width,
    int height,
    int depth,
    int levels,
    out Span<byte> decoded)
{
    int outputSize = QueryDecompressedSize(width, height, depth, levels);
    byte[] pixels = new byte[outputSize];

    var decoder = new AstcDecoder(data, pixels, blockWidth, blockHeight, width, height, depth, levels);

    var blockIndices = Enumerable.Range(0, decoder.TotalBlockCount).AsParallel();

    blockIndices.ForAll(blockIndex => decoder.ProcessBlock(blockIndex));

    decoded = pixels;

    return decoder.Success;
}
/// <summary>
/// Decodes a single 16-byte ASTC block into packed pixels, written row by row
/// with a row pitch of <paramref name="blockWidth"/> entries.
/// </summary>
/// <param name="inputBlock">The compressed block.</param>
/// <param name="outputBuffer">Receives one packed pixel per texel; indexed as <c>j * blockWidth + i</c>.</param>
/// <param name="blockWidth">Block footprint width in texels.</param>
/// <param name="blockHeight">Block footprint height in texels.</param>
/// <returns>Always <c>true</c>; every failure path throws instead of returning.</returns>
/// <exception cref="AstcDecoderException">
/// Thrown for an invalid block mode, an HDR void-extent block, a weight grid larger than
/// the block, or a dual-plane block with four partitions.
/// </exception>
public static bool DecompressBlock(
Buffer16 inputBlock,
Span<int> outputBuffer,
int blockWidth,
int blockHeight)
{
BitStream128 bitStream = new BitStream128(inputBlock);
DecodeBlockInfo(ref bitStream, out TexelWeightParams texelParams);
if (texelParams.Error)
{
throw new AstcDecoderException("Invalid block mode");
}
// LDR void-extent blocks carry a single color; fill the block with it and stop.
if (texelParams.VoidExtentLdr)
{
FillVoidExtentLdr(ref bitStream, outputBuffer, blockWidth, blockHeight);
return true;
}
if (texelParams.VoidExtentHdr)
{
throw new AstcDecoderException("HDR void extent blocks are not supported.");
}
if (texelParams.Width > blockWidth)
{
throw new AstcDecoderException("Texel weight grid width should be smaller than block width.");
}
if (texelParams.Height > blockHeight)
{
throw new AstcDecoderException("Texel weight grid height should be smaller than block height.");
}
// Read num partitions
int numberPartitions = bitStream.ReadBits(2) + 1;
Debug.Assert(numberPartitions <= 4);
if (numberPartitions == 4 && texelParams.DualPlane)
{
throw new AstcDecoderException("Dual plane mode is incompatible with four partition blocks.");
}
// Based on the number of partitions, read the color endpoint mode for
// each partition.
// Determine partitions, partition index, and color endpoint modes
int planeIndices;
int partitionIndex;
Span<uint> colorEndpointMode = stackalloc uint[4];
BitStream128 colorEndpointStream = new BitStream128();
// Read extra config data...
uint baseColorEndpointMode = 0;
if (numberPartitions == 1)
{
// Single partition: the 4-bit endpoint mode is stored directly.
colorEndpointMode[0] = (uint)bitStream.ReadBits(4);
partitionIndex = 0;
}
else
{
// Multiple partitions: 10-bit partition seed followed by the first 6 bits of mode data.
partitionIndex = bitStream.ReadBits(10);
baseColorEndpointMode = (uint)bitStream.ReadBits(6);
}
// The low two bits select whether all partitions share one mode (0) or have individual modes.
uint baseMode = (baseColorEndpointMode & 3);
// Remaining bits are color endpoint data...
int numberWeightBits = texelParams.GetPackedBitSize();
int remainingBits = bitStream.BitsLeft - numberWeightBits;
// Consider extra bits prior to texel data...
uint extraColorEndpointModeBits = 0;
if (baseMode != 0)
{
switch (numberPartitions)
{
case 2: extraColorEndpointModeBits += 2; break;
case 3: extraColorEndpointModeBits += 5; break;
case 4: extraColorEndpointModeBits += 8; break;
default: Debug.Assert(false); break;
}
}
remainingBits -= (int)extraColorEndpointModeBits;
// Do we have a dual plane situation?
int planeSelectorBits = 0;
if (texelParams.DualPlane)
{
planeSelectorBits = 2;
}
remainingBits -= planeSelectorBits;
// Read color data...
// Copy the color endpoint bits into their own stream, at most 8 bits per step.
int colorDataBits = remainingBits;
while (remainingBits > 0)
{
int numberBits = Math.Min(remainingBits, 8);
int bits = bitStream.ReadBits(numberBits);
colorEndpointStream.WriteBits(bits, numberBits);
remainingBits -= 8;
}
// Read the plane selection bits
planeIndices = bitStream.ReadBits(planeSelectorBits);
// Read the rest of the CEM
if (baseMode != 0)
{
// Combine the extra mode bits with the 6 bits read earlier, then drop the 2-bit selector.
uint extraColorEndpointMode = (uint)bitStream.ReadBits((int)extraColorEndpointModeBits);
uint tempColorEndpointMode = (extraColorEndpointMode << 6) | baseColorEndpointMode;
tempColorEndpointMode >>= 2;
// One class bit per partition...
Span<bool> c = stackalloc bool[4];
for (int i = 0; i < numberPartitions; i++)
{
c[i] = (tempColorEndpointMode & 1) != 0;
tempColorEndpointMode >>= 1;
}
// ...followed by two mode bits per partition.
Span<byte> m = stackalloc byte[4];
for (int i = 0; i < numberPartitions; i++)
{
m[i] = (byte)(tempColorEndpointMode & 3);
tempColorEndpointMode >>= 2;
Debug.Assert(m[i] <= 3);
}
// Final mode = ((baseMode or baseMode - 1) << 2) | m, where the class bit picks baseMode.
for (int i = 0; i < numberPartitions; i++)
{
colorEndpointMode[i] = baseMode;
if (!(c[i])) colorEndpointMode[i] -= 1;
colorEndpointMode[i] <<= 2;
colorEndpointMode[i] |= m[i];
}
}
else if (numberPartitions > 1)
{
// All partitions share the same mode, stored above the 2-bit selector.
uint tempColorEndpointMode = baseColorEndpointMode >> 2;
for (int i = 0; i < numberPartitions; i++)
{
colorEndpointMode[i] = tempColorEndpointMode;
}
}
// Make sure everything up till here is sane.
for (int i = 0; i < numberPartitions; i++)
{
Debug.Assert(colorEndpointMode[i] < 16);
}
Debug.Assert(bitStream.BitsLeft == texelParams.GetPackedBitSize());
// Decode both color data and texel weight data
Span<int> colorValues = stackalloc int[32]; // Four values * two endpoints * four maximum partitions
DecodeColorValues(colorValues, ref colorEndpointStream, colorEndpointMode, numberPartitions, colorDataBits);
EndPointSet endPoints;
unsafe { _ = &endPoints; } // Skip struct initialization
int colorValuesPosition = 0;
for (int i = 0; i < numberPartitions; i++)
{
ComputeEndpoints(endPoints.Get(i), colorValues, colorEndpointMode[i], ref colorValuesPosition);
}
// Read the texel weight data.
Buffer16 texelWeightData = inputBlock;
// Reverse everything
// Swapping mirrored bytes while bit-reversing each one reverses the whole 128-bit block.
for (int i = 0; i < 8; i++)
{
byte a = ReverseByte(texelWeightData[i]);
byte b = ReverseByte(texelWeightData[15 - i]);
texelWeightData[i] = b;
texelWeightData[15 - i] = a;
}
// Make sure that higher non-texel bits are set to zero
int clearByteStart = (texelParams.GetPackedBitSize() >> 3) + 1;
texelWeightData[clearByteStart - 1] &= (byte)((1 << (texelParams.GetPackedBitSize() % 8)) - 1);
int cLen = 16 - clearByteStart;
for (int i = clearByteStart; i < clearByteStart + cLen; i++) texelWeightData[i] = 0;
IntegerSequence texelWeightValues;
unsafe { _ = &texelWeightValues; } // Skip struct initialization
texelWeightValues.Reset();
BitStream128 weightBitStream = new BitStream128(texelWeightData);
IntegerEncoded.DecodeIntegerSequence(ref texelWeightValues, ref weightBitStream, texelParams.MaxWeight, texelParams.GetNumWeightValues());
// Blocks can be at most 12x12, so we can have as many as 144 weights
Weights weights;
unsafe { _ = &weights; } // Skip struct initialization
UnquantizeTexelWeights(ref weights, ref texelWeightValues, ref texelParams, blockWidth, blockHeight);
ushort[] table = Bits.Replicate8_16Table;
// Now that we have endpoints and weights, we can interpolate and generate
// the proper decoding...
for (int j = 0; j < blockHeight; j++)
{
for (int i = 0; i < blockWidth; i++)
{
int partition = Select2dPartition(partitionIndex, i, j, numberPartitions, ((blockHeight * blockWidth) < 32));
Debug.Assert(partition < numberPartitions);
AstcPixel pixel = new AstcPixel();
for (int component = 0; component < 4; component++)
{
// Look both endpoint components up in the 8-to-16 replication table before blending.
int component0 = endPoints.Get(partition)[0].GetComponent(component);
component0 = table[component0];
int component1 = endPoints.Get(partition)[1].GetComponent(component);
component1 = table[component1];
// In dual-plane mode one component takes its weight from the second plane.
int plane = 0;
if (texelParams.DualPlane && (((planeIndices + 1) & 3) == component))
{
plane = 1;
}
int weight = weights.Get(plane)[j * blockWidth + i];
// Weighted blend of the two endpoints, with rounding (weights are in [0, 64]).
int finalComponent = (component0 * (64 - weight) + component1 * weight + 32) / 64;
if (finalComponent == 65535)
{
pixel.SetComponent(component, 255);
}
else
{
// Scale the 16-bit result back down to 8 bits with rounding.
double finalComponentFloat = finalComponent;
pixel.SetComponent(component, (int)(255.0 * (finalComponentFloat / 65536.0) + 0.5));
}
}
outputBuffer[j * blockWidth + i] = pixel.Pack();
}
}
return true;
}
// Storage for two planes of texel weights. A block covers at most 12x12 texels,
// so every plane holds 144 entries.
[StructLayout(LayoutKind.Sequential, Size = 144 * sizeof(int) * Count)]
private struct Weights
{
    private int _start;

    public const int Count = 2;

    // Range-checked access to one plane.
    public Span<int> this[int index]
    {
        get
        {
            if ((uint)index < Count)
            {
                return Get(index);
            }

            throw new ArgumentOutOfRangeException();
        }
    }

    // Unchecked access to one plane; the caller must pass a valid plane index.
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public Span<int> Get(int index)
    {
        ref int planeStart = ref Unsafe.Add(ref _start, index * 144);

        return MemoryMarshal.CreateSpan(ref planeStart, 144);
    }
}
/// <summary>
/// Selects the partition of a texel in a 2D block by calling the general selector with <c>z = 0</c>.
/// </summary>
private static int Select2dPartition(int seed, int x, int y, int partitionCount, bool isSmallBlock)
    => SelectPartition(seed, x, y, 0, partitionCount, isSmallBlock);
/// <summary>
/// Computes which partition (0-3) a texel belongs to, from the block's partition seed
/// and the texel position.
/// </summary>
/// <param name="seed">Partition seed read from the block.</param>
/// <param name="x">Texel X position inside the block.</param>
/// <param name="y">Texel Y position inside the block.</param>
/// <param name="z">Texel Z position inside the block.</param>
/// <param name="partitionCount">Number of partitions in the block (1-4).</param>
/// <param name="isSmallBlock">When set, the coordinates are doubled before use.</param>
/// <returns>The partition index for the texel.</returns>
private static int SelectPartition(int seed, int x, int y, int z, int partitionCount, bool isSmallBlock)
{
    if (partitionCount == 1)
    {
        return 0;
    }

    if (isSmallBlock)
    {
        x <<= 1;
        y <<= 1;
        z <<= 1;
    }

    seed += (partitionCount - 1) * 1024;

    // Keep the hash unsigned: with a signed value, ">> 30" sign-extends and leaks
    // the sign bit into bits 2-3 of seed12 whenever the hash is negative.
    uint rightNum = (uint)Hash52((uint)seed);

    byte seed01 = (byte)(rightNum & 0xF);
    byte seed02 = (byte)((rightNum >> 4) & 0xF);
    byte seed03 = (byte)((rightNum >> 8) & 0xF);
    byte seed04 = (byte)((rightNum >> 12) & 0xF);
    byte seed05 = (byte)((rightNum >> 16) & 0xF);
    byte seed06 = (byte)((rightNum >> 20) & 0xF);
    byte seed07 = (byte)((rightNum >> 24) & 0xF);
    byte seed08 = (byte)((rightNum >> 28) & 0xF);
    byte seed09 = (byte)((rightNum >> 18) & 0xF);
    byte seed10 = (byte)((rightNum >> 22) & 0xF);
    byte seed11 = (byte)((rightNum >> 26) & 0xF);
    byte seed12 = (byte)(((rightNum >> 30) | (rightNum << 2)) & 0xF);

    // Square each 4-bit seed (at most 15 * 15 = 225, which still fits in a byte).
    seed01 *= seed01; seed02 *= seed02;
    seed03 *= seed03; seed04 *= seed04;
    seed05 *= seed05; seed06 *= seed06;
    seed07 *= seed07; seed08 *= seed08;
    seed09 *= seed09; seed10 *= seed10;
    seed11 *= seed11; seed12 *= seed12;

    // The shift amounts depend on the low seed bits and on the partition count.
    int seedHash1, seedHash2, seedHash3;

    if ((seed & 1) != 0)
    {
        seedHash1 = (seed & 2) != 0 ? 4 : 5;
        seedHash2 = (partitionCount == 3) ? 6 : 5;
    }
    else
    {
        seedHash1 = (partitionCount == 3) ? 6 : 5;
        seedHash2 = (seed & 2) != 0 ? 4 : 5;
    }

    seedHash3 = (seed & 0x10) != 0 ? seedHash1 : seedHash2;

    seed01 >>= seedHash1; seed02 >>= seedHash2; seed03 >>= seedHash1; seed04 >>= seedHash2;
    seed05 >>= seedHash1; seed06 >>= seedHash2; seed07 >>= seedHash1; seed08 >>= seedHash2;
    seed09 >>= seedHash3; seed10 >>= seedHash3; seed11 >>= seedHash3; seed12 >>= seedHash3;

    // One score per partition; only the low 6 bits of each are compared.
    int a = seed01 * x + seed02 * y + seed11 * z + (int)(rightNum >> 14);
    int b = seed03 * x + seed04 * y + seed12 * z + (int)(rightNum >> 10);
    int c = seed05 * x + seed06 * y + seed09 * z + (int)(rightNum >> 6);
    int d = seed07 * x + seed08 * y + seed10 * z + (int)(rightNum >> 2);

    a &= 0x3F; b &= 0x3F; c &= 0x3F; d &= 0x3F;

    // Scores of partitions that do not exist must never win.
    if (partitionCount < 4) d = 0;
    if (partitionCount < 3) c = 0;

    // Highest score wins; ties go to the lower partition index.
    if (a >= b && a >= c && a >= d) return 0;
    else if (b >= c && b >= d) return 1;
    else if (c >= d) return 2;

    return 3;
}
/// <summary>
/// Scrambles a 32-bit value with a fixed sequence of shift/add/xor steps.
/// Used to derive the per-seed constants for partition selection.
/// </summary>
static int Hash52(uint val)
{
    uint h = val;

    h ^= h >> 15;
    h -= h << 17;
    h += h << 7;
    h += h << 4;
    h ^= h >> 5;
    h += h << 16;
    h ^= h >> 7;
    h ^= h >> 3;
    h ^= h << 6;
    h ^= h >> 17;

    return (int)h;
}
/// <summary>
/// Unquantizes the decoded weight values and expands the weight grid to one weight
/// per texel of the block, for each weight plane.
/// </summary>
/// <param name="outputBuffer">Receives the per-texel weights, indexed as <c>t * blockWidth + s</c> within each plane.</param>
/// <param name="weights">Decoded (still quantized) weight values.</param>
/// <param name="texelParams">Weight grid size and dual-plane flag.</param>
/// <param name="blockWidth">Block footprint width in texels.</param>
/// <param name="blockHeight">Block footprint height in texels.</param>
static void UnquantizeTexelWeights(
ref Weights outputBuffer,
ref IntegerSequence weights,
ref TexelWeightParams texelParams,
int blockWidth,
int blockHeight)
{
int weightIndices = 0;
Weights unquantized;
unsafe { _ = &unquantized; } // Skip struct initialization
Span<IntegerEncoded> weightsList = weights.List;
Span<int> unquantized0 = unquantized[0];
Span<int> unquantized1 = unquantized[1];
// In dual-plane mode the values alternate: plane 0, plane 1, plane 0, ...
for (int i = 0; i < weightsList.Length; i++)
{
unquantized0[weightIndices] = UnquantizeTexelWeight(weightsList[i]);
if (texelParams.DualPlane)
{
i++;
unquantized1[weightIndices] = UnquantizeTexelWeight(weightsList[i]);
// NOTE(review): this check runs after weightsList[i] was already indexed above,
// so it cannot prevent an out-of-range access on an odd-length list.
if (i == weightsList.Length)
{
break;
}
}
// Stop once every grid cell has a weight.
if (++weightIndices >= texelParams.Width * texelParams.Height) break;
}
// Do infill if necessary (Section C.2.18) ...
// NOTE(review): a block dimension of 1 would divide by zero here.
int ds = (1024 + blockWidth / 2) / (blockWidth - 1);
int dt = (1024 + blockHeight / 2) / (blockHeight - 1);
int planeScale = texelParams.DualPlane ? 2 : 1;
for (int plane = 0; plane < planeScale; plane++)
{
Span<int> unquantizedSpan = unquantized.Get(plane);
Span<int> outputSpan = outputBuffer.Get(plane);
for (int t = 0; t < blockHeight; t++)
{
for (int s = 0; s < blockWidth; s++)
{
// Scale the texel position into weight-grid space.
int cs = ds * s;
int ct = dt * t;
int gs = (cs * (texelParams.Width - 1) + 32) >> 6;
int gt = (ct * (texelParams.Height - 1) + 32) >> 6;
// Upper bits: grid cell index. Low 4 bits: fractional position inside the cell.
int js = gs >> 4;
int fs = gs & 0xF;
int jt = gt >> 4;
int ft = gt & 0x0F;
int w11 = (fs * ft + 8) >> 4;
int v0 = js + jt * texelParams.Width;
// Start at 8 so the final ">> 4" rounds to nearest.
int weight = 8;
int wxh = texelParams.Width * texelParams.Height;
// Blend up to four neighbouring grid weights; neighbours outside the grid are skipped.
if (v0 < wxh)
{
weight += unquantizedSpan[v0] * (16 - fs - ft + w11);
if (v0 + 1 < wxh)
{
weight += unquantizedSpan[v0 + 1] * (fs - w11);
}
}
if (v0 + texelParams.Width < wxh)
{
weight += unquantizedSpan[v0 + texelParams.Width] * (ft - w11);
if (v0 + texelParams.Width + 1 < wxh)
{
weight += unquantizedSpan[v0 + texelParams.Width + 1] * w11;
}
}
outputSpan[t * blockWidth + s] = weight >> 4;
}
}
}
}
/// <summary>
/// Converts one decoded weight value (plain bits, or a trit/quint combined with bits)
/// to the range [0, 64].
/// </summary>
/// <param name="intEncoded">The decoded value together with its encoding.</param>
/// <returns>The unquantized weight.</returns>
/// <exception cref="AstcDecoderException">Thrown when the bit count is not handled for a trit or quint encoding.</exception>
static int UnquantizeTexelWeight(IntegerEncoded intEncoded)
{
int bitValue = intEncoded.BitValue;
int bitLength = intEncoded.NumberBits;
// a is derived from the lowest bit; b, c and d are filled in per encoding below.
int a = Bits.Replicate1_7(bitValue & 1);
int b = 0, c = 0, d = 0;
int result = 0;
switch (intEncoded.GetEncoding())
{
case IntegerEncoded.EIntegerEncoding.JustBits:
// Plain bits are expanded to 6 bits.
result = Bits.Replicate(bitValue, bitLength, 6);
break;
case IntegerEncoded.EIntegerEncoding.Trit:
{
d = intEncoded.TritValue;
Debug.Assert(d < 3);
switch (bitLength)
{
case 0:
{
// Trit only: the result is looked up directly.
result = d switch
{
0 => 0,
1 => 32,
2 => 63,
_ => 0
};
break;
}
case 1:
{
c = 50;
break;
}
case 2:
{
c = 23;
int b2 = (bitValue >> 1) & 1;
b = (b2 << 6) | (b2 << 2) | b2;
break;
}
case 3:
{
c = 11;
int cb = (bitValue >> 1) & 3;
b = (cb << 5) | cb;
break;
}
default:
throw new AstcDecoderException("Invalid trit encoding for texel weight.");
}
break;
}
case IntegerEncoded.EIntegerEncoding.Quint:
{
d = intEncoded.QuintValue;
Debug.Assert(d < 5);
switch (bitLength)
{
case 0:
{
// Quint only: the result is looked up directly.
result = d switch
{
0 => 0,
1 => 16,
2 => 32,
3 => 47,
4 => 63,
_ => 0
};
break;
}
case 1:
{
c = 28;
break;
}
case 2:
{
c = 13;
int b2 = (bitValue >> 1) & 1;
b = (b2 << 6) | (b2 << 1);
break;
}
default:
throw new AstcDecoderException("Invalid quint encoding for texel weight.");
}
break;
}
}
// Trit/quint values that also carry bits are combined from a, b, c and d.
if (intEncoded.GetEncoding() != IntegerEncoded.EIntegerEncoding.JustBits && bitLength > 0)
{
// Decode the value...
result = d * c + b;
result ^= a;
result = (a & 0x20) | (result >> 2);
}
Debug.Assert(result < 64);
// Change from [0,63] to [0,64]
if (result > 32)
{
result += 1;
}
return result;
}
/// <summary>
/// Reverses the order of the eight bits in <paramref name="b"/>.
/// </summary>
static byte ReverseByte(byte b)
{
    // Taken from http://graphics.stanford.edu/~seander/bithacks.html#ReverseByteWith64Bits
    long spread = b * 0x80200802L;
    long selected = spread & 0x0884422110L;
    long gathered = selected * 0x0101010101L;

    return (byte)(gathered >> 32);
}
/// <summary>
/// Returns the next <paramref name="number"/> color values, viewed as unsigned integers,
/// and advances <paramref name="colorValuesPosition"/> past them.
/// </summary>
static Span<uint> ReadUintColorValues(int number, Span<int> colorValues, ref int colorValuesPosition)
{
    int first = colorValuesPosition;
    Span<int> window = colorValues.Slice(first, number);

    colorValuesPosition = first + number;

    // Reinterpret the same memory; no values are copied.
    return MemoryMarshal.Cast<int, uint>(window);
}
/// <summary>
/// Returns the next <paramref name="number"/> color values and advances
/// <paramref name="colorValuesPosition"/> past them.
/// </summary>
static Span<int> ReadIntColorValues(int number, Span<int> colorValues, ref int colorValuesPosition)
{
    int first = colorValuesPosition;
    Span<int> window = colorValues.Slice(first, number);

    colorValuesPosition = first + number;

    return window;
}
/// <summary>
/// Builds the two color endpoints of one partition from its decoded color values.
/// </summary>
/// <param name="endPoints">Receives the endpoints in elements 0 and 1.</param>
/// <param name="colorValues">All decoded color values of the block.</param>
/// <param name="colorEndpointMode">Endpoint mode of this partition; selects how many values are consumed and how they are combined.</param>
/// <param name="colorValuesPosition">Read position in <paramref name="colorValues"/>; advanced by the number of values consumed.</param>
/// <exception cref="AstcDecoderException">Thrown for any mode not handled below.</exception>
static void ComputeEndpoints(
    Span<AstcPixel> endPoints,
    Span<int> colorValues,
    uint colorEndpointMode,
    ref int colorValuesPosition)
{
    // AstcPixel arguments are passed in the order (alpha, red, green, blue).
    switch (colorEndpointMode)
    {
        // Luminance, direct.
        case 0:
        {
            Span<uint> val = ReadUintColorValues(2, colorValues, ref colorValuesPosition);

            endPoints[0] = new AstcPixel(0xFF, (short)val[0], (short)val[0], (short)val[0]);
            endPoints[1] = new AstcPixel(0xFF, (short)val[1], (short)val[1], (short)val[1]);

            break;
        }

        // Luminance, base + offset.
        case 1:
        {
            Span<uint> val = ReadUintColorValues(2, colorValues, ref colorValuesPosition);

            int l0 = (int)((val[0] >> 2) | (val[1] & 0xC0));

            // The second endpoint is base + offset, saturated at 255. Taking the maximum
            // here would force it to 255 or above regardless of the encoded offset.
            int l1 = (int)Math.Min(l0 + (val[1] & 0x3F), 0xFFU);

            endPoints[0] = new AstcPixel(0xFF, (short)l0, (short)l0, (short)l0);
            endPoints[1] = new AstcPixel(0xFF, (short)l1, (short)l1, (short)l1);

            break;
        }

        // Luminance + alpha, direct.
        case 4:
        {
            Span<uint> val = ReadUintColorValues(4, colorValues, ref colorValuesPosition);

            endPoints[0] = new AstcPixel((short)val[2], (short)val[0], (short)val[0], (short)val[0]);
            endPoints[1] = new AstcPixel((short)val[3], (short)val[1], (short)val[1], (short)val[1]);

            break;
        }

        // Luminance + alpha, base + offset.
        case 5:
        {
            Span<int> val = ReadIntColorValues(4, colorValues, ref colorValuesPosition);

            Bits.BitTransferSigned(ref val[1], ref val[0]);
            Bits.BitTransferSigned(ref val[3], ref val[2]);

            endPoints[0] = new AstcPixel((short)val[2], (short)val[0], (short)val[0], (short)val[0]);
            endPoints[1] = new AstcPixel((short)(val[2] + val[3]), (short)(val[0] + val[1]), (short)(val[0] + val[1]), (short)(val[0] + val[1]));

            endPoints[0].ClampByte();
            endPoints[1].ClampByte();

            break;
        }

        // RGB, base + scale.
        case 6:
        {
            Span<uint> val = ReadUintColorValues(4, colorValues, ref colorValuesPosition);

            endPoints[0] = new AstcPixel(0xFF, (short)(val[0] * val[3] >> 8), (short)(val[1] * val[3] >> 8), (short)(val[2] * val[3] >> 8));
            endPoints[1] = new AstcPixel(0xFF, (short)val[0], (short)val[1], (short)val[2]);

            break;
        }

        // RGB, direct.
        case 8:
        {
            Span<uint> val = ReadUintColorValues(6, colorValues, ref colorValuesPosition);

            if (val[1] + val[3] + val[5] >= val[0] + val[2] + val[4])
            {
                endPoints[0] = new AstcPixel(0xFF, (short)val[0], (short)val[2], (short)val[4]);
                endPoints[1] = new AstcPixel(0xFF, (short)val[1], (short)val[3], (short)val[5]);
            }
            else
            {
                // Endpoints are swapped and blue-contracted when the second sum is smaller.
                endPoints[0] = AstcPixel.BlueContract(0xFF, (short)val[1], (short)val[3], (short)val[5]);
                endPoints[1] = AstcPixel.BlueContract(0xFF, (short)val[0], (short)val[2], (short)val[4]);
            }

            break;
        }

        // RGB, base + offset.
        case 9:
        {
            Span<int> val = ReadIntColorValues(6, colorValues, ref colorValuesPosition);

            Bits.BitTransferSigned(ref val[1], ref val[0]);
            Bits.BitTransferSigned(ref val[3], ref val[2]);
            Bits.BitTransferSigned(ref val[5], ref val[4]);

            if (val[1] + val[3] + val[5] >= 0)
            {
                endPoints[0] = new AstcPixel(0xFF, (short)val[0], (short)val[2], (short)val[4]);
                endPoints[1] = new AstcPixel(0xFF, (short)(val[0] + val[1]), (short)(val[2] + val[3]), (short)(val[4] + val[5]));
            }
            else
            {
                endPoints[0] = AstcPixel.BlueContract(0xFF, val[0] + val[1], val[2] + val[3], val[4] + val[5]);
                endPoints[1] = AstcPixel.BlueContract(0xFF, val[0], val[2], val[4]);
            }

            endPoints[0].ClampByte();
            endPoints[1].ClampByte();

            break;
        }

        // RGB base + scale, with two direct alpha values.
        case 10:
        {
            Span<uint> val = ReadUintColorValues(6, colorValues, ref colorValuesPosition);

            endPoints[0] = new AstcPixel((short)val[4], (short)(val[0] * val[3] >> 8), (short)(val[1] * val[3] >> 8), (short)(val[2] * val[3] >> 8));
            endPoints[1] = new AstcPixel((short)val[5], (short)val[0], (short)val[1], (short)val[2]);

            break;
        }

        // RGBA, direct.
        case 12:
        {
            Span<uint> val = ReadUintColorValues(8, colorValues, ref colorValuesPosition);

            if (val[1] + val[3] + val[5] >= val[0] + val[2] + val[4])
            {
                endPoints[0] = new AstcPixel((short)val[6], (short)val[0], (short)val[2], (short)val[4]);
                endPoints[1] = new AstcPixel((short)val[7], (short)val[1], (short)val[3], (short)val[5]);
            }
            else
            {
                endPoints[0] = AstcPixel.BlueContract((short)val[7], (short)val[1], (short)val[3], (short)val[5]);
                endPoints[1] = AstcPixel.BlueContract((short)val[6], (short)val[0], (short)val[2], (short)val[4]);
            }

            break;
        }

        // RGBA, base + offset.
        case 13:
        {
            Span<int> val = ReadIntColorValues(8, colorValues, ref colorValuesPosition);

            Bits.BitTransferSigned(ref val[1], ref val[0]);
            Bits.BitTransferSigned(ref val[3], ref val[2]);
            Bits.BitTransferSigned(ref val[5], ref val[4]);
            Bits.BitTransferSigned(ref val[7], ref val[6]);

            if (val[1] + val[3] + val[5] >= 0)
            {
                endPoints[0] = new AstcPixel((short)val[6], (short)val[0], (short)val[2], (short)val[4]);
                endPoints[1] = new AstcPixel((short)(val[7] + val[6]), (short)(val[0] + val[1]), (short)(val[2] + val[3]), (short)(val[4] + val[5]));
            }
            else
            {
                endPoints[0] = AstcPixel.BlueContract(val[6] + val[7], val[0] + val[1], val[2] + val[3], val[4] + val[5]);
                endPoints[1] = AstcPixel.BlueContract(val[6], val[0], val[2], val[4]);
            }

            endPoints[0].ClampByte();
            endPoints[1].ClampByte();

            break;
        }

        default:
            throw new AstcDecoderException("Unsupported color endpoint mode (is it HDR?)");
    }
}
/// <summary>
/// Decodes the color endpoint values of a block and unquantizes them.
/// </summary>
/// <param name="outputValues">Receives the unquantized values in stream order.</param>
/// <param name="colorBitStream">Bit stream holding only the color endpoint data.</param>
/// <param name="modes">Endpoint mode of each partition.</param>
/// <param name="numberPartitions">Number of partitions in the block.</param>
/// <param name="numberBitsForColorData">Number of bits available for color data; used to pick the value range.</param>
/// <exception cref="AstcDecoderException">Thrown when a trit or quint value has a bit count not handled below.</exception>
static void DecodeColorValues(
Span<int> outputValues,
ref BitStream128 colorBitStream,
Span<uint> modes,
int numberPartitions,
int numberBitsForColorData)
{
// First figure out how many color values we have
int numberValues = 0;
for (int i = 0; i < numberPartitions; i++)
{
// Each mode uses ((mode >> 2) + 1) * 2 values.
numberValues += (int)((modes[i] >> 2) + 1) << 1;
}
// Then based on the number of values and the remaining number of bits,
// figure out the max value for each of them...
int range = 256;
while (--range > 0)
{
IntegerEncoded intEncoded = IntegerEncoded.CreateEncoding(range);
int bitLength = intEncoded.GetBitLength(numberValues);
if (bitLength <= numberBitsForColorData)
{
// Find the smallest possible range that matches the given encoding
while (--range > 0)
{
IntegerEncoded newIntEncoded = IntegerEncoded.CreateEncoding(range);
if (!newIntEncoded.MatchesEncoding(intEncoded))
{
break;
}
}
// Return to last matching range.
range++;
break;
}
}
// We now have enough to decode our integer sequence.
IntegerSequence integerEncodedSequence;
unsafe { _ = &integerEncodedSequence; } // Skip struct initialization
integerEncodedSequence.Reset();
IntegerEncoded.DecodeIntegerSequence(ref integerEncodedSequence, ref colorBitStream, range, numberValues);
// Once we have the decoded values, we need to dequantize them to the 0-255 range
// This procedure is outlined in ASTC spec C.2.13
int outputIndices = 0;
foreach (ref IntegerEncoded intEncoded in integerEncodedSequence.List)
{
int bitLength = intEncoded.NumberBits;
int bitValue = intEncoded.BitValue;
Debug.Assert(bitLength >= 1);
int a = 0, b = 0, c = 0, d = 0;
// A is just the lsb replicated 9 times.
a = Bits.Replicate(bitValue & 1, 1, 9);
switch (intEncoded.GetEncoding())
{
case IntegerEncoded.EIntegerEncoding.JustBits:
{
// Plain bits are expanded to 8 bits and stored right away.
outputValues[outputIndices++] = Bits.Replicate(bitValue, bitLength, 8);
break;
}
case IntegerEncoded.EIntegerEncoding.Trit:
{
// d is the trit; c and b are constants chosen by the number of accompanying bits.
d = intEncoded.TritValue;
switch (bitLength)
{
case 1:
{
c = 204;
break;
}
case 2:
{
c = 93;
// B = b000b0bb0
int b2 = (bitValue >> 1) & 1;
b = (b2 << 8) | (b2 << 4) | (b2 << 2) | (b2 << 1);
break;
}
case 3:
{
c = 44;
// B = cb000cbcb
int cb = (bitValue >> 1) & 3;
b = (cb << 7) | (cb << 2) | cb;
break;
}
case 4:
{
c = 22;
// B = dcb000dcb
int dcb = (bitValue >> 1) & 7;
b = (dcb << 6) | dcb;
break;
}
case 5:
{
c = 11;
// B = edcb000ed
int edcb = (bitValue >> 1) & 0xF;
b = (edcb << 5) | (edcb >> 2);
break;
}
case 6:
{
c = 5;
// B = fedcb000f
int fedcb = (bitValue >> 1) & 0x1F;
b = (fedcb << 4) | (fedcb >> 4);
break;
}
default:
throw new AstcDecoderException("Unsupported trit encoding for color values.");
}
break;
}
case IntegerEncoded.EIntegerEncoding.Quint:
{
// d is the quint; c and b are constants chosen by the number of accompanying bits.
d = intEncoded.QuintValue;
switch (bitLength)
{
case 1:
{
c = 113;
break;
}
case 2:
{
c = 54;
// B = b0000bb00
int b2 = (bitValue >> 1) & 1;
b = (b2 << 8) | (b2 << 3) | (b2 << 2);
break;
}
case 3:
{
c = 26;
// B = cb0000cbc
int cb = (bitValue >> 1) & 3;
b = (cb << 7) | (cb << 1) | (cb >> 1);
break;
}
case 4:
{
c = 13;
// B = dcb0000dc
int dcb = (bitValue >> 1) & 7;
b = (dcb << 6) | (dcb >> 1);
break;
}
case 5:
{
c = 6;
// B = edcb0000e
int edcb = (bitValue >> 1) & 0xF;
b = (edcb << 5) | (edcb >> 3);
break;
}
default:
throw new AstcDecoderException("Unsupported quint encoding for color values.");
}
break;
}
}
// Trit and quint values are combined from a, b, c and d into the final value.
if (intEncoded.GetEncoding() != IntegerEncoded.EIntegerEncoding.JustBits)
{
int T = d * c + b;
T ^= a;
T = (a & 0x80) | (T >> 2);
outputValues[outputIndices++] = T;
}
}
// Make sure that each of our values is in the proper range...
for (int i = 0; i < numberValues; i++)
{
Debug.Assert(outputValues[i] <= 255);
}
}
/// <summary>
/// Fills a block with the single color stored in an LDR void-extent block.
/// </summary>
/// <param name="bitStream">Block bit stream, positioned at the void-extent coordinates.</param>
/// <param name="outputBuffer">Receives one packed pixel per texel, indexed as <c>row * blockWidth + column</c>.</param>
/// <param name="blockWidth">Block footprint width in texels.</param>
/// <param name="blockHeight">Block footprint height in texels.</param>
static void FillVoidExtentLdr(ref BitStream128 bitStream, Span<int> outputBuffer, int blockWidth, int blockHeight)
{
    // The four 13-bit extent coordinates are not used; read them only to skip past them.
    int skipped = 0;

    while (skipped < 4)
    {
        bitStream.ReadBits(13);
        skipped++;
    }

    // Four 16-bit channels follow, in the order red, green, blue, alpha.
    ushort r = (ushort)bitStream.ReadBits(16);
    ushort g = (ushort)bitStream.ReadBits(16);
    ushort b = (ushort)bitStream.ReadBits(16);
    ushort a = (ushort)bitStream.ReadBits(16);

    // Keep the top 8 bits of each channel and pack them with red in the lowest byte.
    int red = r >> 8;
    int green = g & 0xFF00;
    int blue = (b & 0xFF00) << 8;
    int alpha = (a & 0xFF00) << 16;

    int rgba = red | green | blue | alpha;

    for (int row = 0; row < blockHeight; row++)
    {
        int rowStart = row * blockWidth;

        for (int column = 0; column < blockWidth; column++)
        {
            outputBuffer[rowStart + column] = rgba;
        }
    }
}
/// <summary>
/// Reads the block mode from the start of the block and derives the weight grid size,
/// the maximum weight value, the dual-plane flag and the void-extent/error flags.
/// </summary>
/// <param name="bitStream">Block bit stream; 11 bits are consumed (12 for void-extent blocks).</param>
/// <param name="texelParams">Receives the decoded parameters. <c>Error</c> is set for invalid modes.</param>
static void DecodeBlockInfo(ref BitStream128 bitStream, out TexelWeightParams texelParams)
{
texelParams = new TexelWeightParams();
// Read the entire block mode all at once
ushort modeBits = (ushort)bitStream.ReadBits(11);
// Does this match the void extent block mode?
if ((modeBits & 0x01FF) == 0x1FC)
{
// Bit 9 distinguishes HDR from LDR void-extent blocks.
if ((modeBits & 0x200) != 0)
{
texelParams.VoidExtentHdr = true;
}
else
{
texelParams.VoidExtentLdr = true;
}
// Next two bits must be one.
if ((modeBits & 0x400) == 0 || bitStream.ReadBits(1) == 0)
{
texelParams.Error = true;
}
return;
}
// First check if the last four bits are zero
if ((modeBits & 0xF) == 0)
{
texelParams.Error = true;
return;
}
// If the last two bits are zero, then if bits
// [6-8] are all ones, this is also reserved.
if ((modeBits & 0x3) == 0 && (modeBits & 0x1C0) == 0x1C0)
{
texelParams.Error = true;
return;
}
// Otherwise, there is no error... Figure out the layout
// of the block mode. Layout is determined by a number
// between 0 and 9 corresponding to table C.2.8 of the
// ASTC spec.
int layout;
if ((modeBits & 0x1) != 0 || (modeBits & 0x2) != 0)
{
// layout is in [0-4]
if ((modeBits & 0x8) != 0)
{
// layout is in [2-4]
if ((modeBits & 0x4) != 0)
{
// layout is in [3-4]
if ((modeBits & 0x100) != 0)
{
layout = 4;
}
else
{
layout = 3;
}
}
else
{
layout = 2;
}
}
else
{
// layout is in [0-1]
if ((modeBits & 0x4) != 0)
{
layout = 1;
}
else
{
layout = 0;
}
}
}
else
{
// layout is in [5-9]
if ((modeBits & 0x100) != 0)
{
// layout is in [7-9]
if ((modeBits & 0x80) != 0)
{
// layout is in [7-8]
Debug.Assert((modeBits & 0x40) == 0);
if ((modeBits & 0x20) != 0)
{
layout = 8;
}
else
{
layout = 7;
}
}
else
{
layout = 9;
}
}
else
{
// layout is in [5-6]
if ((modeBits & 0x80) != 0)
{
layout = 6;
}
else
{
layout = 5;
}
}
}
Debug.Assert(layout < 10);
// Determine R
// Bit 4 is the low bit of R; the two upper bits come from bits 0-1 or bits 2-3 depending on the layout.
int r = (modeBits >> 4) & 1;
if (layout < 5)
{
r |= (modeBits & 0x3) << 1;
}
else
{
r |= (modeBits & 0xC) >> 1;
}
Debug.Assert(2 <= r && r <= 7);
// Determine width & height
// a and b are small fields of the mode bits; each layout maps them to the grid size differently.
switch (layout)
{
case 0:
{
int a = (modeBits >> 5) & 0x3;
int b = (modeBits >> 7) & 0x3;
texelParams.Width = b + 4;
texelParams.Height = a + 2;
break;
}
case 1:
{
int a = (modeBits >> 5) & 0x3;
int b = (modeBits >> 7) & 0x3;
texelParams.Width = b + 8;
texelParams.Height = a + 2;
break;
}
case 2:
{
int a = (modeBits >> 5) & 0x3;
int b = (modeBits >> 7) & 0x3;
texelParams.Width = a + 2;
texelParams.Height = b + 8;
break;
}
case 3:
{
int a = (modeBits >> 5) & 0x3;
int b = (modeBits >> 7) & 0x1;
texelParams.Width = a + 2;
texelParams.Height = b + 6;
break;
}
case 4:
{
int a = (modeBits >> 5) & 0x3;
int b = (modeBits >> 7) & 0x1;
texelParams.Width = b + 2;
texelParams.Height = a + 2;
break;
}
case 5:
{
int a = (modeBits >> 5) & 0x3;
texelParams.Width = 12;
texelParams.Height = a + 2;
break;
}
case 6:
{
int a = (modeBits >> 5) & 0x3;
texelParams.Width = a + 2;
texelParams.Height = 12;
break;
}
case 7:
{
texelParams.Width = 6;
texelParams.Height = 10;
break;
}
case 8:
{
texelParams.Width = 10;
texelParams.Height = 6;
break;
}
case 9:
{
int a = (modeBits >> 5) & 0x3;
int b = (modeBits >> 9) & 0x3;
texelParams.Width = a + 6;
texelParams.Height = b + 6;
break;
}
default:
// Don't know this layout...
texelParams.Error = true;
break;
}
// Determine whether or not we're using dual planes
// and/or high precision layouts.
// Layout 9 uses bits 9-10 for the grid height instead, so both flags are forced off there.
bool d = ((layout != 9) && ((modeBits & 0x400) != 0));
bool h = (layout != 9) && ((modeBits & 0x200) != 0);
// R (2..7) selects the maximum weight value from the high- or low-precision table.
if (h)
{
ReadOnlySpan<byte> maxWeights = new byte[] { 9, 11, 15, 19, 23, 31 };
texelParams.MaxWeight = maxWeights[r - 2];
}
else
{
ReadOnlySpan<byte> maxWeights = new byte[] { 1, 2, 3, 4, 5, 7 };
texelParams.MaxWeight = maxWeights[r - 2];
}
texelParams.DualPlane = d;
}
}
}