Use correct shader local memory size instead of a hardcoded size (#914)
* Use correct shader local size instead of a hardcoded size * Remove unused uniform block * Update XML doc * Local memory size has 23 bits on maxwell * Generate compute QMD struct from nv open doc header * Remove dummy arrays when shared or local memory is not used, other improvements
This commit is contained in:
parent
ea14a95524
commit
796e5d14b4
@ -17,29 +17,31 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
||||
/// <param name="argument">Method call argument</param>
|
||||
public void Dispatch(GpuState state, int argument)
|
||||
{
|
||||
uint dispatchParamsAddress = (uint)state.Get<int>(MethodOffset.DispatchParamsAddress);
|
||||
uint qmdAddress = (uint)state.Get<int>(MethodOffset.DispatchParamsAddress);
|
||||
|
||||
var dispatchParams = _context.MemoryAccessor.Read<ComputeParams>((ulong)dispatchParamsAddress << 8);
|
||||
var qmd = _context.MemoryAccessor.Read<ComputeQmd>((ulong)qmdAddress << 8);
|
||||
|
||||
GpuVa shaderBaseAddress = state.Get<GpuVa>(MethodOffset.ShaderBaseAddress);
|
||||
|
||||
ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)dispatchParams.ShaderOffset;
|
||||
ulong shaderGpuVa = shaderBaseAddress.Pack() + (uint)qmd.ProgramOffset;
|
||||
|
||||
// Note: A size of 0 is also invalid, the size must be at least 1.
|
||||
int sharedMemorySize = Math.Clamp(dispatchParams.SharedMemorySize & 0xffff, 1, _context.Capabilities.MaximumComputeSharedMemorySize);
|
||||
int localMemorySize = qmd.ShaderLocalMemoryLowSize + qmd.ShaderLocalMemoryHighSize;
|
||||
|
||||
int sharedMemorySize = Math.Min(qmd.SharedMemorySize, _context.Capabilities.MaximumComputeSharedMemorySize);
|
||||
|
||||
ComputeShader cs = ShaderCache.GetComputeShader(
|
||||
shaderGpuVa,
|
||||
sharedMemorySize,
|
||||
dispatchParams.UnpackBlockSizeX(),
|
||||
dispatchParams.UnpackBlockSizeY(),
|
||||
dispatchParams.UnpackBlockSizeZ());
|
||||
qmd.CtaThreadDimension0,
|
||||
qmd.CtaThreadDimension1,
|
||||
qmd.CtaThreadDimension2,
|
||||
localMemorySize,
|
||||
sharedMemorySize);
|
||||
|
||||
_context.Renderer.Pipeline.SetProgram(cs.HostProgram);
|
||||
|
||||
var samplerPool = state.Get<PoolState>(MethodOffset.SamplerPoolState);
|
||||
|
||||
TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, dispatchParams.SamplerIndex);
|
||||
TextureManager.SetComputeSamplerPool(samplerPool.Address.Pack(), samplerPool.MaximumId, qmd.SamplerIndex);
|
||||
|
||||
var texturePool = state.Get<PoolState>(MethodOffset.TexturePoolState);
|
||||
|
||||
@ -50,17 +52,19 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
||||
ShaderProgramInfo info = cs.Shader.Program.Info;
|
||||
|
||||
uint sbEnableMask = 0;
|
||||
uint ubEnableMask = dispatchParams.UnpackUniformBuffersEnableMask();
|
||||
uint ubEnableMask = 0;
|
||||
|
||||
for (int index = 0; index < dispatchParams.UniformBuffers.Length; index++)
|
||||
for (int index = 0; index < Constants.TotalCpUniformBuffers; index++)
|
||||
{
|
||||
if ((ubEnableMask & (1 << index)) == 0)
|
||||
if (!qmd.ConstantBufferValid(index))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
ulong gpuVa = dispatchParams.UniformBuffers[index].PackAddress();
|
||||
ulong size = dispatchParams.UniformBuffers[index].UnpackSize();
|
||||
ubEnableMask |= 1u << index;
|
||||
|
||||
ulong gpuVa = (uint)qmd.ConstantBufferAddrLower(index) | (ulong)qmd.ConstantBufferAddrUpper(index) << 32;
|
||||
ulong size = (ulong)qmd.ConstantBufferSize(index);
|
||||
|
||||
BufferManager.SetComputeUniformBuffer(index, gpuVa, size);
|
||||
}
|
||||
@ -131,9 +135,9 @@ namespace Ryujinx.Graphics.Gpu.Engine
|
||||
TextureManager.CommitComputeBindings();
|
||||
|
||||
_context.Renderer.Pipeline.DispatchCompute(
|
||||
dispatchParams.UnpackGridSizeX(),
|
||||
dispatchParams.UnpackGridSizeY(),
|
||||
dispatchParams.UnpackGridSizeZ());
|
||||
qmd.CtaRasterWidth,
|
||||
qmd.CtaRasterHeight,
|
||||
qmd.CtaRasterDepth);
|
||||
|
||||
UpdateShaderState(state);
|
||||
}
|
||||
|
@ -1,173 +0,0 @@
|
||||
using Ryujinx.Graphics.Gpu.State;
|
||||
using System;
|
||||
using System.Runtime.InteropServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Gpu.Engine
|
||||
{
|
||||
/// <summary>
|
||||
/// Compute uniform buffer parameters.
|
||||
/// </summary>
|
||||
struct UniformBufferParams
|
||||
{
|
||||
public int AddressLow;
|
||||
public int AddressHighAndSize;
|
||||
|
||||
/// <summary>
|
||||
/// Packs the split address to a 64-bits integer.
|
||||
/// </summary>
|
||||
/// <returns>Uniform buffer GPU virtual address</returns>
|
||||
public ulong PackAddress()
|
||||
{
|
||||
return (uint)AddressLow | ((ulong)(AddressHighAndSize & 0xff) << 32);
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the uniform buffer size in bytes.
|
||||
/// </summary>
|
||||
/// <returns>Uniform buffer size in bytes</returns>
|
||||
public ulong UnpackSize()
|
||||
{
|
||||
return (ulong)((AddressHighAndSize >> 15) & 0x1ffff);
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compute dispatch parameters.
|
||||
/// </summary>
|
||||
struct ComputeParams
|
||||
{
|
||||
public int Unknown0;
|
||||
public int Unknown1;
|
||||
public int Unknown2;
|
||||
public int Unknown3;
|
||||
public int Unknown4;
|
||||
public int Unknown5;
|
||||
public int Unknown6;
|
||||
public int Unknown7;
|
||||
public int ShaderOffset;
|
||||
public int Unknown9;
|
||||
public int Unknown10;
|
||||
public SamplerIndex SamplerIndex;
|
||||
public int GridSizeX;
|
||||
public int GridSizeYZ;
|
||||
public int Unknown14;
|
||||
public int Unknown15;
|
||||
public int Unknown16;
|
||||
public int SharedMemorySize;
|
||||
public int BlockSizeX;
|
||||
public int BlockSizeYZ;
|
||||
public int UniformBuffersConfig;
|
||||
public int Unknown21;
|
||||
public int Unknown22;
|
||||
public int Unknown23;
|
||||
public int Unknown24;
|
||||
public int Unknown25;
|
||||
public int Unknown26;
|
||||
public int Unknown27;
|
||||
public int Unknown28;
|
||||
|
||||
private UniformBufferParams _uniformBuffer0;
|
||||
private UniformBufferParams _uniformBuffer1;
|
||||
private UniformBufferParams _uniformBuffer2;
|
||||
private UniformBufferParams _uniformBuffer3;
|
||||
private UniformBufferParams _uniformBuffer4;
|
||||
private UniformBufferParams _uniformBuffer5;
|
||||
private UniformBufferParams _uniformBuffer6;
|
||||
private UniformBufferParams _uniformBuffer7;
|
||||
|
||||
/// <summary>
|
||||
/// Uniform buffer parameters.
|
||||
/// </summary>
|
||||
public Span<UniformBufferParams> UniformBuffers
|
||||
{
|
||||
get
|
||||
{
|
||||
return MemoryMarshal.CreateSpan(ref _uniformBuffer0, 8);
|
||||
}
|
||||
}
|
||||
|
||||
public int Unknown45;
|
||||
public int Unknown46;
|
||||
public int Unknown47;
|
||||
public int Unknown48;
|
||||
public int Unknown49;
|
||||
public int Unknown50;
|
||||
public int Unknown51;
|
||||
public int Unknown52;
|
||||
public int Unknown53;
|
||||
public int Unknown54;
|
||||
public int Unknown55;
|
||||
public int Unknown56;
|
||||
public int Unknown57;
|
||||
public int Unknown58;
|
||||
public int Unknown59;
|
||||
public int Unknown60;
|
||||
public int Unknown61;
|
||||
public int Unknown62;
|
||||
public int Unknown63;
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the work group X size.
|
||||
/// </summary>
|
||||
/// <returns>Work group X size</returns>
|
||||
public int UnpackGridSizeX()
|
||||
{
|
||||
return GridSizeX & 0x7fffffff;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the work group Y size.
|
||||
/// </summary>
|
||||
/// <returns>Work group Y size</returns>
|
||||
public int UnpackGridSizeY()
|
||||
{
|
||||
return GridSizeYZ & 0xffff;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the work group Z size.
|
||||
/// </summary>
|
||||
/// <returns>Work group Z size</returns>
|
||||
public int UnpackGridSizeZ()
|
||||
{
|
||||
return (GridSizeYZ >> 16) & 0xffff;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the local group X size.
|
||||
/// </summary>
|
||||
/// <returns>Local group X size</returns>
|
||||
public int UnpackBlockSizeX()
|
||||
{
|
||||
return (BlockSizeX >> 16) & 0xffff;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the local group Y size.
|
||||
/// </summary>
|
||||
/// <returns>Local group Y size</returns>
|
||||
public int UnpackBlockSizeY()
|
||||
{
|
||||
return BlockSizeYZ & 0xffff;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the local group Z size.
|
||||
/// </summary>
|
||||
/// <returns>Local group Z size</returns>
|
||||
public int UnpackBlockSizeZ()
|
||||
{
|
||||
return (BlockSizeYZ >> 16) & 0xffff;
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Unpacks the uniform buffers enable mask.
|
||||
/// Each bit set on the mask indicates that the respective buffer index is enabled.
|
||||
/// </summary>
|
||||
/// <returns>Uniform buffers enable mask</returns>
|
||||
public uint UnpackUniformBuffersEnableMask()
|
||||
{
|
||||
return (uint)UniformBuffersConfig & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
275
Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
Normal file
275
Ryujinx.Graphics.Gpu/Engine/ComputeQmd.cs
Normal file
@ -0,0 +1,275 @@
|
||||
using Ryujinx.Graphics.Gpu.State;
|
||||
using System;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Graphics.Gpu.Engine
|
||||
{
|
||||
/// <summary>
|
||||
/// Type of the dependent Queue Meta Data.
|
||||
/// </summary>
|
||||
enum DependentQmdType
|
||||
{
|
||||
Queue,
|
||||
Grid
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Type of the release memory barrier.
|
||||
/// </summary>
|
||||
enum ReleaseMembarType
|
||||
{
|
||||
FeNone,
|
||||
FeSysmembar
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Type of the CWD memory barrier.
|
||||
/// </summary>
|
||||
enum CwdMembarType
|
||||
{
|
||||
L1None,
|
||||
L1Sysmembar,
|
||||
L1Membar
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// NaN behavior of 32-bits float operations on the shader.
|
||||
/// </summary>
|
||||
enum Fp32NanBehavior
|
||||
{
|
||||
Legacy,
|
||||
Fp64Compatible
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// NaN behavior of 32-bits float to integer conversion on the shader.
|
||||
/// </summary>
|
||||
enum Fp32F2iNanBehavior
|
||||
{
|
||||
PassZero,
|
||||
PassIndefinite
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Limit of calls.
|
||||
/// </summary>
|
||||
enum ApiVisibleCallLimit
|
||||
{
|
||||
_32,
|
||||
NoCheck
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Shared memory bank mapping mode.
|
||||
/// </summary>
|
||||
enum SharedMemoryBankMapping
|
||||
{
|
||||
FourBytesPerBank,
|
||||
EightBytesPerBank
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Denormal behavior of 32-bits float narrowing instructions.
|
||||
/// </summary>
|
||||
enum Fp32NarrowInstruction
|
||||
{
|
||||
KeepDenorms,
|
||||
FlushDenorms
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Configuration of the L1 cache.
|
||||
/// </summary>
|
||||
enum L1Configuration
|
||||
{
|
||||
DirectlyAddressableMemorySize16kb,
|
||||
DirectlyAddressableMemorySize32kb,
|
||||
DirectlyAddressableMemorySize48kb
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reduction operation.
|
||||
/// </summary>
|
||||
enum ReductionOp
|
||||
{
|
||||
RedAdd,
|
||||
RedMin,
|
||||
RedMax,
|
||||
RedInc,
|
||||
RedDec,
|
||||
RedAnd,
|
||||
RedOr,
|
||||
RedXor
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Reduction format.
|
||||
/// </summary>
|
||||
enum ReductionFormat
|
||||
{
|
||||
Unsigned32,
|
||||
Signed32
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Size of a structure in words.
|
||||
/// </summary>
|
||||
enum StructureSize
|
||||
{
|
||||
FourWords,
|
||||
OneWord
|
||||
}
|
||||
|
||||
/// <summary>
|
||||
/// Compute Queue Meta Data.
|
||||
/// </summary>
|
||||
unsafe struct ComputeQmd
|
||||
{
|
||||
private fixed int _words[64];
|
||||
|
||||
public int OuterPut => BitRange(30, 0);
|
||||
public bool OuterOverflow => Bit(31);
|
||||
public int OuterGet => BitRange(62, 32);
|
||||
public bool OuterStickyOverflow => Bit(63);
|
||||
public int InnerGet => BitRange(94, 64);
|
||||
public bool InnerOverflow => Bit(95);
|
||||
public int InnerPut => BitRange(126, 96);
|
||||
public bool InnerStickyOverflow => Bit(127);
|
||||
public int QmdReservedAA => BitRange(159, 128);
|
||||
public int DependentQmdPointer => BitRange(191, 160);
|
||||
public int QmdGroupId => BitRange(197, 192);
|
||||
public bool SmGlobalCachingEnable => Bit(198);
|
||||
public bool RunCtaInOneSmPartition => Bit(199);
|
||||
public bool IsQueue => Bit(200);
|
||||
public bool AddToHeadOfQmdGroupLinkedList => Bit(201);
|
||||
public bool SemaphoreReleaseEnable0 => Bit(202);
|
||||
public bool SemaphoreReleaseEnable1 => Bit(203);
|
||||
public bool RequireSchedulingPcas => Bit(204);
|
||||
public bool DependentQmdScheduleEnable => Bit(205);
|
||||
public DependentQmdType DependentQmdType => (DependentQmdType)BitRange(206, 206);
|
||||
public bool DependentQmdFieldCopy => Bit(207);
|
||||
public int QmdReservedB => BitRange(223, 208);
|
||||
public int CircularQueueSize => BitRange(248, 224);
|
||||
public bool QmdReservedC => Bit(249);
|
||||
public bool InvalidateTextureHeaderCache => Bit(250);
|
||||
public bool InvalidateTextureSamplerCache => Bit(251);
|
||||
public bool InvalidateTextureDataCache => Bit(252);
|
||||
public bool InvalidateShaderDataCache => Bit(253);
|
||||
public bool InvalidateInstructionCache => Bit(254);
|
||||
public bool InvalidateShaderConstantCache => Bit(255);
|
||||
public int ProgramOffset => BitRange(287, 256);
|
||||
public int CircularQueueAddrLower => BitRange(319, 288);
|
||||
public int CircularQueueAddrUpper => BitRange(327, 320);
|
||||
public int QmdReservedD => BitRange(335, 328);
|
||||
public int CircularQueueEntrySize => BitRange(351, 336);
|
||||
public int CwdReferenceCountId => BitRange(357, 352);
|
||||
public int CwdReferenceCountDeltaMinusOne => BitRange(365, 358);
|
||||
public ReleaseMembarType ReleaseMembarType => (ReleaseMembarType)BitRange(366, 366);
|
||||
public bool CwdReferenceCountIncrEnable => Bit(367);
|
||||
public CwdMembarType CwdMembarType => (CwdMembarType)BitRange(369, 368);
|
||||
public bool SequentiallyRunCtas => Bit(370);
|
||||
public bool CwdReferenceCountDecrEnable => Bit(371);
|
||||
public bool Throttled => Bit(372);
|
||||
public Fp32NanBehavior Fp32NanBehavior => (Fp32NanBehavior)BitRange(376, 376);
|
||||
public Fp32F2iNanBehavior Fp32F2iNanBehavior => (Fp32F2iNanBehavior)BitRange(377, 377);
|
||||
public ApiVisibleCallLimit ApiVisibleCallLimit => (ApiVisibleCallLimit)BitRange(378, 378);
|
||||
public SharedMemoryBankMapping SharedMemoryBankMapping => (SharedMemoryBankMapping)BitRange(379, 379);
|
||||
public SamplerIndex SamplerIndex => (SamplerIndex)BitRange(382, 382);
|
||||
public Fp32NarrowInstruction Fp32NarrowInstruction => (Fp32NarrowInstruction)BitRange(383, 383);
|
||||
public int CtaRasterWidth => BitRange(415, 384);
|
||||
public int CtaRasterHeight => BitRange(431, 416);
|
||||
public int CtaRasterDepth => BitRange(447, 432);
|
||||
public int CtaRasterWidthResume => BitRange(479, 448);
|
||||
public int CtaRasterHeightResume => BitRange(495, 480);
|
||||
public int CtaRasterDepthResume => BitRange(511, 496);
|
||||
public int QueueEntriesPerCtaMinusOne => BitRange(518, 512);
|
||||
public int CoalesceWaitingPeriod => BitRange(529, 522);
|
||||
public int SharedMemorySize => BitRange(561, 544);
|
||||
public int QmdReservedG => BitRange(575, 562);
|
||||
public int QmdVersion => BitRange(579, 576);
|
||||
public int QmdMajorVersion => BitRange(583, 580);
|
||||
public int QmdReservedH => BitRange(591, 584);
|
||||
public int CtaThreadDimension0 => BitRange(607, 592);
|
||||
public int CtaThreadDimension1 => BitRange(623, 608);
|
||||
public int CtaThreadDimension2 => BitRange(639, 624);
|
||||
public bool ConstantBufferValid(int i) => Bit(640 + i * 1);
|
||||
public int QmdReservedI => BitRange(668, 648);
|
||||
public L1Configuration L1Configuration => (L1Configuration)BitRange(671, 669);
|
||||
public int SmDisableMaskLower => BitRange(703, 672);
|
||||
public int SmDisableMaskUpper => BitRange(735, 704);
|
||||
public int Release0AddressLower => BitRange(767, 736);
|
||||
public int Release0AddressUpper => BitRange(775, 768);
|
||||
public int QmdReservedJ => BitRange(783, 776);
|
||||
public ReductionOp Release0ReductionOp => (ReductionOp)BitRange(790, 788);
|
||||
public bool QmdReservedK => Bit(791);
|
||||
public ReductionFormat Release0ReductionFormat => (ReductionFormat)BitRange(793, 792);
|
||||
public bool Release0ReductionEnable => Bit(794);
|
||||
public StructureSize Release0StructureSize => (StructureSize)BitRange(799, 799);
|
||||
public int Release0Payload => BitRange(831, 800);
|
||||
public int Release1AddressLower => BitRange(863, 832);
|
||||
public int Release1AddressUpper => BitRange(871, 864);
|
||||
public int QmdReservedL => BitRange(879, 872);
|
||||
public ReductionOp Release1ReductionOp => (ReductionOp)BitRange(886, 884);
|
||||
public bool QmdReservedM => Bit(887);
|
||||
public ReductionFormat Release1ReductionFormat => (ReductionFormat)BitRange(889, 888);
|
||||
public bool Release1ReductionEnable => Bit(890);
|
||||
public StructureSize Release1StructureSize => (StructureSize)BitRange(895, 895);
|
||||
public int Release1Payload => BitRange(927, 896);
|
||||
public int ConstantBufferAddrLower(int i) => BitRange(959 + i * 64, 928 + i * 64);
|
||||
public int ConstantBufferAddrUpper(int i) => BitRange(967 + i * 64, 960 + i * 64);
|
||||
public int ConstantBufferReservedAddr(int i) => BitRange(973 + i * 64, 968 + i * 64);
|
||||
public bool ConstantBufferInvalidate(int i) => Bit(974 + i * 64);
|
||||
public int ConstantBufferSize(int i) => BitRange(991 + i * 64, 975 + i * 64);
|
||||
public int ShaderLocalMemoryLowSize => BitRange(1463, 1440);
|
||||
public int QmdReservedN => BitRange(1466, 1464);
|
||||
public int BarrierCount => BitRange(1471, 1467);
|
||||
public int ShaderLocalMemoryHighSize => BitRange(1495, 1472);
|
||||
public int RegisterCount => BitRange(1503, 1496);
|
||||
public int ShaderLocalMemoryCrsSize => BitRange(1527, 1504);
|
||||
public int SassVersion => BitRange(1535, 1528);
|
||||
public int HwOnlyInnerGet => BitRange(1566, 1536);
|
||||
public bool HwOnlyRequireSchedulingPcas => Bit(1567);
|
||||
public int HwOnlyInnerPut => BitRange(1598, 1568);
|
||||
public bool HwOnlyScgType => Bit(1599);
|
||||
public int HwOnlySpanListHeadIndex => BitRange(1629, 1600);
|
||||
public bool QmdReservedQ => Bit(1630);
|
||||
public bool HwOnlySpanListHeadIndexValid => Bit(1631);
|
||||
public int HwOnlySkedNextQmdPointer => BitRange(1663, 1632);
|
||||
public int QmdSpareE => BitRange(1695, 1664);
|
||||
public int QmdSpareF => BitRange(1727, 1696);
|
||||
public int QmdSpareG => BitRange(1759, 1728);
|
||||
public int QmdSpareH => BitRange(1791, 1760);
|
||||
public int QmdSpareI => BitRange(1823, 1792);
|
||||
public int QmdSpareJ => BitRange(1855, 1824);
|
||||
public int QmdSpareK => BitRange(1887, 1856);
|
||||
public int QmdSpareL => BitRange(1919, 1888);
|
||||
public int QmdSpareM => BitRange(1951, 1920);
|
||||
public int QmdSpareN => BitRange(1983, 1952);
|
||||
public int DebugIdUpper => BitRange(2015, 1984);
|
||||
public int DebugIdLower => BitRange(2047, 2016);
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private bool Bit(int bit)
|
||||
{
|
||||
if ((uint)bit >= 64 * 32)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(bit));
|
||||
}
|
||||
|
||||
return (_words[bit >> 5] & (1 << (bit & 31))) != 0;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private int BitRange(int upper, int lower)
|
||||
{
|
||||
if ((uint)lower >= 64 * 32)
|
||||
{
|
||||
throw new ArgumentOutOfRangeException(nameof(lower));
|
||||
}
|
||||
|
||||
int mask = (int)(uint.MaxValue >> (32 - (upper - lower + 1)));
|
||||
|
||||
return (_words[lower >> 5] >> (lower & 31)) & mask;
|
||||
}
|
||||
}
|
||||
}
|
@ -13,4 +13,12 @@
|
||||
<RuntimeIdentifiers>win-x64;osx-x64;linux-x64</RuntimeIdentifiers>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|AnyCPU'">
|
||||
<AllowUnsafeBlocks>true</AllowUnsafeBlocks>
|
||||
</PropertyGroup>
|
||||
|
||||
</Project>
|
||||
|
@ -51,12 +51,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// This automatically translates, compiles and adds the code to the cache if not present.
|
||||
/// </remarks>
|
||||
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
|
||||
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
|
||||
/// <param name="localSizeX">Local group size X of the computer shader</param>
|
||||
/// <param name="localSizeY">Local group size Y of the computer shader</param>
|
||||
/// <param name="localSizeZ">Local group size Z of the computer shader</param>
|
||||
/// <param name="localMemorySize">Local memory size of the compute shader</param>
|
||||
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
|
||||
/// <returns>Compiled compute shader code</returns>
|
||||
public ComputeShader GetComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
|
||||
public ComputeShader GetComputeShader(
|
||||
ulong gpuVa,
|
||||
int localSizeX,
|
||||
int localSizeY,
|
||||
int localSizeZ,
|
||||
int localMemorySize,
|
||||
int sharedMemorySize)
|
||||
{
|
||||
bool isCached = _cpPrograms.TryGetValue(gpuVa, out List<ComputeShader> list);
|
||||
|
||||
@ -71,7 +78,13 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
}
|
||||
}
|
||||
|
||||
CachedShader shader = TranslateComputeShader(gpuVa, sharedMemorySize, localSizeX, localSizeY, localSizeZ);
|
||||
CachedShader shader = TranslateComputeShader(
|
||||
gpuVa,
|
||||
localSizeX,
|
||||
localSizeY,
|
||||
localSizeZ,
|
||||
localMemorySize,
|
||||
sharedMemorySize);
|
||||
|
||||
shader.HostShader = _context.Renderer.CompileShader(shader.Program);
|
||||
|
||||
@ -237,12 +250,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
/// Translates the binary Maxwell shader code to something that the host API accepts.
|
||||
/// </summary>
|
||||
/// <param name="gpuVa">GPU virtual address of the binary shader code</param>
|
||||
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
|
||||
/// <param name="localSizeX">Local group size X of the computer shader</param>
|
||||
/// <param name="localSizeY">Local group size Y of the computer shader</param>
|
||||
/// <param name="localSizeZ">Local group size Z of the computer shader</param>
|
||||
/// <param name="localMemorySize">Local memory size of the compute shader</param>
|
||||
/// <param name="sharedMemorySize">Shared memory size of the compute shader</param>
|
||||
/// <returns>Compiled compute shader code</returns>
|
||||
private CachedShader TranslateComputeShader(ulong gpuVa, int sharedMemorySize, int localSizeX, int localSizeY, int localSizeZ)
|
||||
private CachedShader TranslateComputeShader(
|
||||
ulong gpuVa,
|
||||
int localSizeX,
|
||||
int localSizeY,
|
||||
int localSizeZ,
|
||||
int localMemorySize,
|
||||
int sharedMemorySize)
|
||||
{
|
||||
if (gpuVa == 0)
|
||||
{
|
||||
@ -256,6 +276,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
|
||||
QueryInfoName.ComputeLocalSizeX => localSizeX,
|
||||
QueryInfoName.ComputeLocalSizeY => localSizeY,
|
||||
QueryInfoName.ComputeLocalSizeZ => localSizeZ,
|
||||
QueryInfoName.ComputeLocalMemorySize => localMemorySize,
|
||||
QueryInfoName.ComputeSharedMemorySize => sharedMemorySize,
|
||||
_ => QueryInfoCommon(info)
|
||||
};
|
||||
|
@ -77,14 +77,7 @@ namespace Ryujinx.Graphics.OpenGL
|
||||
|
||||
Bind();
|
||||
|
||||
int extraBlockindex = GL.GetUniformBlockIndex(Handle, "Extra");
|
||||
|
||||
if (extraBlockindex >= 0)
|
||||
{
|
||||
GL.UniformBlockBinding(Handle, extraBlockindex, 0);
|
||||
}
|
||||
|
||||
int ubBindingPoint = 1;
|
||||
int ubBindingPoint = 0;
|
||||
int sbBindingPoint = 0;
|
||||
int textureUnit = 0;
|
||||
int imageUnit = 0;
|
||||
|
@ -47,25 +47,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
|
||||
context.AppendLine();
|
||||
}
|
||||
|
||||
context.AppendLine("layout (std140) uniform Extra");
|
||||
|
||||
context.EnterScope();
|
||||
|
||||
context.AppendLine("vec2 flip;");
|
||||
context.AppendLine("int instance;");
|
||||
|
||||
context.LeaveScope(";");
|
||||
|
||||
context.AppendLine();
|
||||
|
||||
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[0x100];");
|
||||
context.AppendLine();
|
||||
|
||||
if (context.Config.Stage == ShaderStage.Compute)
|
||||
{
|
||||
string size = NumberFormatter.FormatInt(BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4));
|
||||
int localMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeLocalMemorySize), 4);
|
||||
|
||||
context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{size}];");
|
||||
if (localMemorySize != 0)
|
||||
{
|
||||
string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
|
||||
|
||||
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
|
||||
context.AppendLine();
|
||||
}
|
||||
|
||||
int sharedMemorySize = BitUtils.DivRoundUp(context.Config.QueryInfo(QueryInfoName.ComputeSharedMemorySize), 4);
|
||||
|
||||
if (sharedMemorySize != 0)
|
||||
{
|
||||
string sharedMemorySizeStr = NumberFormatter.FormatInt(sharedMemorySize);
|
||||
|
||||
context.AppendLine($"shared uint {DefaultNames.SharedMemoryName}[{sharedMemorySizeStr}];");
|
||||
context.AppendLine();
|
||||
}
|
||||
}
|
||||
else if (context.Config.LocalMemorySize != 0)
|
||||
{
|
||||
int localMemorySize = BitUtils.DivRoundUp(context.Config.LocalMemorySize, 4);
|
||||
|
||||
string localMemorySizeStr = NumberFormatter.FormatInt(localMemorySize);
|
||||
|
||||
context.AppendLine($"uint {DefaultNames.LocalMemoryName}[{localMemorySizeStr}];");
|
||||
context.AppendLine();
|
||||
}
|
||||
|
||||
|
@ -5,6 +5,7 @@ namespace Ryujinx.Graphics.Shader
|
||||
ComputeLocalSizeX,
|
||||
ComputeLocalSizeY,
|
||||
ComputeLocalSizeZ,
|
||||
ComputeLocalMemorySize,
|
||||
ComputeSharedMemorySize,
|
||||
IsTextureBuffer,
|
||||
IsTextureRectangle,
|
||||
|
@ -10,6 +10,8 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
|
||||
public int MaxOutputVertices { get; }
|
||||
|
||||
public int LocalMemorySize { get; }
|
||||
|
||||
public OutputMapTarget[] OmapTargets { get; }
|
||||
public bool OmapSampleMask { get; }
|
||||
public bool OmapDepth { get; }
|
||||
@ -23,6 +25,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
Stage = ShaderStage.Compute;
|
||||
OutputTopology = OutputTopology.PointList;
|
||||
MaxOutputVertices = 0;
|
||||
LocalMemorySize = 0;
|
||||
OmapTargets = null;
|
||||
OmapSampleMask = false;
|
||||
OmapDepth = false;
|
||||
@ -35,6 +38,7 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
Stage = header.Stage;
|
||||
OutputTopology = header.OutputTopology;
|
||||
MaxOutputVertices = header.MaxOutputVertexCount;
|
||||
LocalMemorySize = header.ShaderLocalMemoryLowSize + header.ShaderLocalMemoryHighSize;
|
||||
OmapTargets = header.OmapTargets;
|
||||
OmapSampleMask = header.OmapSampleMask;
|
||||
OmapDepth = header.OmapDepth;
|
||||
@ -80,6 +84,8 @@ namespace Ryujinx.Graphics.Shader.Translation
|
||||
case QueryInfoName.ComputeLocalSizeY:
|
||||
case QueryInfoName.ComputeLocalSizeZ:
|
||||
return 1;
|
||||
case QueryInfoName.ComputeLocalMemorySize:
|
||||
return 0x1000;
|
||||
case QueryInfoName.ComputeSharedMemorySize:
|
||||
return 0xc000;
|
||||
case QueryInfoName.IsTextureBuffer:
|
||||
|
Loading…
Reference in New Issue
Block a user