Use shader subgroup extensions if shader ballot is not supported (#2627)

* Use shader subgroup extensions if shader ballot is not supported

* Shader cache version bump + cleanup

* The type is still required on the table
This commit is contained in:
gdkchan 2021-09-19 09:38:39 -03:00 committed by GitHub
parent 7379bc2f39
commit f08a280ade
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
16 changed files with 125 additions and 42 deletions

View File

@ -9,6 +9,7 @@ namespace Ryujinx.Graphics.GAL
public bool SupportsImageLoadFormatted { get; } public bool SupportsImageLoadFormatted { get; }
public bool SupportsMismatchingViewFormat { get; } public bool SupportsMismatchingViewFormat { get; }
public bool SupportsNonConstantTextureOffset { get; } public bool SupportsNonConstantTextureOffset { get; }
public bool SupportsShaderBallot { get; }
public bool SupportsTextureShadowLod { get; } public bool SupportsTextureShadowLod { get; }
public bool SupportsViewportSwizzle { get; } public bool SupportsViewportSwizzle { get; }
public bool SupportsIndirectParameters { get; } public bool SupportsIndirectParameters { get; }
@ -24,6 +25,7 @@ namespace Ryujinx.Graphics.GAL
bool supportsImageLoadFormatted, bool supportsImageLoadFormatted,
bool supportsMismatchingViewFormat, bool supportsMismatchingViewFormat,
bool supportsNonConstantTextureOffset, bool supportsNonConstantTextureOffset,
bool supportsShaderBallot,
bool supportsTextureShadowLod, bool supportsTextureShadowLod,
bool supportsViewportSwizzle, bool supportsViewportSwizzle,
bool supportsIndirectParameters, bool supportsIndirectParameters,
@ -37,6 +39,7 @@ namespace Ryujinx.Graphics.GAL
SupportsImageLoadFormatted = supportsImageLoadFormatted; SupportsImageLoadFormatted = supportsImageLoadFormatted;
SupportsMismatchingViewFormat = supportsMismatchingViewFormat; SupportsMismatchingViewFormat = supportsMismatchingViewFormat;
SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset; SupportsNonConstantTextureOffset = supportsNonConstantTextureOffset;
SupportsShaderBallot = supportsShaderBallot;
SupportsTextureShadowLod = supportsTextureShadowLod; SupportsTextureShadowLod = supportsTextureShadowLod;
SupportsViewportSwizzle = supportsViewportSwizzle; SupportsViewportSwizzle = supportsViewportSwizzle;
SupportsIndirectParameters = supportsIndirectParameters; SupportsIndirectParameters = supportsIndirectParameters;

View File

@ -40,7 +40,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <summary> /// <summary>
/// Version of the codegen (to be changed when codegen or guest format change). /// Version of the codegen (to be changed when codegen or guest format change).
/// </summary> /// </summary>
private const ulong ShaderCodeGenVersion = 2613; private const ulong ShaderCodeGenVersion = 2627;
// Progress reporting helpers // Progress reporting helpers
private volatile int _shaderCount; private volatile int _shaderCount;

View File

@ -47,6 +47,12 @@ namespace Ryujinx.Graphics.Gpu.Shader
/// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns> /// <returns>True if the GPU and driver supports non-constant texture offsets, false otherwise</returns>
public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset; public bool QueryHostSupportsNonConstantTextureOffset() => _context.Capabilities.SupportsNonConstantTextureOffset;
/// <summary>
/// Queries host GPU shader ballot support.
/// </summary>
/// <returns>True if the GPU and driver supports shader ballot, false otherwise</returns>
public bool QueryHostSupportsShaderBallot() => _context.Capabilities.SupportsShaderBallot;
/// <summary> /// <summary>
/// Queries host GPU texture shadow LOD support. /// Queries host GPU texture shadow LOD support.
/// </summary> /// </summary>

View File

@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.OpenGL
private static readonly Lazy<bool> _supportsPolygonOffsetClamp = new Lazy<bool>(() => HasExtension("GL_EXT_polygon_offset_clamp")); private static readonly Lazy<bool> _supportsPolygonOffsetClamp = new Lazy<bool>(() => HasExtension("GL_EXT_polygon_offset_clamp"));
private static readonly Lazy<bool> _supportsQuads = new Lazy<bool>(SupportsQuadsCheck); private static readonly Lazy<bool> _supportsQuads = new Lazy<bool>(SupportsQuadsCheck);
private static readonly Lazy<bool> _supportsSeamlessCubemapPerTexture = new Lazy<bool>(() => HasExtension("GL_ARB_seamless_cubemap_per_texture")); private static readonly Lazy<bool> _supportsSeamlessCubemapPerTexture = new Lazy<bool>(() => HasExtension("GL_ARB_seamless_cubemap_per_texture"));
private static readonly Lazy<bool> _supportsShaderBallot = new Lazy<bool>(() => HasExtension("GL_ARB_shader_ballot"));
private static readonly Lazy<bool> _supportsTextureShadowLod = new Lazy<bool>(() => HasExtension("GL_EXT_texture_shadow_lod")); private static readonly Lazy<bool> _supportsTextureShadowLod = new Lazy<bool>(() => HasExtension("GL_EXT_texture_shadow_lod"));
private static readonly Lazy<bool> _supportsViewportSwizzle = new Lazy<bool>(() => HasExtension("GL_NV_viewport_swizzle")); private static readonly Lazy<bool> _supportsViewportSwizzle = new Lazy<bool>(() => HasExtension("GL_NV_viewport_swizzle"));
private static readonly Lazy<bool> _supportsIndirectParameters = new Lazy<bool>(() => HasExtension("GL_ARB_indirect_parameters")); private static readonly Lazy<bool> _supportsIndirectParameters = new Lazy<bool>(() => HasExtension("GL_ARB_indirect_parameters"));
@ -45,6 +46,7 @@ namespace Ryujinx.Graphics.OpenGL
public static bool SupportsPolygonOffsetClamp => _supportsPolygonOffsetClamp.Value; public static bool SupportsPolygonOffsetClamp => _supportsPolygonOffsetClamp.Value;
public static bool SupportsQuads => _supportsQuads.Value; public static bool SupportsQuads => _supportsQuads.Value;
public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value; public static bool SupportsSeamlessCubemapPerTexture => _supportsSeamlessCubemapPerTexture.Value;
public static bool SupportsShaderBallot => _supportsShaderBallot.Value;
public static bool SupportsTextureShadowLod => _supportsTextureShadowLod.Value; public static bool SupportsTextureShadowLod => _supportsTextureShadowLod.Value;
public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value; public static bool SupportsViewportSwizzle => _supportsViewportSwizzle.Value;
public static bool SupportsIndirectParameters => _supportsIndirectParameters.Value; public static bool SupportsIndirectParameters => _supportsIndirectParameters.Value;

View File

@ -107,6 +107,7 @@ namespace Ryujinx.Graphics.OpenGL
HwCapabilities.SupportsImageLoadFormatted, HwCapabilities.SupportsImageLoadFormatted,
HwCapabilities.SupportsMismatchingViewFormat, HwCapabilities.SupportsMismatchingViewFormat,
HwCapabilities.SupportsNonConstantTextureOffset, HwCapabilities.SupportsNonConstantTextureOffset,
HwCapabilities.SupportsShaderBallot,
HwCapabilities.SupportsTextureShadowLod, HwCapabilities.SupportsTextureShadowLod,
HwCapabilities.SupportsViewportSwizzle, HwCapabilities.SupportsViewportSwizzle,
HwCapabilities.SupportsIndirectParameters, HwCapabilities.SupportsIndirectParameters,

View File

@ -13,7 +13,17 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{ {
context.AppendLine("#version 450 core"); context.AppendLine("#version 450 core");
context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable"); context.AppendLine("#extension GL_ARB_gpu_shader_int64 : enable");
if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
{
context.AppendLine("#extension GL_ARB_shader_ballot : enable"); context.AppendLine("#extension GL_ARB_shader_ballot : enable");
}
else
{
context.AppendLine("#extension GL_KHR_shader_subgroup_basic : enable");
context.AppendLine("#extension GL_KHR_shader_subgroup_ballot : enable");
}
context.AppendLine("#extension GL_ARB_shader_group_vote : enable"); context.AppendLine("#extension GL_ARB_shader_group_vote : enable");
context.AppendLine("#extension GL_EXT_shader_image_load_formatted : enable"); context.AppendLine("#extension GL_EXT_shader_image_load_formatted : enable");
context.AppendLine("#extension GL_EXT_texture_shadow_lod : enable"); context.AppendLine("#extension GL_EXT_texture_shadow_lod : enable");
@ -531,6 +541,17 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName); code = code.Replace("$SHARED_MEM$", DefaultNames.SharedMemoryName);
code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix); code = code.Replace("$STORAGE_MEM$", OperandManager.GetShaderStagePrefix(context.Config.Stage) + "_" + DefaultNames.StorageNamePrefix);
if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
{
code = code.Replace("$SUBGROUP_INVOCATION$", "gl_SubGroupInvocationARB");
code = code.Replace("$SUBGROUP_BROADCAST$", "readInvocationARB");
}
else
{
code = code.Replace("$SUBGROUP_INVOCATION$", "gl_SubgroupInvocationID");
code = code.Replace("$SUBGROUP_BROADCAST$", "subgroupBroadcast");
}
context.AppendLine(code); context.AppendLine(code);
context.AppendLine(); context.AppendLine();
} }

View File

@ -2,10 +2,10 @@ float Helper_Shuffle(float x, uint index, uint mask, out bool valid)
{ {
uint clamp = mask & 0x1fu; uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu; uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask; uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask); uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = (index & ~segMask) | minThreadId; uint srcThreadId = (index & ~segMask) | minThreadId;
valid = srcThreadId <= maxThreadId; valid = srcThreadId <= maxThreadId;
float v = readInvocationARB(x, srcThreadId); float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x; return valid ? v : x;
} }

View File

@ -2,10 +2,10 @@ float Helper_ShuffleDown(float x, uint index, uint mask, out bool valid)
{ {
uint clamp = mask & 0x1fu; uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu; uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask; uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask); uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB + index; uint srcThreadId = $SUBGROUP_INVOCATION$ + index;
valid = srcThreadId <= maxThreadId; valid = srcThreadId <= maxThreadId;
float v = readInvocationARB(x, srcThreadId); float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x; return valid ? v : x;
} }

View File

@ -1,9 +1,9 @@
float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid) float Helper_ShuffleUp(float x, uint index, uint mask, out bool valid)
{ {
uint segMask = (mask >> 8) & 0x1fu; uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask; uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint srcThreadId = gl_SubGroupInvocationARB - index; uint srcThreadId = $SUBGROUP_INVOCATION$ - index;
valid = int(srcThreadId) >= int(minThreadId); valid = int(srcThreadId) >= int(minThreadId);
float v = readInvocationARB(x, srcThreadId); float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x; return valid ? v : x;
} }

View File

@ -2,10 +2,10 @@ float Helper_ShuffleXor(float x, uint index, uint mask, out bool valid)
{ {
uint clamp = mask & 0x1fu; uint clamp = mask & 0x1fu;
uint segMask = (mask >> 8) & 0x1fu; uint segMask = (mask >> 8) & 0x1fu;
uint minThreadId = gl_SubGroupInvocationARB & segMask; uint minThreadId = $SUBGROUP_INVOCATION$ & segMask;
uint maxThreadId = minThreadId | (clamp & ~segMask); uint maxThreadId = minThreadId | (clamp & ~segMask);
uint srcThreadId = gl_SubGroupInvocationARB ^ index; uint srcThreadId = $SUBGROUP_INVOCATION$ ^ index;
valid = srcThreadId <= maxThreadId; valid = srcThreadId <= maxThreadId;
float v = readInvocationARB(x, srcThreadId); float v = $SUBGROUP_BROADCAST$(x, srcThreadId);
return valid ? v : x; return valid ? v : x;
} }

View File

@ -2,6 +2,6 @@ float Helper_SwizzleAdd(float x, float y, int mask)
{ {
vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0); vec4 xLut = vec4(1.0, -1.0, 1.0, 0.0);
vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0); vec4 yLut = vec4(1.0, 1.0, -1.0, 1.0);
int lutIdx = mask >> int(gl_SubGroupInvocationARB & 3u) * 2; int lutIdx = mask >> int($SUBGROUP_INVOCATION$ & 3u) * 2;
return x * xLut[lutIdx] + y * yLut[lutIdx]; return x * xLut[lutIdx] + y * yLut[lutIdx];
} }

View File

@ -2,6 +2,7 @@ using Ryujinx.Graphics.Shader.IntermediateRepresentation;
using Ryujinx.Graphics.Shader.StructuredIr; using Ryujinx.Graphics.Shader.StructuredIr;
using System; using System;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenBallot;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenCall; using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenCall;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper; using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory; using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenMemory;
@ -75,14 +76,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
} }
} }
if (inst == Instruction.Ballot) return info.OpName + '(' + args + ')';
{
return $"unpackUint2x32({info.OpName}({args})).x";
}
else
{
return info.OpName + "(" + args + ")";
}
} }
else if ((info.Type & InstType.Op) != 0) else if ((info.Type & InstType.Op) != 0)
{ {
@ -128,6 +122,9 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{ {
switch (inst) switch (inst)
{ {
case Instruction.Ballot:
return Ballot(context, operation);
case Instruction.Call: case Instruction.Call:
return Call(context, operation); return Call(context, operation);

View File

@ -0,0 +1,26 @@
using Ryujinx.Graphics.Shader.StructuredIr;
using static Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions.InstGenHelper;
using static Ryujinx.Graphics.Shader.StructuredIr.InstructionInfo;
namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
{
static class InstGenBallot
{
public static string Ballot(CodeGenContext context, AstOperation operation)
{
VariableType dstType = GetSrcVarType(operation.Inst, 0);
string arg = GetSoureExpr(context, operation.GetSource(0), dstType);
if (context.Config.GpuAccessor.QueryHostSupportsShaderBallot())
{
return $"unpackUint2x32(ballotARB({arg})).x";
}
else
{
return $"subgroupBallot({arg}).x";
}
}
}
}

View File

@ -25,7 +25,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl.Instructions
Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomicXor"); Add(Instruction.AtomicXor, InstType.AtomicBinary, "atomicXor");
Add(Instruction.Absolute, InstType.CallUnary, "abs"); Add(Instruction.Absolute, InstType.CallUnary, "abs");
Add(Instruction.Add, InstType.OpBinaryCom, "+", 2); Add(Instruction.Add, InstType.OpBinaryCom, "+", 2);
Add(Instruction.Ballot, InstType.CallUnary, "ballotARB"); Add(Instruction.Ballot, InstType.Special);
Add(Instruction.Barrier, InstType.CallNullary, "barrier"); Add(Instruction.Barrier, InstType.CallNullary, "barrier");
Add(Instruction.BitCount, InstType.CallUnary, "bitCount"); Add(Instruction.BitCount, InstType.CallUnary, "bitCount");
Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract"); Add(Instruction.BitfieldExtractS32, InstType.CallTernary, "bitfieldExtract");

View File

@ -60,12 +60,12 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
{ AttributeConsts.CtaIdX, new BuiltInAttribute("gl_WorkGroupID.x", VariableType.U32) }, { AttributeConsts.CtaIdX, new BuiltInAttribute("gl_WorkGroupID.x", VariableType.U32) },
{ AttributeConsts.CtaIdY, new BuiltInAttribute("gl_WorkGroupID.y", VariableType.U32) }, { AttributeConsts.CtaIdY, new BuiltInAttribute("gl_WorkGroupID.y", VariableType.U32) },
{ AttributeConsts.CtaIdZ, new BuiltInAttribute("gl_WorkGroupID.z", VariableType.U32) }, { AttributeConsts.CtaIdZ, new BuiltInAttribute("gl_WorkGroupID.z", VariableType.U32) },
{ AttributeConsts.LaneId, new BuiltInAttribute("gl_SubGroupInvocationARB", VariableType.U32) }, { AttributeConsts.LaneId, new BuiltInAttribute(null, VariableType.U32) },
{ AttributeConsts.EqMask, new BuiltInAttribute("unpackUint2x32(gl_SubGroupEqMaskARB).x", VariableType.U32) }, { AttributeConsts.EqMask, new BuiltInAttribute(null, VariableType.U32) },
{ AttributeConsts.GeMask, new BuiltInAttribute("unpackUint2x32(gl_SubGroupGeMaskARB).x", VariableType.U32) }, { AttributeConsts.GeMask, new BuiltInAttribute(null, VariableType.U32) },
{ AttributeConsts.GtMask, new BuiltInAttribute("unpackUint2x32(gl_SubGroupGtMaskARB).x", VariableType.U32) }, { AttributeConsts.GtMask, new BuiltInAttribute(null, VariableType.U32) },
{ AttributeConsts.LeMask, new BuiltInAttribute("unpackUint2x32(gl_SubGroupLeMaskARB).x", VariableType.U32) }, { AttributeConsts.LeMask, new BuiltInAttribute(null, VariableType.U32) },
{ AttributeConsts.LtMask, new BuiltInAttribute("unpackUint2x32(gl_SubGroupLtMaskARB).x", VariableType.U32) }, { AttributeConsts.LtMask, new BuiltInAttribute(null, VariableType.U32) },
// Support uniforms. // Support uniforms.
{ AttributeConsts.FragmentOutputIsBgraBase + 0, new BuiltInAttribute($"{DefaultNames.SupportBlockIsBgraName}[0]", VariableType.Bool) }, { AttributeConsts.FragmentOutputIsBgraBase + 0, new BuiltInAttribute($"{DefaultNames.SupportBlockIsBgraName}[0]", VariableType.Bool) },
@ -149,6 +149,7 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
public static string GetAttributeName(int value, ShaderConfig config, bool isOutAttr = false, string indexExpr = "0") public static string GetAttributeName(int value, ShaderConfig config, bool isOutAttr = false, string indexExpr = "0")
{ {
value &= ~3;
char swzMask = GetSwizzleMask((value >> 2) & 3); char swzMask = GetSwizzleMask((value >> 2) & 3);
if (value >= AttributeConsts.UserAttributeBase && value < AttributeConsts.UserAttributeEnd) if (value >= AttributeConsts.UserAttributeBase && value < AttributeConsts.UserAttributeEnd)
@ -201,12 +202,35 @@ namespace Ryujinx.Graphics.Shader.CodeGen.Glsl
return $"{DefaultNames.OAttributePrefix}{(value >> 4)}.{swzMask}"; return $"{DefaultNames.OAttributePrefix}{(value >> 4)}.{swzMask}";
} }
else if (_builtInAttributes.TryGetValue(value & ~3, out BuiltInAttribute builtInAttr)) else if (_builtInAttributes.TryGetValue(value, out BuiltInAttribute builtInAttr))
{ {
string subgroupMask = value switch
{
AttributeConsts.EqMask => "Eq",
AttributeConsts.GeMask => "Ge",
AttributeConsts.GtMask => "Gt",
AttributeConsts.LeMask => "Le",
AttributeConsts.LtMask => "Lt",
_ => null
};
if (subgroupMask != null)
{
return config.GpuAccessor.QueryHostSupportsShaderBallot()
? $"unpackUint2x32(gl_SubGroup{subgroupMask}MaskARB).x"
: $"gl_Subgroup{subgroupMask}Mask.x";
}
else if (value == AttributeConsts.LaneId)
{
return config.GpuAccessor.QueryHostSupportsShaderBallot()
? "gl_SubGroupInvocationARB"
: "gl_SubgroupInvocationID";
}
// TODO: There must be a better way to handle this... // TODO: There must be a better way to handle this...
if (config.Stage == ShaderStage.Fragment) if (config.Stage == ShaderStage.Fragment)
{ {
switch (value & ~3) switch (value)
{ {
case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])"; case AttributeConsts.PositionX: return $"(gl_FragCoord.x / {DefaultNames.SupportBlockRenderScaleName}[0])";
case AttributeConsts.PositionY: return $"(gl_FragCoord.y / {DefaultNames.SupportBlockRenderScaleName}[0])"; case AttributeConsts.PositionY: return $"(gl_FragCoord.y / {DefaultNames.SupportBlockRenderScaleName}[0])";

View File

@ -1,6 +1,4 @@
using Ryujinx.Graphics.Shader.Decoders; namespace Ryujinx.Graphics.Shader
namespace Ryujinx.Graphics.Shader
{ {
public interface IGpuAccessor public interface IGpuAccessor
{ {
@ -76,6 +74,11 @@ namespace Ryujinx.Graphics.Shader
return true; return true;
} }
bool QueryHostSupportsShaderBallot()
{
return true;
}
bool QueryHostSupportsTextureShadowLod() bool QueryHostSupportsTextureShadowLod()
{ {
return true; return true;