diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs
index 2660e528..316e027f 100644
--- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheCollection.cs
@@ -38,6 +38,11 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
///
RemoveManifestEntries,
+ ///
+ /// Remove an entry from the hash manifest and save it, and also delete the entry's temporary file.
+ ///
+ RemoveManifestEntryAndTempFile,
+
///
/// Flush temporary cache to archive.
///
@@ -116,6 +121,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
///
private ZipArchive _cacheArchive;
+ ///
+ /// Indicates if the cache collection is read-only (add, replace and remove requests are ignored).
+ ///
public bool IsReadOnly { get; }
///
@@ -264,6 +272,21 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
}
}
+ /// <summary>
+ /// Remove given entry from the manifest and delete the temporary file.
+ /// </summary>
+ /// <param name="entry">Entry to remove from the manifest</param>
+ private void RemoveManifestEntryAndTempFile(Hash128 entry)
+ {
+ // The hash table update and the manifest save happen under the same lock.
+ // NOTE(review): the manifest is saved even when the entry was not present in the table.
+ lock (_hashTable)
+ {
+ _hashTable.Remove(entry);
+ SaveManifest();
+ }
+
+ // The temporary file is deleted after the lock has been released.
+ File.Delete(GenCacheTempFilePath(entry));
+ }
+
///
/// Queue a task to flush temporary files to the archive on the worker.
///
@@ -440,6 +463,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
case CacheFileOperation.RemoveManifestEntries:
RemoveManifestEntries((HashSet)task.Data);
break;
+ case CacheFileOperation.RemoveManifestEntryAndTempFile:
+ RemoveManifestEntryAndTempFile((Hash128)task.Data);
+ break;
case CacheFileOperation.FlushToArchive:
FlushToArchive();
break;
@@ -472,7 +498,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
{
if (IsReadOnly)
{
- Logger.Warning?.Print(LogClass.Gpu, "Trying to add {keyHash} on a read-only cache, ignoring.");
+ Logger.Warning?.Print(LogClass.Gpu, $"Trying to add {keyHash} on a read-only cache, ignoring.");
return;
}
@@ -521,7 +547,7 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
{
if (IsReadOnly)
{
- Logger.Warning?.Print(LogClass.Gpu, "Trying to replace {keyHash} on a read-only cache, ignoring.");
+ Logger.Warning?.Print(LogClass.Gpu, $"Trying to replace {keyHash} on a read-only cache, ignoring.");
return;
}
@@ -540,6 +566,27 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
});
}
+ /// <summary>
+ /// Removes a value at the given hash from the cache.
+ /// </summary>
+ /// <remarks>
+ /// Nothing is removed synchronously: a <see cref="CacheFileOperation.RemoveManifestEntryAndTempFile"/>
+ /// task is queued on the file writer worker queue. On a read-only cache the request is logged and ignored.
+ /// </remarks>
+ /// <param name="keyHash">The hash of the value in the cache</param>
+ public void RemoveValue(ref Hash128 keyHash)
+ {
+ if (IsReadOnly)
+ {
+ Logger.Warning?.Print(LogClass.Gpu, $"Trying to remove {keyHash} on a read-only cache, ignoring.");
+
+ return;
+ }
+
+ // Only queue file change operations
+ _fileWriterWorkerQueue.Add(new CacheFileOperationTask
+ {
+ Type = CacheFileOperation.RemoveManifestEntryAndTempFile,
+ Data = keyHash
+ });
+ }
+
public void Dispose()
{
Dispose(true);
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs
index f6caddef..33da42db 100644
--- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheHelper.cs
@@ -371,11 +371,13 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
///
/// Create guest shader cache entries from the runtime contexts.
///
- /// The GPU memory manager in use
+ /// The GPU channel in use
/// The runtime contexts
/// Guest shader cahe entries from the runtime contexts
- public static GuestShaderCacheEntry[] CreateShaderCacheEntries(MemoryManager memoryManager, ReadOnlySpan shaderContexts)
+ public static GuestShaderCacheEntry[] CreateShaderCacheEntries(GpuChannel channel, ReadOnlySpan shaderContexts)
{
+ MemoryManager memoryManager = channel.MemoryManager;
+
int startIndex = shaderContexts.Length > 1 ? 1 : 0;
GuestShaderCacheEntry[] entries = new GuestShaderCacheEntry[shaderContexts.Length - startIndex];
@@ -389,31 +391,66 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
continue;
}
+ GpuAccessor gpuAccessor = context.GpuAccessor as GpuAccessor;
+
+ ulong cb1DataAddress;
+ int cb1DataSize = gpuAccessor?.Cb1DataSize ?? 0;
+
+ if (context.Stage == ShaderStage.Compute)
+ {
+ cb1DataAddress = channel.BufferManager.GetComputeUniformBufferAddress(1);
+ }
+ else
+ {
+ int stageIndex = context.Stage switch
+ {
+ ShaderStage.TessellationControl => 1,
+ ShaderStage.TessellationEvaluation => 2,
+ ShaderStage.Geometry => 3,
+ ShaderStage.Fragment => 4,
+ _ => 0
+ };
+
+ cb1DataAddress = channel.BufferManager.GetGraphicsUniformBufferAddress(stageIndex, 1);
+ }
+
+ int size = context.Size;
+
TranslatorContext translatorContext2 = i == 1 ? shaderContexts[0] : null;
int sizeA = translatorContext2 != null ? translatorContext2.Size : 0;
- byte[] code = new byte[context.Size + sizeA];
+ byte[] code = new byte[size + cb1DataSize + sizeA];
- memoryManager.GetSpan(context.Address, context.Size).CopyTo(code);
+ memoryManager.GetSpan(context.Address, size).CopyTo(code);
+
+ if (cb1DataAddress != 0 && cb1DataSize != 0)
+ {
+ memoryManager.Physical.GetSpan(cb1DataAddress, cb1DataSize).CopyTo(code.AsSpan().Slice(size, cb1DataSize));
+ }
if (translatorContext2 != null)
{
- memoryManager.GetSpan(translatorContext2.Address, sizeA).CopyTo(code.AsSpan().Slice(context.Size, sizeA));
+ memoryManager.GetSpan(translatorContext2.Address, sizeA).CopyTo(code.AsSpan().Slice(size + cb1DataSize, sizeA));
}
GuestGpuAccessorHeader gpuAccessorHeader = CreateGuestGpuAccessorCache(context.GpuAccessor);
- if (context.GpuAccessor is GpuAccessor)
+ if (gpuAccessor != null)
{
gpuAccessorHeader.TextureDescriptorCount = context.TextureHandlesForCache.Count;
}
- GuestShaderCacheEntryHeader header = new GuestShaderCacheEntryHeader(context.Stage, context.Size, sizeA, gpuAccessorHeader);
+ GuestShaderCacheEntryHeader header = new GuestShaderCacheEntryHeader(
+ context.Stage,
+ size + cb1DataSize,
+ sizeA,
+ cb1DataSize,
+ gpuAccessorHeader);
GuestShaderCacheEntry entry = new GuestShaderCacheEntry(header, code);
- if (context.GpuAccessor is GpuAccessor gpuAccessor)
+ if (gpuAccessor != null)
{
foreach (int textureHandle in context.TextureHandlesForCache)
{
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs
index 1ac37704..3fc11e82 100644
--- a/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/CacheManager.cs
@@ -114,6 +114,16 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache
_hostProgramCache.ReplaceValue(ref programCodeHash, data);
}
+ /// <summary>
+ /// Removes a shader program present in the program cache.
+ /// </summary>
+ /// <remarks>
+ /// The removal is requested on both the guest program cache and the host program cache.
+ /// </remarks>
+ /// <param name="programCodeHash">Target program code hash</param>
+ public void RemoveProgram(ref Hash128 programCodeHash)
+ {
+ _guestProgramCache.RemoveValue(ref programCodeHash);
+ _hostProgramCache.RemoveValue(ref programCodeHash);
+ }
+
///
/// Get all guest program hashes.
///
diff --git a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs
index 6d5bb28d..9b22cac5 100644
--- a/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/Cache/Definition/GuestShaderCacheEntryHeader.cs
@@ -40,9 +40,9 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
public int SizeA;
///
- /// Unused/reserved.
+ /// Constant buffer 1 data size.
///
- public int Reserved4;
+ public int Cb1DataSize;
///
/// The header of the cached gpu accessor.
@@ -55,12 +55,14 @@ namespace Ryujinx.Graphics.Gpu.Shader.Cache.Definition
/// The stage of this shader
/// The size of the code section
/// The size of the code2 section if present (Vertex A)
+ /// Constant buffer 1 data size
/// The header of the cached gpu accessor
- public GuestShaderCacheEntryHeader(ShaderStage stage, int size, int sizeA, GuestGpuAccessorHeader gpuAccessorHeader) : this()
+ public GuestShaderCacheEntryHeader(ShaderStage stage, int size, int sizeA, int cb1DataSize, GuestGpuAccessorHeader gpuAccessorHeader) : this()
{
Stage = stage;
- Size = size;
+ Size = size;
SizeA = sizeA;
+ Cb1DataSize = cb1DataSize;
GpuAccessorHeader = gpuAccessorHeader;
}
}
diff --git a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs
index a7bd4edb..452dfd83 100644
--- a/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/CachedGpuAccessor.cs
@@ -11,6 +11,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
{
private readonly GpuContext _context;
private readonly ReadOnlyMemory _data;
+ private readonly ReadOnlyMemory _cb1Data;
private readonly GuestGpuAccessorHeader _header;
private readonly Dictionary _textureDescriptors;
@@ -19,12 +20,19 @@ namespace Ryujinx.Graphics.Gpu.Shader
///
/// GPU context
/// The data of the shader
+ /// The constant buffer 1 data of the shader
/// The cache of the GPU accessor
/// The cache of the texture descriptors
- public CachedGpuAccessor(GpuContext context, ReadOnlyMemory data, GuestGpuAccessorHeader header, Dictionary guestTextureDescriptors)
+ public CachedGpuAccessor(
+ GpuContext context,
+ ReadOnlyMemory data,
+ ReadOnlyMemory cb1Data,
+ GuestGpuAccessorHeader header,
+ Dictionary guestTextureDescriptors)
{
_context = context;
_data = data;
+ _cb1Data = cb1Data;
_header = header;
_textureDescriptors = new Dictionary();
@@ -34,6 +42,16 @@ namespace Ryujinx.Graphics.Gpu.Shader
}
}
+ /// <summary>
+ /// Reads data from the constant buffer 1.
+ /// </summary>
+ /// <param name="offset">Offset in bytes to read from</param>
+ /// <returns>Value at the given offset</returns>
+ public uint ConstantBuffer1Read(int offset)
+ {
+     // Reinterpret the cached bytes starting at "offset" as 32-bit words and take the first one.
+     // An offset that leaves fewer than 4 bytes available makes this throw.
+     return MemoryMarshal.Cast<byte, uint>(_cb1Data.Span.Slice(offset))[0];
+ }
+
///
/// Prints a log message.
///
diff --git a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
index b7059b51..6254b1c2 100644
--- a/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/GpuAccessor.cs
@@ -20,6 +20,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
private readonly int _localMemorySize;
private readonly int _sharedMemorySize;
+ /// <summary>
+ /// Size in bytes of the constant buffer 1 range read so far through ConstantBuffer1Read
+ /// (the highest offset read plus 4, or 0 if nothing was read).
+ /// </summary>
+ public int Cb1DataSize { get; private set; }
+
///
/// Creates a new instance of the GPU state accessor for graphics shader translation.
///
@@ -67,6 +69,25 @@ namespace Ryujinx.Graphics.Gpu.Shader
_sharedMemorySize = sharedMemorySize;
}
+ /// <summary>
+ /// Reads data from the constant buffer 1.
+ /// </summary>
+ /// <param name="offset">Offset in bytes to read from</param>
+ /// <returns>Value at the given offset</returns>
+ public uint ConstantBuffer1Read(int offset)
+ {
+     // Keep track of the furthest byte that was read, so the size of the
+     // constant buffer 1 range used by this shader is known afterwards.
+     if (Cb1DataSize < offset + 4)
+     {
+         Cb1DataSize = offset + 4;
+     }
+
+     // Compute and graphics keep their uniform buffer bindings separately.
+     ulong baseAddress = _compute
+         ? _channel.BufferManager.GetComputeUniformBufferAddress(1)
+         : _channel.BufferManager.GetGraphicsUniformBufferAddress(_stageIndex, 1);
+
+     return _channel.MemoryManager.Physical.Read<uint>(baseAddress + (ulong)offset);
+ }
+
///
/// Prints a log message.
///
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
index a5712a14..754449fb 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCache.cs
@@ -38,7 +38,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
///
/// Version of the codegen (to be changed when codegen or guest format change).
///
- private const ulong ShaderCodeGenVersion = 2469;
+ private const ulong ShaderCodeGenVersion = 2530;
// Progress reporting helpers
private volatile int _shaderCount;
@@ -112,7 +112,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
int programIndex = 0;
List activeTasks = new List();
- AutoResetEvent taskDoneEvent = new AutoResetEvent(false);
+ using AutoResetEvent taskDoneEvent = new AutoResetEvent(false);
// This thread dispatches tasks to do shader translation, and creates programs that OpenGL will link in the background.
// The program link status is checked in a non-blocking manner so that multiple shaders can be compiled at once.
@@ -191,7 +191,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
Task compileTask = Task.Run(() =>
{
- IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+ var binaryCode = new Memory(entry.Code);
+
+ var gpuAccessor = new CachedGpuAccessor(
+ _context,
+ binaryCode,
+ binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
+ entry.Header.GpuAccessorHeader,
+ entry.TextureDescriptors);
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, DefaultFlags | TranslationFlags.Compute);
program = Translator.CreateContext(0, gpuAccessor, options).Translate(out shaderProgramInfo);
@@ -199,12 +206,20 @@ namespace Ryujinx.Graphics.Gpu.Shader
task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
{
+ if (task.IsFaulted)
+ {
+ Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding...");
+
+ _cacheManager.RemoveProgram(ref key);
+ return true; // Exit early, the decoding step failed.
+ }
+
ShaderCodeHolder shader = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
Logger.Info?.Print(LogClass.Gpu, $"Host shader {key} got invalidated, rebuilding from guest...");
// Compile shader and create program as the shader program binary got invalidated.
- shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, shader.Program.Code);
+ shader.HostShader = _context.Renderer.CompileShader(ShaderStage.Compute, program.Code);
hostProgram = _context.Renderer.CreateProgram(new IShader[] { shader.HostShader }, null);
task.OnCompiled(hostProgram, (bool isNewProgramValid, ShaderCompileTask task) =>
@@ -298,7 +313,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
}
else
{
- IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+ var binaryCode = new Memory<byte>(entry.Code);
+
+ // The cached entry is laid out as [code][constant buffer 1 data][code A (Vertex A)],
+ // and Header.Size covers the first two regions. The constant buffer data therefore
+ // ends at Header.Size, not at the end of the buffer, when a Vertex A section is present.
+ var gpuAccessor = new CachedGpuAccessor(
+     _context,
+     binaryCode,
+     binaryCode.Slice(entry.Header.Size - entry.Header.Cb1DataSize, entry.Header.Cb1DataSize),
+     entry.Header.GpuAccessorHeader,
+     entry.TextureDescriptors);
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
var options2 = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags | TranslationFlags.VertexA);
@@ -310,7 +332,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
}
// NOTE: Vertex B comes first in the shader cache.
- byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size).ToArray();
+ byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
byte[] code2 = entry.Code.AsSpan().Slice(entry.Header.Size, entry.Header.SizeA).ToArray();
shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code, code2);
@@ -326,13 +348,22 @@ namespace Ryujinx.Graphics.Gpu.Shader
}
else
{
- IGpuAccessor gpuAccessor = new CachedGpuAccessor(_context, entry.Code, entry.Header.GpuAccessorHeader, entry.TextureDescriptors);
+ var binaryCode = new Memory(entry.Code);
+
+ var gpuAccessor = new CachedGpuAccessor(
+ _context,
+ binaryCode,
+ binaryCode.Slice(binaryCode.Length - entry.Header.Cb1DataSize),
+ entry.Header.GpuAccessorHeader,
+ entry.TextureDescriptors);
var options = new TranslationOptions(TargetLanguage.Glsl, TargetApi.OpenGL, flags);
program = Translator.CreateContext(0, gpuAccessor, options, counts).Translate(out shaderProgramInfo);
}
- shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, entry.Code);
+ byte[] code = entry.Code.AsSpan().Slice(0, entry.Header.Size - entry.Header.Cb1DataSize).ToArray();
+
+ shaders[i] = new ShaderCodeHolder(program, shaderProgramInfo, code);
}
shaderPrograms.Add(program);
@@ -341,6 +372,14 @@ namespace Ryujinx.Graphics.Gpu.Shader
task.OnTask(compileTask, (bool _, ShaderCompileTask task) =>
{
+ if (task.IsFaulted)
+ {
+ Logger.Warning?.Print(LogClass.Gpu, $"Host shader {key} is corrupted or incompatible, discarding...");
+
+ _cacheManager.RemoveProgram(ref key);
+ return true; // Exit early, the decoding step failed.
+ }
+
// If the host program was rejected by the gpu driver or isn't in cache, try to build from program sources again.
if (!isHostProgramValid)
{
@@ -537,7 +576,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
isShaderCacheReadOnly = _cacheManager.IsReadOnly;
// Compute hash and prepare data for shader disk cache comparison.
- shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel.MemoryManager, shaderContexts);
+ shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts);
programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries);
}
@@ -659,7 +698,7 @@ namespace Ryujinx.Graphics.Gpu.Shader
isShaderCacheReadOnly = _cacheManager.IsReadOnly;
// Compute hash and prepare data for shader disk cache comparison.
- shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel.MemoryManager, shaderContexts);
+ shaderCacheEntries = CacheHelper.CreateShaderCacheEntries(channel, shaderContexts);
programCodeHash = CacheHelper.ComputeGuestHashFromCache(shaderCacheEntries, tfd);
}
diff --git a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs
index ff48fab0..a9283de2 100644
--- a/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs
+++ b/Ryujinx.Graphics.Gpu/Shader/ShaderCompileTask.cs
@@ -1,5 +1,4 @@
using Ryujinx.Graphics.GAL;
-using System;
using System.Threading;
using System.Threading.Tasks;
@@ -20,6 +19,8 @@ namespace Ryujinx.Graphics.Gpu.Shader
private ShaderCompileTaskCallback _action;
private AutoResetEvent _taskDoneEvent;
+ /// <summary>
+ /// Forwards the IsFaulted state of the task stored in _programsTask.
+ /// </summary>
+ public bool IsFaulted => _programsTask.IsFaulted;
+
///
/// Create a new shader compile task, with an event to signal whenever a subtask completes.
///
diff --git a/Ryujinx.Graphics.Shader/Decoders/Block.cs b/Ryujinx.Graphics.Shader/Decoders/Block.cs
index e1470237..69cb55b9 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Block.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Block.cs
@@ -8,10 +8,38 @@ namespace Ryujinx.Graphics.Shader.Decoders
public ulong Address { get; set; }
public ulong EndAddress { get; set; }
- public Block Next { get; set; }
- public Block Branch { get; set; }
+ private Block _next;
+ private Block _branch;
- public OpCodeBranchIndir BrIndir { get; set; }
+ /// <summary>
+ /// Block at the next address, executed when the branch ending this block is not taken.
+ /// Setting it keeps the predecessor set of the old and new targets up to date.
+ /// </summary>
+ public Block Next
+ {
+     get
+     {
+         return _next;
+     }
+     set
+     {
+         // Predecessors is a set, so a single entry stands for both edges when Next and
+         // Branch share a target. Only drop it if the Branch edge does not still need it.
+         if (_next != null && _next != _branch)
+         {
+             _next.Predecessors.Remove(this);
+         }
+
+         value?.Predecessors.Add(this);
+         _next = value;
+     }
+ }
+
+ /// <summary>
+ /// Block the branch instruction ending this block points to (when taken).
+ /// Setting it keeps the predecessor set of the old and new targets up to date.
+ /// </summary>
+ public Block Branch
+ {
+     get
+     {
+         return _branch;
+     }
+     set
+     {
+         // Predecessors is a set, so a single entry stands for both edges when Next and
+         // Branch share a target. Only drop it if the Next edge does not still need it.
+         if (_branch != null && _branch != _next)
+         {
+             _branch.Predecessors.Remove(this);
+         }
+
+         value?.Predecessors.Add(this);
+         _branch = value;
+     }
+ }
+
+ /// <summary>
+ /// Blocks that can transfer control to this block.
+ /// Maintained by the Next and Branch setters, and added to directly by the decoder.
+ /// </summary>
+ public HashSet<Block> Predecessors { get; }
public List OpCodes { get; }
public List PushOpCodes { get; }
@@ -20,6 +48,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
{
Address = address;
+ Predecessors = new HashSet();
+
OpCodes = new List();
PushOpCodes = new List();
}
diff --git a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
index 9ca58177..c916935e 100644
--- a/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
+++ b/Ryujinx.Graphics.Shader/Decoders/Decoder.cs
@@ -9,8 +9,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
{
static class Decoder
{
- public const ulong ShaderEndDelimiter = 0xe2400fffff87000f;
-
public static Block[][] Decode(IGpuAccessor gpuAccessor, ulong startAddress, out bool hasBindless)
{
hasBindless = false;
@@ -51,130 +49,139 @@ namespace Ryujinx.Graphics.Shader.Decoders
GetBlock(funcAddress);
- while (workQueue.TryDequeue(out Block currBlock))
+ bool hasNewTarget;
+
+ do
{
- // Check if the current block is inside another block.
- if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
+ while (workQueue.TryDequeue(out Block currBlock))
{
- Block nBlock = blocks[nBlkIndex];
-
- if (nBlock.Address == currBlock.Address)
+ // Check if the current block is inside another block.
+ if (BinarySearch(blocks, currBlock.Address, out int nBlkIndex))
{
- throw new InvalidOperationException("Found duplicate block address on the list.");
- }
+ Block nBlock = blocks[nBlkIndex];
- nBlock.Split(currBlock);
- blocks.Insert(nBlkIndex + 1, currBlock);
-
- continue;
- }
-
- // If we have a block after the current one, set the limit address.
- ulong limitAddress = ulong.MaxValue;
-
- if (nBlkIndex != blocks.Count)
- {
- Block nBlock = blocks[nBlkIndex];
-
- int nextIndex = nBlkIndex + 1;
-
- if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
- {
- limitAddress = blocks[nextIndex].Address;
- }
- else if (nBlock.Address > currBlock.Address)
- {
- limitAddress = blocks[nBlkIndex].Address;
- }
- }
-
- FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
- hasBindless |= blockHasBindless;
-
- if (currBlock.OpCodes.Count != 0)
- {
- // We should have blocks for all possible branch targets,
- // including those from SSY/PBK instructions.
- foreach (OpCodePush pushOp in currBlock.PushOpCodes)
- {
- GetBlock(pushOp.GetAbsoluteAddress());
- }
-
- // Set child blocks. "Branch" is the block the branch instruction
- // points to (when taken), "Next" is the block at the next address,
- // executed when the branch is not taken. For Unconditional Branches
- // or end of program, Next is null.
- OpCode lastOp = currBlock.GetLastOp();
-
- if (lastOp is OpCodeBranch opBr)
- {
- if (lastOp.Emitter == InstEmit.Cal)
+ if (nBlock.Address == currBlock.Address)
{
- EnqueueFunction(opBr.GetAbsoluteAddress());
+ throw new InvalidOperationException("Found duplicate block address on the list.");
}
- else
+
+ nBlock.Split(currBlock);
+ blocks.Insert(nBlkIndex + 1, currBlock);
+
+ continue;
+ }
+
+ // If we have a block after the current one, set the limit address.
+ ulong limitAddress = ulong.MaxValue;
+
+ if (nBlkIndex != blocks.Count)
+ {
+ Block nBlock = blocks[nBlkIndex];
+
+ int nextIndex = nBlkIndex + 1;
+
+ if (nBlock.Address < currBlock.Address && nextIndex < blocks.Count)
{
- currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
+ limitAddress = blocks[nextIndex].Address;
+ }
+ else if (nBlock.Address > currBlock.Address)
+ {
+ limitAddress = blocks[nBlkIndex].Address;
}
}
- else if (lastOp is OpCodeBranchIndir opBrIndir)
+
+ FillBlock(gpuAccessor, currBlock, limitAddress, startAddress, out bool blockHasBindless);
+ hasBindless |= blockHasBindless;
+
+ if (currBlock.OpCodes.Count != 0)
{
- // An indirect branch could go anywhere, we don't know the target.
- // Those instructions are usually used on a switch to jump table
- // compiler optimization, and in those cases the possible targets
- // seems to be always right after the BRX itself. We can assume
- // that the possible targets are all the blocks in-between the
- // instruction right after the BRX, and the common target that
- // all the "cases" should eventually jump to, acting as the
- // switch break.
- Block firstTarget = GetBlock(currBlock.EndAddress);
+ // We should have blocks for all possible branch targets,
+ // including those from SSY/PBK instructions.
+ foreach (OpCodePush pushOp in currBlock.PushOpCodes)
+ {
+ GetBlock(pushOp.GetAbsoluteAddress());
+ }
- firstTarget.BrIndir = opBrIndir;
+ // Set child blocks. "Branch" is the block the branch instruction
+ // points to (when taken), "Next" is the block at the next address,
+ // executed when the branch is not taken. For Unconditional Branches
+ // or end of program, Next is null.
+ OpCode lastOp = currBlock.GetLastOp();
- opBrIndir.PossibleTargets.Add(firstTarget);
+ if (lastOp is OpCodeBranch opBr)
+ {
+ if (lastOp.Emitter == InstEmit.Cal)
+ {
+ EnqueueFunction(opBr.GetAbsoluteAddress());
+ }
+ else
+ {
+ currBlock.Branch = GetBlock(opBr.GetAbsoluteAddress());
+ }
+ }
+
+ if (!IsUnconditionalBranch(lastOp))
+ {
+ currBlock.Next = GetBlock(currBlock.EndAddress);
+ }
}
- if (!IsUnconditionalBranch(lastOp))
+ // Insert the new block on the list (sorted by address).
+ if (blocks.Count != 0)
{
- currBlock.Next = GetBlock(currBlock.EndAddress);
+ Block nBlock = blocks[nBlkIndex];
+
+ blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
+ }
+ else
+ {
+ blocks.Add(currBlock);
}
}
- // Insert the new block on the list (sorted by address).
- if (blocks.Count != 0)
+ // Propagate SSY/PBK addresses into their uses (SYNC/BRK).
+ foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
{
- Block nBlock = blocks[nBlkIndex];
-
- blocks.Insert(nBlkIndex + (nBlock.Address < currBlock.Address ? 1 : 0), currBlock);
- }
- else
- {
- blocks.Add(currBlock);
- }
-
- // Do we have a block after the current one?
- if (currBlock.BrIndir != null && HasBlockAfter(gpuAccessor, currBlock, startAddress))
- {
- bool targetVisited = visited.ContainsKey(currBlock.EndAddress);
-
- Block possibleTarget = GetBlock(currBlock.EndAddress);
-
- currBlock.BrIndir.PossibleTargets.Add(possibleTarget);
-
- if (!targetVisited)
+ for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
{
- possibleTarget.BrIndir = currBlock.BrIndir;
+ PropagatePushOp(visited, block, pushOpIndex);
}
}
+
+ // Try to find target for BRX (indirect branch) instructions.
+ hasNewTarget = false;
+
+ foreach (Block block in blocks)
+ {
+ if (block.GetLastOp() is OpCodeBranchIndir opBrIndir && opBrIndir.PossibleTargets.Count == 0)
+ {
+ ulong baseOffset = opBrIndir.Address + 8 + (ulong)opBrIndir.Offset;
+
+ // An indirect branch could go anywhere,
+ // try to get the possible target offsets from the constant buffer.
+ (int cbBaseOffset, int cbOffsetsCount) = FindBrxTargetRange(block, opBrIndir.Ra.Index);
+
+ if (cbOffsetsCount != 0)
+ {
+ hasNewTarget = true;
+ }
+
+ for (int i = 0; i < cbOffsetsCount; i++)
+ {
+ uint targetOffset = gpuAccessor.ConstantBuffer1Read(cbBaseOffset + i * 4);
+ Block target = GetBlock(baseOffset + targetOffset);
+ opBrIndir.PossibleTargets.Add(target);
+ target.Predecessors.Add(block);
+ }
+ }
+ }
+
+ // If we discovered new branch targets from the BRX instruction,
+ // we need another round of decoding to decode the new blocks.
+ // Additionally, we may have more SSY/PBK targets to propagate,
+ // and new BRX instructions.
}
-
- foreach (Block block in blocks.Where(x => x.PushOpCodes.Count != 0))
- {
- for (int pushOpIndex = 0; pushOpIndex < block.PushOpCodes.Count; pushOpIndex++)
- {
- PropagatePushOp(visited, block, pushOpIndex);
- }
- }
+ while (hasNewTarget);
funcs.Add(blocks.ToArray());
}
@@ -182,19 +189,6 @@ namespace Ryujinx.Graphics.Shader.Decoders
return funcs.ToArray();
}
- private static bool HasBlockAfter(IGpuAccessor gpuAccessor, Block currBlock, ulong startAdddress)
- {
- if (!gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress) ||
- !gpuAccessor.MemoryMapped(startAdddress + currBlock.EndAddress + 7))
- {
- return false;
- }
-
- ulong inst = gpuAccessor.MemoryRead(startAdddress + currBlock.EndAddress);
-
- return inst != 0UL && inst != ShaderEndDelimiter;
- }
-
private static bool BinarySearch(List blocks, ulong address, out int index)
{
index = 0;
@@ -320,6 +314,115 @@ namespace Ryujinx.Graphics.Shader.Decoders
opCode is OpCodeExit;
}
+ /// <summary>
+ /// Tries to find the range of jump offsets on constant buffer 1 used by a BRX instruction.
+ /// </summary>
+ /// <param name="block">Block ending with the BRX instruction</param>
+ /// <param name="brxReg">Index of the register holding the BRX offset</param>
+ /// <returns>
+ /// Byte offset of the first jump offset on the constant buffer and the number of offsets,
+ /// or (0, 0) if the expected instruction pattern was not found
+ /// </returns>
+ private static (int, int) FindBrxTargetRange(Block block, int brxReg)
+ {
+     // Try to match the following pattern:
+     //
+     // IMNMX.U32 Rx, Rx, UpperBound, PT
+     // SHL Rx, Rx, 0x2
+     // LDC Rx, c[0x1][Rx+BaseOffset]
+     //
+     // Here, Rx is an arbitrary register, "UpperBound" and "BaseOffset" are constants.
+     // The above pattern is assumed to be generated by the compiler before BRX,
+     // as the instruction is usually used to implement jump tables for switch statement optimizations.
+     // On a successful match, "BaseOffset" is the offset in bytes where the jump offsets are
+     // located on the constant buffer, and "UpperBound" is the total number of offsets for the BRX, minus 1.
+
+     HashSet<Block> visited = new HashSet<Block>();
+
+     // Start right before the BRX itself (the last instruction of the block).
+     var ldcLocation = FindFirstRegWrite(visited, new BlockLocation(block, block.OpCodes.Count - 1), brxReg);
+     if (ldcLocation.Block == null || ldcLocation.Block.OpCodes[ldcLocation.Index] is not OpCodeLdc opLdc)
+     {
+         return (0, 0);
+     }
+
+     if (opLdc.Slot != 1 || opLdc.IndexMode != CbIndexMode.Default)
+     {
+         return (0, 0);
+     }
+
+     var shlLocation = FindFirstRegWrite(visited, ldcLocation, opLdc.Ra.Index);
+     if (shlLocation.Block == null || shlLocation.Block.OpCodes[shlLocation.Index] is not OpCodeAluImm opShl)
+     {
+         return (0, 0);
+     }
+
+     if (opShl.Emitter != InstEmit.Shl || opShl.Immediate != 2)
+     {
+         return (0, 0);
+     }
+
+     var imnmxLocation = FindFirstRegWrite(visited, shlLocation, opShl.Ra.Index);
+     if (imnmxLocation.Block == null || imnmxLocation.Block.OpCodes[imnmxLocation.Index] is not OpCodeAluImm opImnmx)
+     {
+         return (0, 0);
+     }
+
+     bool isImnmxS32 = opImnmx.RawOpCode.Extract(48);
+
+     if (opImnmx.Emitter != InstEmit.Imnmx || isImnmxS32 || !opImnmx.Predicate39.IsPT || opImnmx.InvertP)
+     {
+         return (0, 0);
+     }
+
+     // A negative upper bound cannot describe a jump table. Returning a non-positive,
+     // non-zero count would make the caller believe new targets were found without
+     // adding any, so the match is rejected instead.
+     if (opImnmx.Immediate < 0)
+     {
+         return (0, 0);
+     }
+
+     return (opLdc.Offset, opImnmx.Immediate + 1);
+ }
+
+ /// <summary>
+ /// Position of an instruction: a block and an index into its instruction list.
+ /// </summary>
+ private readonly struct BlockLocation
+ {
+     /// <summary>
+     /// Block containing the instruction (null is used to signal "not found").
+     /// </summary>
+     public Block Block { get; }
+
+     /// <summary>
+     /// Index of the instruction inside the block.
+     /// </summary>
+     public int Index { get; }
+
+     public BlockLocation(Block block, int index)
+     {
+         Block = block;
+         Index = index;
+     }
+ }
+
+ /// <summary>
+ /// Searches backwards from a location, through predecessor blocks if needed,
+ /// for the closest instruction that writes to a given register.
+ /// </summary>
+ /// <param name="visited">Blocks that were already visited; updated by the search and shared between calls</param>
+ /// <param name="location">Location to start from; the instruction at this location itself is not checked</param>
+ /// <param name="regIndex">Index of the register to look for</param>
+ /// <returns>Location of the write, or a location with a null block if none was found</returns>
+ private static BlockLocation FindFirstRegWrite(HashSet<Block> visited, BlockLocation location, int regIndex)
+ {
+     Queue<BlockLocation> toVisit = new Queue<BlockLocation>();
+     toVisit.Enqueue(location);
+     visited.Add(location.Block);
+
+     while (toVisit.TryDequeue(out var currentLocation))
+     {
+         Block block = currentLocation.Block;
+
+         // Scan the instructions before the current location, last to first.
+         for (int i = currentLocation.Index - 1; i >= 0; i--)
+         {
+             if (WritesToRegister(block.OpCodes[i], regIndex))
+             {
+                 return new BlockLocation(block, i);
+             }
+         }
+
+         // Nothing on this block, continue on the predecessors not visited yet.
+         // The index is one past the last instruction so that the whole block is scanned.
+         foreach (Block predecessor in block.Predecessors)
+         {
+             if (visited.Add(predecessor))
+             {
+                 toVisit.Enqueue(new BlockLocation(predecessor, predecessor.OpCodes.Count));
+             }
+         }
+     }
+
+     return new BlockLocation(null, 0);
+ }
+
+ /// <summary>
+ /// Checks if an instruction writes to the register with the given index.
+ /// </summary>
+ /// <param name="opCode">Instruction to check</param>
+ /// <param name="regIndex">Index of the register</param>
+ /// <returns>True if the instruction has a destination register with that index, false otherwise</returns>
+ private static bool WritesToRegister(OpCode opCode, int regIndex)
+ {
+ // Predicate instruction only ever writes to predicate, so we shouldn't check those.
+ if (opCode.Emitter == InstEmit.Fsetp ||
+ opCode.Emitter == InstEmit.Hsetp2 ||
+ opCode.Emitter == InstEmit.Isetp ||
+ opCode.Emitter == InstEmit.R2p)
+ {
+ return false;
+ }
+
+ // Any other instruction counts only if it exposes a destination register (IOpCodeRd) with a matching index.
+ return opCode is IOpCodeRd opRd && opRd.Rd.Index == regIndex;
+ }
+
private enum MergeType
{
Brk = 0,
@@ -388,6 +491,8 @@ namespace Ryujinx.Graphics.Shader.Decoders
{
OpCodePush pushOp = currBlock.PushOpCodes[pushOpIndex];
+ Block target = blocks[pushOp.GetAbsoluteAddress()];
+
Stack workQueue = new Stack();
HashSet visited = new HashSet();
@@ -497,10 +602,12 @@ namespace Ryujinx.Graphics.Shader.Decoders
if (branchStack.Count == 0)
{
// If the entire stack was consumed, then the current pop instruction
- // just consumed the address from out push instruction.
- op.Targets.Add(pushOp, op.Targets.Count);
-
- pushOp.PopOps.TryAdd(op, Local());
+ // just consumed the address from our push instruction.
+ if (op.Targets.TryAdd(pushOp, op.Targets.Count))
+ {
+ pushOp.PopOps.Add(op, Local());
+ target.Predecessors.Add(current);
+ }
}
else
{
diff --git a/Ryujinx.Graphics.Shader/IGpuAccessor.cs b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
index 26a8cafd..04f23061 100644
--- a/Ryujinx.Graphics.Shader/IGpuAccessor.cs
+++ b/Ryujinx.Graphics.Shader/IGpuAccessor.cs
@@ -7,6 +7,11 @@
// No default log output.
}
+ /// <summary>
+ /// Reads a 32-bit value from the constant buffer 1.
+ /// </summary>
+ /// <param name="offset">Offset in bytes to read from</param>
+ /// <returns>Value at the given offset; this default implementation always returns 0</returns>
+ uint ConstantBuffer1Read(int offset)
+ {
+ return 0;
+ }
+
T MemoryRead(ulong address) where T : unmanaged;
bool MemoryMapped(ulong address)
diff --git a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
index d4ab5955..1f5bf35b 100644
--- a/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
+++ b/Ryujinx.Graphics.Shader/Instructions/InstEmitFlow.cs
@@ -25,6 +25,12 @@ namespace Ryujinx.Graphics.Shader.Instructions
{
OpCodeBranchIndir op = (OpCodeBranchIndir)context.CurrOp;
+ if (op.PossibleTargets.Count == 0)
+ {
+ context.Config.GpuAccessor.Log($"Failed to find targets for BRX instruction at 0x{op.Address:X}.");
+ return;
+ }
+
int offset = (int)op.Address + 8 + op.Offset;
Operand address = context.IAdd(Register(op.Ra), Const(offset));