Use SIMD acceleration for audio upsampler (#4410)
* Use SIMD acceleration for the audio upsampler filter kernel for a moderate speedup.
* Address formatting. Implement an AVX2 fast path for high-quality resampling in ResamplerHelper.
* Question whether inlining 50+ line methods really provides any benefit.
* Add unit tests for the resampler and upsampler. The upsampler tests fail for some reason.
* Fix the upsampler test. Apparently this algorithm only works at specific ratios.
---------
Co-authored-by: Logan Stromberg <lostromb@microsoft.com>
This commit is contained in:
parent
fc43aecbbd
commit
edfd4d70c0
@ -1,5 +1,6 @@
|
||||
using System;
|
||||
using System.Linq;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Runtime.Intrinsics;
|
||||
using System.Runtime.Intrinsics.X86;
|
||||
@ -380,7 +381,6 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
return _normalCurveLut2F;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private unsafe static void ResampleDefaultQuality(Span<float> outputBuffer, ReadOnlySpan<short> inputBuffer, float ratio, ref float fraction, int sampleCount, bool needPitch)
|
||||
{
|
||||
ReadOnlySpan<float> parameters = GetDefaultParameter(ratio);
|
||||
@ -394,7 +394,6 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
if (ratio == 1f)
|
||||
{
|
||||
fixed (short* pInput = inputBuffer)
|
||||
{
|
||||
fixed (float* pOutput = outputBuffer, pParameters = parameters)
|
||||
{
|
||||
Vector128<float> parameter = Sse.LoadVector128(pParameters);
|
||||
@ -424,14 +423,12 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
Sse.Store(pOutput + (uint)i, Sse41.RoundToNearestInteger(mix0123));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
inputBufferIndex = i;
|
||||
}
|
||||
else
|
||||
{
|
||||
fixed (short* pInput = inputBuffer)
|
||||
{
|
||||
fixed (float* pOutput = outputBuffer, pParameters = parameters)
|
||||
{
|
||||
for (; i < (sampleCount & ~3); i += 4)
|
||||
@ -490,7 +487,6 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (; i < sampleCount; i++)
|
||||
{
|
||||
@ -526,15 +522,39 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
return _highCurveLut2F;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
private static void ResampleHighQuality(Span<float> outputBuffer, ReadOnlySpan<short> inputBuffer, float ratio, ref float fraction, int sampleCount)
|
||||
private static unsafe void ResampleHighQuality(Span<float> outputBuffer, ReadOnlySpan<short> inputBuffer, float ratio, ref float fraction, int sampleCount)
|
||||
{
|
||||
ReadOnlySpan<float> parameters = GetHighParameter(ratio);
|
||||
|
||||
int inputBufferIndex = 0;
|
||||
|
||||
// TODO: fast path
|
||||
if (Avx2.IsSupported)
|
||||
{
|
||||
// Fast path; assumes 256-bit vectors for simplicity because the filter is 8 taps
|
||||
fixed (short* pInput = inputBuffer)
|
||||
fixed (float* pParameters = parameters)
|
||||
{
|
||||
for (int i = 0; i < sampleCount; i++)
|
||||
{
|
||||
int baseIndex = (int)(fraction * 128) * 8;
|
||||
|
||||
Vector256<int> intInput = Avx2.ConvertToVector256Int32(pInput + inputBufferIndex);
|
||||
Vector256<float> floatInput = Avx.ConvertToVector256Single(intInput);
|
||||
Vector256<float> parameter = Avx.LoadVector256(pParameters + baseIndex);
|
||||
Vector256<float> dp = Avx.DotProduct(floatInput, parameter, control: 0xFF);
|
||||
|
||||
// Avx.DotProduct (vdpps) computes a separate 4-element dot product in each 128-bit lane, so the two lane results (elements 0 and 4) have to be summed to get the full 8-tap result
|
||||
outputBuffer[i] = (float)Math.Round(dp[0] + dp[4]);
|
||||
|
||||
fraction += ratio;
|
||||
inputBufferIndex += (int)MathF.Truncate(fraction);
|
||||
|
||||
fraction -= (int)fraction;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (int i = 0; i < sampleCount; i++)
|
||||
{
|
||||
int baseIndex = (int)(fraction * 128) * 8;
|
||||
@ -556,6 +576,7 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
fraction -= (int)fraction;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
public static void ResampleLowQuality(Span<float> outputBuffer, ReadOnlySpan<short> inputBuffer, float ratio, ref float fraction, int sampleCount)
|
||||
|
@ -2,6 +2,7 @@ using Ryujinx.Audio.Renderer.Server.Upsampler;
|
||||
using Ryujinx.Common.Memory;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.Numerics;
|
||||
using System.Runtime.CompilerServices;
|
||||
|
||||
namespace Ryujinx.Audio.Renderer.Dsp
|
||||
@ -70,16 +71,32 @@ namespace Ryujinx.Audio.Renderer.Dsp
|
||||
return;
|
||||
}
|
||||
|
||||
[MethodImpl(MethodImplOptions.AggressiveInlining)]
|
||||
float DoFilterBank(ref UpsamplerBufferState state, in Array20<float> bank)
|
||||
{
|
||||
float result = 0.0f;
|
||||
|
||||
Debug.Assert(state.History.Length == HistoryLength);
|
||||
Debug.Assert(bank.Length == FilterBankLength);
|
||||
for (int j = 0; j < FilterBankLength; j++)
|
||||
|
||||
int curIdx = 0;
|
||||
if (Vector.IsHardwareAccelerated)
|
||||
{
|
||||
result += bank[j] * state.History[j];
|
||||
// Do SIMD-accelerated block operations where possible.
|
||||
// Only about a 2x speedup since filter bank length is short
|
||||
int stopIdx = FilterBankLength - (FilterBankLength % Vector<float>.Count);
|
||||
while (curIdx < stopIdx)
|
||||
{
|
||||
result += Vector.Dot(
|
||||
new Vector<float>(bank.AsSpan().Slice(curIdx, Vector<float>.Count)),
|
||||
new Vector<float>(state.History.AsSpan().Slice(curIdx, Vector<float>.Count)));
|
||||
curIdx += Vector<float>.Count;
|
||||
}
|
||||
}
|
||||
|
||||
while (curIdx < FilterBankLength)
|
||||
{
|
||||
result += bank[curIdx] * state.History[curIdx];
|
||||
curIdx++;
|
||||
}
|
||||
|
||||
return result;
|
||||
|
93
Ryujinx.Tests/Audio/Renderer/Dsp/ResamplerTests.cs
Normal file
93
Ryujinx.Tests/Audio/Renderer/Dsp/ResamplerTests.cs
Normal file
@ -0,0 +1,93 @@
|
||||
using NUnit.Framework;
|
||||
using Ryujinx.Audio.Renderer.Dsp;
|
||||
using Ryujinx.Audio.Renderer.Parameter;
|
||||
using Ryujinx.Audio.Renderer.Server.Upsampler;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Ryujinx.Tests.Audio.Renderer.Dsp
{
    /// <summary>
    /// Consistency tests for <see cref="ResamplerHelper.Resample"/>: a synthetic sine wave is resampled
    /// and compared against the same sine wave generated directly at the output rate.
    /// </summary>
    class ResamplerTests
    {
        // [TestCase] already marks a method as a test, so a separate [Test] attribute is not needed.
        [TestCase(VoiceInParameter.SampleRateConversionQuality.Low)]
        [TestCase(VoiceInParameter.SampleRateConversionQuality.Default)]
        [TestCase(VoiceInParameter.SampleRateConversionQuality.High)]
        public void TestResamplerConsistencyUpsampling(VoiceInParameter.SampleRateConversionQuality quality)
        {
            DoResamplingTest(44100, 48000, quality);
        }

        [TestCase(VoiceInParameter.SampleRateConversionQuality.Low)]
        [TestCase(VoiceInParameter.SampleRateConversionQuality.Default)]
        [TestCase(VoiceInParameter.SampleRateConversionQuality.High)]
        public void TestResamplerConsistencyDownsampling(VoiceInParameter.SampleRateConversionQuality quality)
        {
            DoResamplingTest(48000, 44100, quality);
        }

        /// <summary>
        /// Generates a 1-second sine wave sample at input rate, resamples it to output rate, and
        /// ensures that it resampled at the expected rate with no discontinuities
        /// </summary>
        /// <param name="inputRate">The input sample rate to test</param>
        /// <param name="outputRate">The output sample rate to test</param>
        /// <param name="quality">The resampler quality to use</param>
        private static void DoResamplingTest(int inputRate, int outputRate, VoiceInParameter.SampleRateConversionQuality quality)
        {
            const float ToneFrequency = 440f;
            const float Amplitude = 0.5f;
            const float PcmScale = 32767f;
            const float MaxSampleDelta = 0.1f;
            const float MaxMeanDelta = 0.01f;

            float inputSampleRate = inputRate;
            float outputSampleRate = outputRate;

            // One second of audio on each side.
            int inputSampleCount = inputRate;
            int outputSampleCount = outputRate;

            short[] inputBuffer = new short[inputSampleCount + 100]; // add some safety buffer at the end
            float[] outputBuffer = new float[outputSampleCount + 100];

            for (int sample = 0; sample < inputBuffer.Length; sample++)
            {
                // 440 hz sine wave with amplitude = 0.5f at input sample rate
                inputBuffer[sample] = (short)(PcmScale * MathF.Sin((ToneFrequency / inputSampleRate) * sample * MathF.PI * 2f) * Amplitude);
            }

            float fraction = 0;

            ResamplerHelper.Resample(
                outputBuffer.AsSpan(),
                inputBuffer.AsSpan(),
                inputSampleRate / outputSampleRate,
                ref fraction,
                outputSampleCount,
                quality,
                false);

            // The reference sine is shifted by this many output samples before comparison,
            // depending on the quality mode under test.
            int delay = quality switch
            {
                VoiceInParameter.SampleRateConversionQuality.High => 3,
                VoiceInParameter.SampleRateConversionQuality.Default => 1,
                _ => 0
            };

            float sumDifference = 0;

            for (int sample = 0; sample < outputSampleCount; sample++)
            {
                // Bring the resampled PCM-scaled value back to the [-1, 1] range.
                float actual = outputBuffer[sample] / PcmScale;

                // 440 hz sine wave with amplitude = 0.5f at output sample rate
                float expected = MathF.Sin((ToneFrequency / outputSampleRate) * (sample + delay) * MathF.PI * 2f) * Amplitude;
                float thisDelta = Math.Abs(expected - actual);

                // Ensure no discontinuities. The comparison is negated (rather than using >=)
                // so that a NaN delta is also reported as a failure.
                if (!(thisDelta < MaxSampleDelta))
                {
                    Assert.Fail($"Output sample {sample} deviates by {thisDelta} (expected {expected}, actual {actual}); limit is {MaxSampleDelta}.");
                }

                sumDifference += thisDelta;
            }

            float meanDifference = sumDifference / outputSampleCount;

            // Expect the output to be 99% similar to the expected resampled sine wave
            Assert.IsTrue(meanDifference < MaxMeanDelta, $"Mean absolute difference {meanDifference} is not below {MaxMeanDelta}.");
        }
    }
}
|
64
Ryujinx.Tests/Audio/Renderer/Dsp/UpsamplerTests.cs
Normal file
64
Ryujinx.Tests/Audio/Renderer/Dsp/UpsamplerTests.cs
Normal file
@ -0,0 +1,64 @@
|
||||
using NUnit.Framework;
|
||||
using Ryujinx.Audio.Renderer.Dsp;
|
||||
using Ryujinx.Audio.Renderer.Parameter;
|
||||
using Ryujinx.Audio.Renderer.Server.Upsampler;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.IO;
|
||||
using System.Linq;
|
||||
using System.Runtime.CompilerServices;
|
||||
using System.Text;
|
||||
using System.Threading.Tasks;
|
||||
|
||||
namespace Ryujinx.Tests.Audio.Renderer.Dsp
{
    /// <summary>
    /// Consistency test for <see cref="UpsamplerHelper.Upsample"/>: a synthetic sine wave is upsampled
    /// block by block and compared against the same sine wave generated directly at the output rate.
    /// </summary>
    class UpsamplerTests
    {
        [Test]
        public void TestUpsamplerConsistency()
        {
            const float ToneFrequency = 440f;
            const float Amplitude = 0.5f;
            const float MaxMeanDelta = 0.02f;

            UpsamplerBufferState bufferState = new UpsamplerBufferState();
            int inputBlockSize = 160;
            int numInputSamples = 32000;
            int numOutputSamples = 48000;
            float inputSampleRate = numInputSamples;
            float outputSampleRate = numOutputSamples;
            float[] inputBuffer = new float[numInputSamples + 100];
            float[] outputBuffer = new float[numOutputSamples + 100];

            for (int sample = 0; sample < inputBuffer.Length; sample++)
            {
                // 440 hz sine wave with amplitude = 0.5f at input sample rate
                inputBuffer[sample] = MathF.Sin((ToneFrequency / inputSampleRate) * sample * MathF.PI * 2f) * Amplitude;
            }

            int inputIdx = 0;
            int outputIdx = 0;

            // Feed the upsampler one block at a time. The bound is inclusive so that the final
            // block is processed too; with an exclusive bound the tail of the output buffer would
            // stay zero-filled and still be compared against the reference below.
            while (inputIdx + inputBlockSize <= numInputSamples)
            {
                // Number of output samples this block must produce so that the running total
                // tracks the input position scaled by the rate ratio.
                int outputBufLength = (int)Math.Round((float)(inputIdx + inputBlockSize) * outputSampleRate / inputSampleRate) - outputIdx;

                UpsamplerHelper.Upsample(
                    outputBuffer.AsSpan(outputIdx),
                    inputBuffer.AsSpan(inputIdx),
                    outputBufLength,
                    inputBlockSize,
                    ref bufferState);

                inputIdx += inputBlockSize;
                outputIdx += outputBufLength;
            }

            // Every output sample compared below must actually have been written by the upsampler.
            Assert.AreEqual(numOutputSamples, outputIdx, "The upsampler did not produce the full output buffer.");

            float sumDifference = 0;

            for (int sample = 0; sample < numOutputSamples; sample++)
            {
                // 440 hz sine wave with amplitude = 0.5f at output sample rate with an offset of 15
                float expected = MathF.Sin((ToneFrequency / outputSampleRate) * (sample - 15) * MathF.PI * 2f) * Amplitude;

                sumDifference += Math.Abs(expected - outputBuffer[sample]);
            }

            float meanDifference = sumDifference / numOutputSamples;

            // Expect the output to be 98% similar to the expected resampled sine wave
            Assert.IsTrue(meanDifference < MaxMeanDelta, $"Mean absolute difference {meanDifference} is not below {MaxMeanDelta}.");
        }
    }
}
|
Loading…
Reference in New Issue
Block a user