From 894459fcd7797b1e38f2448797d83856d11b6e23 Mon Sep 17 00:00:00 2001 From: LDj3SNuD <35856442+LDj3SNuD@users.noreply.github.com> Date: Sun, 14 Oct 2018 04:35:16 +0200 Subject: [PATCH] Add Fmls_Se, Fmulx_Se/Ve, Smov_S Inst.; Opt. Clz/Clz_V, Cnt_V, Shl_V, S/Ushr_V, S/Usra_V Inst.; Add 11 Tests. Some fixes. (#449) * Update AOpCodeTable.cs * Update AInstEmitSimdMove.cs * Update AInstEmitSimdArithmetic.cs * Update AInstEmitSimdShift.cs * Update ASoftFallback.cs * Update ASoftFloat.cs * Update AOpCodeSimdRegElemF.cs * Update CpuTestSimdIns.cs * Update CpuTestSimdRegElem.cs * Create CpuTestSimdRegElemF.cs * Update CpuTestSimd.cs * Update CpuTestSimdReg.cs * Superseded Fmul_Se Test. Nit. * Address PR feedback. * Address PR feedback. * Update AInstEmitSimdArithmetic.cs * Update ASoftFallback.cs * Update AInstEmitAlu.cs * Update AInstEmitSimdShift.cs --- ChocolArm64/AOpCodeTable.cs | 14 +- ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs | 27 +- ChocolArm64/Instruction/AInstEmitAlu.cs | 14 +- .../Instruction/AInstEmitSimdArithmetic.cs | 91 +++- ChocolArm64/Instruction/AInstEmitSimdMove.cs | 14 +- ChocolArm64/Instruction/AInstEmitSimdShift.cs | 131 +++++- ChocolArm64/Instruction/ASoftFallback.cs | 19 +- ChocolArm64/Instruction/ASoftFloat.cs | 16 +- Ryujinx.Tests/Cpu/CpuTestSimd.cs | 58 ++- Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs | 36 +- Ryujinx.Tests/Cpu/CpuTestSimdIns.cs | 109 ++++- Ryujinx.Tests/Cpu/CpuTestSimdReg.cs | 68 +-- Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs | 145 +++--- Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs | 424 ++++++++++++++++++ 14 files changed, 938 insertions(+), 228 deletions(-) create mode 100644 Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs diff --git a/ChocolArm64/AOpCodeTable.cs b/ChocolArm64/AOpCodeTable.cs index 30025712..44493298 100644 --- a/ChocolArm64/AOpCodeTable.cs +++ b/ChocolArm64/AOpCodeTable.cs @@ -284,11 +284,12 @@ namespace ChocolArm64 SetA64("000111100x1xxxxx011110xxxxxxxxxx", AInstEmit.Fminnm_S, typeof(AOpCodeSimdReg)); SetA64("0>0011101<1xxxxx110001xxxxxxxxxx", AInstEmit.Fminnm_V, typeof(AOpCodeSimdReg)); SetA64("0>1011101<1xxxxx111101xxxxxxxxxx", AInstEmit.Fminp_V, typeof(AOpCodeSimdReg)); - SetA64("010111111<0011100<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmla_V, typeof(AOpCodeSimdReg)); - SetA64("0x0011111<00111110011101<1xxxxx110011xxxxxxxxxx", AInstEmit.Fmls_V, typeof(AOpCodeSimdReg)); - SetA64("0x0011111<00111111011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmul_V, typeof(AOpCodeSimdReg)); - SetA64("0x0011111<00111110011100<1xxxxx110111xxxxxxxxxx", AInstEmit.Fmulx_V, typeof(AOpCodeSimdReg)); + SetA64("0>10111111011101<100000111110xxxxxxxxxx", AInstEmit.Fneg_V, typeof(AOpCodeSimd)); SetA64("000111110x1xxxxx0xxxxxxxxxxxxxxx", AInstEmit.Fnmadd_S, typeof(AOpCodeSimdReg)); @@ -401,6 +404,7 @@ namespace ChocolArm64 SetA64("0x001110<<1xxxxx101011xxxxxxxxxx", AInstEmit.Sminp_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx100000xxxxxxxxxx", AInstEmit.Smlal_V, typeof(AOpCodeSimdReg)); SetA64("0x001110<<1xxxxx101000xxxxxxxxxx", AInstEmit.Smlsl_V, typeof(AOpCodeSimdReg)); + SetA64("0x001110000xxxxx001011xxxxxxxxxx", AInstEmit.Smov_S, typeof(AOpCodeSimdIns)); SetA64("0x001110<<1xxxxx110000xxxxxxxxxx", AInstEmit.Smull_V, typeof(AOpCodeSimdReg)); SetA64("01011110xx100000011110xxxxxxxxxx", AInstEmit.Sqabs_S, typeof(AOpCodeSimd)); SetA64("0>001110<<100000011110xxxxxxxxxx", AInstEmit.Sqabs_V, typeof(AOpCodeSimd)); diff --git a/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs b/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs index e61d7093..e0670def 100644 --- a/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs +++ b/ChocolArm64/Decoder/AOpCodeSimdRegElemF.cs @@ -8,15 +8,26 @@ namespace ChocolArm64.Decoder public AOpCodeSimdRegElemF(AInst Inst, long Position, int OpCode) : base(Inst, Position, OpCode) { - if ((Size & 1) != 0) + switch ((OpCode >> 21) & 3) // sz:L { - Index = (OpCode >> 11) & 1; - } - else - { - Index = (OpCode >> 21) & 1 | - (OpCode >> 10) & 2; + case 0: // H:0 + Index = (OpCode >> 10) & 2; // 0, 2 + + break; + + case 1: // H:1 + Index = (OpCode >> 10) & 2; + Index++; // 1, 3 + + break; + + case 2: // H + Index = (OpCode >> 11) & 1; // 0, 1 + + break; + + default: Emitter = AInstEmit.Und; return; } } } -} \ No newline at end of file +} diff --git a/ChocolArm64/Instruction/AInstEmitAlu.cs b/ChocolArm64/Instruction/AInstEmitAlu.cs index 490387e1..4551346b 100644 --- a/ChocolArm64/Instruction/AInstEmitAlu.cs +++ b/ChocolArm64/Instruction/AInstEmitAlu.cs @@ -4,6 +4,7 @@ using ChocolArm64.Translation; using System; using System.Reflection; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitAluHelper; @@ -117,9 +118,18 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rn); - Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); + if (Lzcnt.IsSupported) + { + Type TValue = Op.RegisterSize == ARegisterSize.Int32 ? typeof(uint) : typeof(ulong); - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { TValue })); + } + else + { + Context.EmitLdc_I4(Op.RegisterSize == ARegisterSize.Int32 ? 32 : 64); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + } Context.EmitStintzr(Op.Rd); } diff --git a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs index d11a0b84..7ba08f5e 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdArithmetic.cs @@ -82,20 +82,6 @@ namespace ChocolArm64.Instruction } public static void Cls_V(AILEmitterCtx Context) - { - MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingSigns)); - - EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo)); - } - - public static void Clz_V(AILEmitterCtx Context) - { - MethodInfo MthdInfo = typeof(ASoftFallback).GetMethod(nameof(ASoftFallback.CountLeadingZeros)); - - EmitCountLeadingBits(Context, () => Context.EmitCall(MthdInfo)); - } - - private static void EmitCountLeadingBits(AILEmitterCtx Context, Action Emit) { AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; @@ -110,7 +96,44 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I4(ESize); - Emit(); + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingSigns)); + + EmitVectorInsert(Context, Op.Rd, Index, Op.Size); + } + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + + public static void Clz_V(AILEmitterCtx Context) + { + AOpCodeSimd Op = (AOpCodeSimd)Context.CurrOp; + + int Bytes = Op.GetBitsCount() >> 3; + int Elems = Bytes >> Op.Size; + + int ESize = 8 << Op.Size; + + for (int Index = 0; Index < Elems; Index++) + { + EmitVectorExtractZx(Context, Op.Rn, Index, Op.Size); + + if (Lzcnt.IsSupported && ESize == 32) + { + Context.Emit(OpCodes.Conv_U4); + + Context.EmitCall(typeof(Lzcnt).GetMethod(nameof(Lzcnt.LeadingZeroCount), new Type[] { typeof(uint) })); + + Context.Emit(OpCodes.Conv_U8); + } + else + { + Context.EmitLdc_I4(ESize); + + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountLeadingZeros)); + } EmitVectorInsert(Context, Op.Rd, Index, Op.Size); } @@ -131,11 +154,14 @@ namespace ChocolArm64.Instruction { EmitVectorExtractZx(Context, Op.Rn, Index, 0); - Context.Emit(OpCodes.Conv_U4); - - ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); - - Context.Emit(OpCodes.Conv_U8); + if (Popcnt.IsSupported) + { + Context.EmitCall(typeof(Popcnt).GetMethod(nameof(Popcnt.PopCount), new Type[] { typeof(ulong) })); + } + else + { + ASoftFallback.EmitCall(Context, nameof(ASoftFallback.CountSetBits8)); + } EmitVectorInsert(Context, Op.Rd, Index, 0); } @@ -440,6 +466,15 @@ namespace ChocolArm64.Instruction }); } + public static void Fmls_Se(AILEmitterCtx Context) + { + EmitScalarTernaryOpByElemF(Context, () => + { + Context.Emit(OpCodes.Mul); + Context.Emit(OpCodes.Sub); + }); + } + public static void Fmls_V(AILEmitterCtx Context) { EmitVectorTernaryOpF(Context, () => @@ -554,6 +589,14 @@ namespace ChocolArm64.Instruction }); } + public static void Fmulx_Se(AILEmitterCtx Context) + { + EmitScalarBinaryOpByElemF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fmulx_V(AILEmitterCtx Context) { EmitVectorBinaryOpF(Context, () => @@ -562,6 +605,14 @@ namespace ChocolArm64.Instruction }); } + public static void Fmulx_Ve(AILEmitterCtx Context) + { + EmitVectorBinaryOpByElemF(Context, () => + { + EmitSoftFloatCall(Context, nameof(ASoftFloat_32.FPMulX)); + }); + } + public static void Fneg_S(AILEmitterCtx Context) { EmitScalarUnaryOpF(Context, () => Context.Emit(OpCodes.Neg)); diff --git a/ChocolArm64/Instruction/AInstEmitSimdMove.cs b/ChocolArm64/Instruction/AInstEmitSimdMove.cs index 94097f48..6001f48c 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdMove.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdMove.cs @@ -249,6 +249,17 @@ namespace ChocolArm64.Instruction EmitVectorImmUnaryOp(Context, () => Context.Emit(OpCodes.Not)); } + public static void Smov_S(AILEmitterCtx Context) + { + AOpCodeSimdIns Op = (AOpCodeSimdIns)Context.CurrOp; + + EmitVectorExtractSx(Context, Op.Rn, Op.DstIndex, Op.Size); + + EmitIntZeroUpperIfNeeded(Context); + + Context.EmitStintzr(Op.Rd); + } + public static void Tbl_V(AILEmitterCtx Context) { AOpCodeSimdTbl Op = (AOpCodeSimdTbl)Context.CurrOp; @@ -421,7 +432,8 @@ namespace ChocolArm64.Instruction private static void EmitIntZeroUpperIfNeeded(AILEmitterCtx Context) { - if (Context.CurrOp.RegisterSize == ARegisterSize.Int32) + if (Context.CurrOp.RegisterSize == ARegisterSize.Int32 || + Context.CurrOp.RegisterSize == ARegisterSize.SIMD64) { Context.Emit(OpCodes.Conv_U4); Context.Emit(OpCodes.Conv_U8); diff --git a/ChocolArm64/Instruction/AInstEmitSimdShift.cs b/ChocolArm64/Instruction/AInstEmitSimdShift.cs index 127abf1d..8918c0e1 100644 --- a/ChocolArm64/Instruction/AInstEmitSimdShift.cs +++ b/ChocolArm64/Instruction/AInstEmitSimdShift.cs @@ -3,6 +3,7 @@ using ChocolArm64.State; using ChocolArm64.Translation; using System; using System.Reflection.Emit; +using System.Runtime.Intrinsics.X86; using static ChocolArm64.Instruction.AInstEmitSimdHelper; @@ -31,12 +32,32 @@ namespace ChocolArm64.Instruction { AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; - EmitVectorUnaryOpZx(Context, () => + if (AOptimizations.UseSse2 && Op.Size > 0) { + Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + Context.EmitLdc_I4(GetImmShl(Op)); - Context.Emit(OpCodes.Shl); - }); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftLeftLogical), Types)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorUnaryOpZx(Context, () => + { + Context.EmitLdc_I4(GetImmShl(Op)); + + Context.Emit(OpCodes.Shl); + }); + } } public static void Shll_V(AILEmitterCtx Context) @@ -167,7 +188,30 @@ namespace ChocolArm64.Instruction public static void Sshr_V(AILEmitterCtx Context) { - EmitShrImmOp(Context, ShrImmFlags.VectorSx); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0 + && Op.Size < 3) + { + Type[] Types = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), Types)); + + EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitShrImmOp(Context, ShrImmFlags.VectorSx); + } } public static void Ssra_S(AILEmitterCtx Context) @@ -177,7 +221,33 @@ namespace ChocolArm64.Instruction public static void Ssra_V(AILEmitterCtx Context) { - EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0 + && Op.Size < 3) + { + Type[] TypesSra = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + Type[] TypesAdd = new Type[] { VectorIntTypesPerSizeLog2[Op.Size], VectorIntTypesPerSizeLog2[Op.Size] }; + + EmitLdvecWithSignedCast(Context, Op.Rd, Op.Size); + EmitLdvecWithSignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightArithmetic), TypesSra)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); + + EmitStvecWithSignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorShrImmOpSx(Context, ShrImmFlags.Accumulate); + } } public static void Uqrshrn_S(AILEmitterCtx Context) @@ -239,7 +309,29 @@ namespace ChocolArm64.Instruction public static void Ushr_V(AILEmitterCtx Context) { - EmitShrImmOp(Context, ShrImmFlags.VectorZx); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0) + { + Type[] Types = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), Types)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitShrImmOp(Context, ShrImmFlags.VectorZx); + } } public static void Usra_S(AILEmitterCtx Context) @@ -249,7 +341,32 @@ namespace ChocolArm64.Instruction public static void Usra_V(AILEmitterCtx Context) { - EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); + AOpCodeSimdShImm Op = (AOpCodeSimdShImm)Context.CurrOp; + + if (AOptimizations.UseSse2 && Op.Size > 0) + { + Type[] TypesSrl = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], typeof(byte) }; + Type[] TypesAdd = new Type[] { VectorUIntTypesPerSizeLog2[Op.Size], VectorUIntTypesPerSizeLog2[Op.Size] }; + + EmitLdvecWithUnsignedCast(Context, Op.Rd, Op.Size); + EmitLdvecWithUnsignedCast(Context, Op.Rn, Op.Size); + + Context.EmitLdc_I4(GetImmShr(Op)); + + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.ShiftRightLogical), TypesSrl)); + Context.EmitCall(typeof(Sse2).GetMethod(nameof(Sse2.Add), TypesAdd)); + + EmitStvecWithUnsignedCast(Context, Op.Rd, Op.Size); + + if (Op.RegisterSize == ARegisterSize.SIMD64) + { + EmitVectorZeroUpper(Context, Op.Rd); + } + } + else + { + EmitVectorShrImmOpZx(Context, ShrImmFlags.Accumulate); + } } private static void EmitVectorShl(AILEmitterCtx Context, bool Signed) diff --git a/ChocolArm64/Instruction/ASoftFallback.cs b/ChocolArm64/Instruction/ASoftFallback.cs index a7bc1085..3c5c5c4d 100644 --- a/ChocolArm64/Instruction/ASoftFallback.cs +++ b/ChocolArm64/Instruction/ASoftFallback.cs @@ -386,7 +386,7 @@ namespace ChocolArm64.Instruction #endregion #region "Count" - public static ulong CountLeadingSigns(ulong Value, int Size) + public static ulong CountLeadingSigns(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). { Value ^= Value >> 1; @@ -405,9 +405,9 @@ namespace ChocolArm64.Instruction private static readonly byte[] ClzNibbleTbl = { 4, 3, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0 }; - public static ulong CountLeadingZeros(ulong Value, int Size) + public static ulong CountLeadingZeros(ulong Value, int Size) // Size is 8, 16, 32 or 64 (SIMD&FP or Base Inst.). { - if (Value == 0) + if (Value == 0ul) { return (ulong)Size; } @@ -426,12 +426,17 @@ namespace ChocolArm64.Instruction return (ulong)Count; } - public static uint CountSetBits8(uint Value) + public static ulong CountSetBits8(ulong Value) // "Size" is 8 (SIMD&FP Inst.). { - Value = ((Value >> 1) & 0x55) + (Value & 0x55); - Value = ((Value >> 2) & 0x33) + (Value & 0x33); + if (Value == 0xfful) + { + return 8ul; + } - return (Value >> 4) + (Value & 0x0f); + Value = ((Value >> 1) & 0x55ul) + (Value & 0x55ul); + Value = ((Value >> 2) & 0x33ul) + (Value & 0x33ul); + + return (Value >> 4) + (Value & 0x0ful); } #endregion diff --git a/ChocolArm64/Instruction/ASoftFloat.cs b/ChocolArm64/Instruction/ASoftFloat.cs index 7412c976..2d9a9f0e 100644 --- a/ChocolArm64/Instruction/ASoftFloat.cs +++ b/ChocolArm64/Instruction/ASoftFloat.cs @@ -365,8 +365,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMaxNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -430,8 +430,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_32.FPMinNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out uint Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out uint Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -1091,8 +1091,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMaxNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { @@ -1156,8 +1156,8 @@ namespace ChocolArm64.Instruction { Debug.WriteIf(State.Fpcr != 0, "ASoftFloat_64.FPMinNum: "); - Value1.FPUnpack(out FPType Type1, out bool Sign1, out ulong Op1); - Value2.FPUnpack(out FPType Type2, out bool Sign2, out ulong Op2); + Value1.FPUnpack(out FPType Type1, out _, out _); + Value2.FPUnpack(out FPType Type2, out _, out _); if (Type1 == FPType.QNaN && Type2 != FPType.QNaN) { diff --git a/Ryujinx.Tests/Cpu/CpuTestSimd.cs b/Ryujinx.Tests/Cpu/CpuTestSimd.cs index 2075ccf2..279f9f0c 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimd.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimd.cs @@ -205,6 +205,22 @@ namespace Ryujinx.Tests.Cpu #endregion #region "ValueSource (Opcodes)" + private static uint[] _F_Cvt_S_SD_() + { + return new uint[] + { + 0x1E22C020u // FCVT D0, S1 + }; + } + + private static uint[] _F_Cvt_S_DS_() + { + return new uint[] + { + 0x1E624020u // FCVT S0, D1 + }; + } + private static uint[] _F_Cvt_NZ_SU_S_S_() { return new uint[] @@ -249,7 +265,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_RecpX_Sqrt_S_S_() + private static uint[] _F_Recpx_Sqrt_S_S_() { return new uint[] { @@ -258,7 +274,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_RecpX_Sqrt_S_D_() + private static uint[] _F_Recpx_Sqrt_S_D_() { return new uint[] { @@ -785,35 +801,33 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise, Description("FCVT
, ")] - public void Fcvt_S_SD([ValueSource("_1S_F_")] ulong A) + [Test, Pairwise] [Explicit] + public void F_Cvt_S_SD([ValueSource("_F_Cvt_S_SD_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A) { - uint Opcode = 0x1E22C020; // FCVT D0, S1 - ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE1(Z); Vector128 V1 = MakeVectorE0(A); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); CompareAgainstUnicorn(); } - [Test, Pairwise, Description("FCVT , ")] - public void Fcvt_S_DS([ValueSource("_1D_F_")] ulong A) + [Test, Pairwise] [Explicit] + public void F_Cvt_S_DS([ValueSource("_F_Cvt_S_DS_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A) { - uint Opcode = 0x1E624020; // FCVT S0, D1 - ulong Z = TestContext.CurrentContext.Random.NextULong(); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0(A); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1); CompareAgainstUnicorn(); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Cvt_NZ_SU_S_S([ValueSource("_F_Cvt_NZ_SU_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A) { @@ -826,7 +840,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Cvt_NZ_SU_S_D([ValueSource("_F_Cvt_NZ_SU_S_D_")] uint Opcodes, [ValueSource("_1D_F_")] ulong A) { @@ -839,7 +853,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Cvt_NZ_SU_V_2S_4S([ValueSource("_F_Cvt_NZ_SU_V_2S_4S_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -858,7 +872,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Cvt_NZ_SU_V_2D([ValueSource("_F_Cvt_NZ_SU_V_2D_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -875,8 +889,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise] - public void F_RecpX_Sqrt_S_S([ValueSource("_F_RecpX_Sqrt_S_S_")] uint Opcodes, + [Test, Pairwise] [Explicit] + public void F_Recpx_Sqrt_S_S([ValueSource("_F_Recpx_Sqrt_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A) { ulong Z = TestContext.CurrentContext.Random.NextULong(); @@ -890,8 +904,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] - public void F_RecpX_Sqrt_S_D([ValueSource("_F_RecpX_Sqrt_S_D_")] uint Opcodes, + [Test, Pairwise] [Explicit] + public void F_Recpx_Sqrt_S_D([ValueSource("_F_Recpx_Sqrt_S_D_")] uint Opcodes, [ValueSource("_1D_F_")] ulong A) { ulong Z = TestContext.CurrentContext.Random.NextULong(); @@ -905,7 +919,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Sqrt_V_2S_4S([ValueSource("_F_Sqrt_V_2S_4S_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -926,7 +940,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Sqrt_V_2D([ValueSource("_F_Sqrt_V_2D_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs index a5ae1a5f..5afeab31 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdArithmetic.cs @@ -9,18 +9,6 @@ namespace Ryujinx.Tests.Cpu { public class CpuTestSimdArithmetic : CpuTest { - [Test, Description("FMUL S6, S1, V0.S[2]")] - public void Fmul_Se([Random(10)] float A, [Random(10)] float B) - { - AThreadState ThreadState = SingleOpcode(0x5F809826, - V1: Sse.SetVector128(0, 0, 0, A), - V0: Sse.SetVector128(0, B, 0, 0)); - - Assert.That(Sse41.Extract(ThreadState.V6, (byte)0), Is.EqualTo(A * B)); - - CompareAgainstUnicorn(); - } - [TestCase(0x00000000u, 0x7F800000u)] [TestCase(0x80000000u, 0xFF800000u)] [TestCase(0x00FFF000u, 0x7E000000u)] @@ -86,7 +74,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0(A); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -115,7 +103,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0E1(A, B); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -185,7 +173,7 @@ namespace Ryujinx.Tests.Cpu case 'M': FpcrTemp = 0x800000; break; case 'Z': FpcrTemp = 0xC00000; break; } - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp |= 1 << 25; } @@ -241,7 +229,7 @@ namespace Ryujinx.Tests.Cpu case 'M': FpcrTemp = 0x800000; break; case 'Z': FpcrTemp = 0xC00000; break; } - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp |= 1 << 25; } @@ -302,7 +290,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0(A); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -327,7 +315,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0E1(A, B); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -389,7 +377,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0(A); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -417,7 +405,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0E1(A, B); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -478,7 +466,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0(A); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -503,7 +491,7 @@ namespace Ryujinx.Tests.Cpu Vector128 V1 = MakeVectorE0E1(A, B); int FpcrTemp = 0x0; - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp = 0x2000000; } @@ -573,7 +561,7 @@ namespace Ryujinx.Tests.Cpu case 'M': FpcrTemp = 0x800000; break; case 'Z': FpcrTemp = 0xC00000; break; } - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp |= 1 << 25; } @@ -629,7 +617,7 @@ namespace Ryujinx.Tests.Cpu case 'M': FpcrTemp = 0x800000; break; case 'Z': FpcrTemp = 0xC00000; break; } - if(DefaultNaN) + if (DefaultNaN) { FpcrTemp |= 1 << 25; } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs index 387cdf5d..0b227edb 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdIns.cs @@ -14,6 +14,27 @@ namespace Ryujinx.Tests.Cpu #if SimdIns #region "ValueSource" + private static ulong[] _1D_() + { + return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFFFFFFFFFFul, + 0x8000000000000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0xFFFFFFFFFFFFFFFFul }; + } + + private static ulong[] _8B4H2S_() + { + return new ulong[] { 0x0000000000000000ul, 0x7F7F7F7F7F7F7F7Ful, + 0x8080808080808080ul, 0x7FFF7FFF7FFF7FFFul, + 0x8000800080008000ul, 0x7FFFFFFF7FFFFFFFul, + 0x8000000080000000ul, 0xFFFFFFFFFFFFFFFFul }; + } + private static uint[] _W_() { return new uint[] { 0x00000000u, 0x0000007Fu, @@ -39,7 +60,7 @@ namespace Ryujinx.Tests.Cpu [Values(0, 1, 2)] int Size, // Q0: <8B, 4H, 2S> [Values(0b0u, 0b1u)] uint Q) // Q1: <16B, 8H, 4S> { - uint Imm5 = (1U << Size) & 0x1F; + uint Imm5 = (1u << Size) & 0x1Fu; uint Opcode = 0x0E000C00; // RESERVED Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); @@ -69,6 +90,92 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } + + [Test, Pairwise, Description("SMOV , .[]")] + public void Smov_S_W([Values(0u, 31u)] uint Rd, + [Values(1u)] uint Rn, + [ValueSource("_8B4H_")] [Random(RndCnt)] ulong A, + [Values(0, 1)] int Size, // + [Values(0u, 1u, 2u, 3u)] uint Index) + { + uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu; + + uint Opcode = 0x0E002C00; // RESERVED + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (Imm5 << 16); + + ulong _X0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; + uint _W31 = TestContext.CurrentContext.Random.NextUInt(); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcode, X0: _X0, X31: _W31, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("SMOV , .[]")] + public void Smov_S_X([Values(0u, 31u)] uint Rd, + [Values(1u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0, 1, 2)] int Size, // + [Values(0u, 1u)] uint Index) + { + uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu; + + uint Opcode = 0x4E002C00; // RESERVED + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (Imm5 << 16); + + ulong _X31 = TestContext.CurrentContext.Random.NextULong(); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcode, X31: _X31, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("UMOV , .[]")] + public void Umov_S_W([Values(0u, 31u)] uint Rd, + [Values(1u)] uint Rn, + [ValueSource("_8B4H2S_")] [Random(RndCnt)] ulong A, + [Values(0, 1, 2)] int Size, // + [Values(0u, 1u)] uint Index) + { + uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu; + + uint Opcode = 0x0E003C00; // RESERVED + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (Imm5 << 16); + + ulong _X0 = (ulong)TestContext.CurrentContext.Random.NextUInt() << 32; + uint _W31 = TestContext.CurrentContext.Random.NextUInt(); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcode, X0: _X0, X31: _W31, V1: V1); + + CompareAgainstUnicorn(); + } + + [Test, Pairwise, Description("UMOV , .[]")] + public void Umov_S_X([Values(0u, 31u)] uint Rd, + [Values(1u)] uint Rn, + [ValueSource("_1D_")] [Random(RndCnt)] ulong A, + [Values(3)] int Size, // + [Values(0u)] uint Index) + { + uint Imm5 = (Index << (Size + 1) | 1u << Size) & 0x1Fu; + + uint Opcode = 0x4E003C00; // RESERVED + Opcode |= ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcode |= (Imm5 << 16); + + ulong _X31 = TestContext.CurrentContext.Random.NextULong(); + Vector128 V1 = MakeVectorE0(A); + + AThreadState ThreadState = SingleOpcode(Opcode, X31: _X31, V1: V1); + + CompareAgainstUnicorn(); + } #endif } } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs index 7d47416f..1ea017c8 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdReg.cs @@ -204,7 +204,7 @@ namespace Ryujinx.Tests.Cpu #endregion #region "ValueSource (Opcodes)" - private static uint[] _F_Add_Div_Mul_MulX_Sub_S_S_() + private static uint[] _F_Add_Div_Mul_Mulx_Sub_S_S_() { return new uint[] { @@ -216,7 +216,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_Add_Div_Mul_MulX_Sub_S_D_() + private static uint[] _F_Add_Div_Mul_Mulx_Sub_S_D_() { return new uint[] { @@ -228,7 +228,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_Add_Div_Mul_MulX_Sub_V_2S_4S_() + private static uint[] _F_Add_Div_Mul_Mulx_Sub_V_2S_4S_() { return new uint[] { @@ -240,7 +240,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _F_Add_Div_Mul_MulX_Sub_V_2D_() + private static uint[] _F_Add_Div_Mul_Mulx_Sub_V_2D_() { return new uint[] { @@ -252,7 +252,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _Fmadd_Fmsub_S_S_() + private static uint[] _F_Madd_Msub_S_S_() { return new uint[] { @@ -261,7 +261,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _Fmadd_Fmsub_S_D_() + private static uint[] _F_Madd_Msub_S_D_() { return new uint[] { @@ -318,7 +318,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _Frecps_Frsqrts_S_S_() + private static uint[] _F_Recps_Rsqrts_S_S_() { return new uint[] { @@ -327,7 +327,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _Frecps_Frsqrts_S_D_() + private static uint[] _F_Recps_Rsqrts_S_D_() { return new uint[] { @@ -336,7 +336,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _Frecps_Frsqrts_V_2S_4S_() + private static uint[] _F_Recps_Rsqrts_V_2S_4S_() { return new uint[] { @@ -345,7 +345,7 @@ namespace Ryujinx.Tests.Cpu }; } - private static uint[] _Frecps_Frsqrts_V_2D_() + private static uint[] _F_Recps_Rsqrts_V_2D_() { return new uint[] { @@ -1137,8 +1137,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(); } - [Test, Pairwise] - public void F_Add_Div_Mul_MulX_Sub_S_S([ValueSource("_F_Add_Div_Mul_MulX_Sub_S_S_")] uint Opcodes, + [Test, Pairwise] [Explicit] + public void F_Add_Div_Mul_Mulx_Sub_S_S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A, [ValueSource("_1S_F_")] ulong B) { @@ -1154,8 +1154,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); } - [Test, Pairwise] - public void F_Add_Div_Mul_MulX_Sub_S_D([ValueSource("_F_Add_Div_Mul_MulX_Sub_S_D_")] uint Opcodes, + [Test, Pairwise] [Explicit] + public void F_Add_Div_Mul_Mulx_Sub_S_D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_S_D_")] uint Opcodes, [ValueSource("_1D_F_")] ulong A, [ValueSource("_1D_F_")] ulong B) { @@ -1171,8 +1171,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); } - [Test, Pairwise] - public void F_Add_Div_Mul_MulX_Sub_V_2S_4S([ValueSource("_F_Add_Div_Mul_MulX_Sub_V_2S_4S_")] uint Opcodes, + [Test, Pairwise] [Explicit] + public void F_Add_Div_Mul_Mulx_Sub_V_2S_4S([ValueSource("_F_Add_Div_Mul_Mulx_Sub_V_2S_4S_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, @@ -1195,8 +1195,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); } - [Test, Pairwise] - public void F_Add_Div_Mul_MulX_Sub_V_2D([ValueSource("_F_Add_Div_Mul_MulX_Sub_V_2D_")] uint Opcodes, + [Test, Pairwise] [Explicit] + public void F_Add_Div_Mul_Mulx_Sub_V_2D([ValueSource("_F_Add_Div_Mul_Mulx_Sub_V_2D_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, @@ -1217,8 +1217,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC | FPSR.DZC); } - [Test, Pairwise] // Fused. - public void Fmadd_Fmsub_S_S([ValueSource("_Fmadd_Fmsub_S_S_")] uint Opcodes, + [Test, Pairwise] [Explicit] // Fused. + public void F_Madd_Msub_S_S([ValueSource("_F_Madd_Msub_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A, [ValueSource("_1S_F_")] ulong B, [ValueSource("_1S_F_")] ulong C) @@ -1236,8 +1236,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); } - [Test, Pairwise] // Fused. - public void Fmadd_Fmsub_S_D([ValueSource("_Fmadd_Fmsub_S_D_")] uint Opcodes, + [Test, Pairwise] [Explicit] // Fused. + public void F_Madd_Msub_S_D([ValueSource("_F_Madd_Msub_S_D_")] uint Opcodes, [ValueSource("_1D_F_")] ulong A, [ValueSource("_1D_F_")] ulong B, [ValueSource("_1D_F_")] ulong C) @@ -1255,7 +1255,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Max_Min_Nm_S_S([ValueSource("_F_Max_Min_Nm_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A, [ValueSource("_1S_F_")] ulong B) @@ -1272,7 +1272,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Max_Min_Nm_S_D([ValueSource("_F_Max_Min_Nm_S_D_")] uint Opcodes, [ValueSource("_1D_F_")] ulong A, [ValueSource("_1D_F_")] ulong B) @@ -1289,7 +1289,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Max_Min_Nm_P_V_2S_4S([ValueSource("_F_Max_Min_Nm_P_V_2S_4S_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -1313,7 +1313,7 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] + [Test, Pairwise] [Explicit] public void F_Max_Min_Nm_P_V_2D([ValueSource("_F_Max_Min_Nm_P_V_2D_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, @@ -1335,8 +1335,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FpsrMask: FPSR.IOC); } - [Test, Pairwise] // Fused. - public void Frecps_Frsqrts_S_S([ValueSource("_Frecps_Frsqrts_S_S_")] uint Opcodes, + [Test, Pairwise] [Explicit] // Fused. + public void F_Recps_Rsqrts_S_S([ValueSource("_F_Recps_Rsqrts_S_S_")] uint Opcodes, [ValueSource("_1S_F_")] ulong A, [ValueSource("_1S_F_")] ulong B) { @@ -1352,8 +1352,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); } - [Test, Pairwise] // Fused. - public void Frecps_Frsqrts_S_D([ValueSource("_Frecps_Frsqrts_S_D_")] uint Opcodes, + [Test, Pairwise] [Explicit] // Fused. + public void F_Recps_Rsqrts_S_D([ValueSource("_F_Recps_Rsqrts_S_D_")] uint Opcodes, [ValueSource("_1D_F_")] ulong A, [ValueSource("_1D_F_")] ulong B) { @@ -1369,8 +1369,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); } - [Test, Pairwise] // Fused. - public void Frecps_Frsqrts_V_2S_4S([ValueSource("_Frecps_Frsqrts_V_2S_4S_")] uint Opcodes, + [Test, Pairwise] [Explicit] // Fused. + public void F_Recps_Rsqrts_V_2S_4S([ValueSource("_F_Recps_Rsqrts_V_2S_4S_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, @@ -1393,8 +1393,8 @@ namespace Ryujinx.Tests.Cpu CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); } - [Test, Pairwise] // Fused. - public void Frecps_Frsqrts_V_2D([ValueSource("_Frecps_Frsqrts_V_2D_")] uint Opcodes, + [Test, Pairwise] [Explicit] // Fused. + public void F_Recps_Rsqrts_V_2D([ValueSource("_F_Recps_Rsqrts_V_2D_")] uint Opcodes, [Values(0u)] uint Rd, [Values(1u, 0u)] uint Rn, [Values(2u, 0u)] uint Rm, diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs index 4d14ab48..61552062 100644 --- a/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs +++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElem.cs @@ -13,7 +13,7 @@ namespace Ryujinx.Tests.Cpu { #if SimdRegElem -#region "ValueSource" +#region "ValueSource (Types)" private static ulong[] _2S_() { return new ulong[] { 0x0000000000000000ul, 0x7FFFFFFF7FFFFFFFul, @@ -27,114 +27,81 @@ namespace Ryujinx.Tests.Cpu } #endregion +#region "ValueSource (Opcodes)" + private static uint[] _Mla_Mls_Mul_Ve_4H_8H_() + { + return new uint[] + { + 0x2F400000u, // MLA V0.4H, V0.4H, V0.H[0] + 0x2F404000u, // MLS V0.4H, V0.4H, V0.H[0] + 0x0F408000u // MUL V0.4H, V0.4H, V0.H[0] + }; + } + + private static uint[] _Mla_Mls_Mul_Ve_2S_4S_() + { + return new uint[] + { + 0x2F800000u, // MLA V0.2S, V0.2S, V0.S[0] + 0x2F804000u, // MLS V0.2S, V0.2S, V0.S[0] + 0x0F808000u // MUL V0.2S, V0.2S, V0.S[0] + }; + } +#endregion + private const int RndCnt = 2; - [Test, Pairwise, Description("MLA ., ., .[]")] - public void Mla_Ve_4H_8H([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H_")] [Random(RndCnt)] ulong A, - [ValueSource("_4H_")] [Random(RndCnt)] ulong B, - [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index, - [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> + [Test, Pairwise] + public void Mla_Mls_Mul_Ve_4H_8H([ValueSource("_Mla_Mls_Mul_Ve_4H_8H_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, + [ValueSource("_4H_")] [Random(RndCnt)] ulong A, + [ValueSource("_4H_")] [Random(RndCnt)] ulong B, + [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> { - uint H = (Index & 4) >> 2; - uint L = (Index & 2) >> 1; - uint M = (Index & 1) >> 0; + uint H = (Index >> 2) & 1; + uint L = (Index >> 1) & 1; + uint M = Index & 1; - uint Opcode = 0x2F400000; // MLA V0.4H, V0.4H, V0.H[0] - Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); - Opcode |= (L << 21) | (M << 20) | (H << 11); - Opcode |= ((Q & 1) << 30); + Opcodes |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (L << 21) | (M << 20) | (H << 11); + Opcodes |= ((Q & 1) << 30); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0E1(A, A * Q); Vector128 V2 = MakeVectorE0E1(B, B * H); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); CompareAgainstUnicorn(); } - [Test, Pairwise, Description("MLA ., ., .[]")] - public void Mla_Ve_2S_4S([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, - [ValueSource("_2S_")] [Random(RndCnt)] ulong A, - [ValueSource("_2S_")] [Random(RndCnt)] ulong B, - [Values(0u, 1u, 2u, 3u)] uint Index, - [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + [Test, Pairwise] + public void Mla_Mls_Mul_Ve_2S_4S([ValueSource("_Mla_Mls_Mul_Ve_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, + [ValueSource("_2S_")] [Random(RndCnt)] ulong A, + [ValueSource("_2S_")] [Random(RndCnt)] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> { - uint H = (Index & 2) >> 1; - uint L = (Index & 1) >> 0; + uint H = (Index >> 1) & 1; + uint L = Index & 1; - uint Opcode = 0x2F800000; // MLA V0.2S, V0.2S, V0.S[0] - Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); - Opcode |= (L << 21) | (H << 11); - Opcode |= ((Q & 1) << 30); + Opcodes |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (L << 21) | (H << 11); + Opcodes |= ((Q & 1) << 30); Vector128 V0 = MakeVectorE0E1(Z, Z); Vector128 V1 = MakeVectorE0E1(A, A * Q); Vector128 V2 = MakeVectorE0E1(B, B * H); - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("MLS ., ., .[]")] - public void Mls_Ve_4H_8H([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [ValueSource("_4H_")] [Random(RndCnt)] ulong Z, - [ValueSource("_4H_")] [Random(RndCnt)] ulong A, - [ValueSource("_4H_")] [Random(RndCnt)] ulong B, - [Values(0u, 1u, 2u, 3u, 4u, 5u, 6u, 7u)] uint Index, - [Values(0b0u, 0b1u)] uint Q) // <4H, 8H> - { - uint H = (Index & 4) >> 2; - uint L = (Index & 2) >> 1; - uint M = (Index & 1) >> 0; - - uint Opcode = 0x2F404000; // MLS V0.4H, V0.4H, V0.H[0] - Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); - Opcode |= (L << 21) | (M << 20) | (H << 11); - Opcode |= ((Q & 1) << 30); - - Vector128 V0 = MakeVectorE0E1(Z, Z); - Vector128 V1 = MakeVectorE0E1(A, A * Q); - Vector128 V2 = MakeVectorE0E1(B, B * H); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); - - CompareAgainstUnicorn(); - } - - [Test, Pairwise, Description("MLS ., ., .[]")] - public void Mls_Ve_2S_4S([Values(0u)] uint Rd, - [Values(1u, 0u)] uint Rn, - [Values(2u, 0u)] uint Rm, - [ValueSource("_2S_")] [Random(RndCnt)] ulong Z, - [ValueSource("_2S_")] [Random(RndCnt)] ulong A, - [ValueSource("_2S_")] [Random(RndCnt)] ulong B, - [Values(0u, 1u, 2u, 3u)] uint Index, - [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> - { - uint H = (Index & 2) >> 1; - uint L = (Index & 1) >> 0; - - uint Opcode = 0x2F804000; // MLS V0.2S, V0.2S, V0.S[0] - Opcode |= ((Rm & 15) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); - Opcode |= (L << 21) | (H << 11); - Opcode |= ((Q & 1) << 30); - - Vector128 V0 = MakeVectorE0E1(Z, Z); - Vector128 V1 = MakeVectorE0E1(A, A * Q); - Vector128 V2 = MakeVectorE0E1(B, B * H); - - AThreadState ThreadState = SingleOpcode(Opcode, V0: V0, V1: V1, V2: V2); + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2); CompareAgainstUnicorn(); } diff --git a/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs new file mode 100644 index 00000000..3945cce1 --- /dev/null +++ b/Ryujinx.Tests/Cpu/CpuTestSimdRegElemF.cs @@ -0,0 +1,424 @@ +#define SimdRegElemF + +using ChocolArm64.State; + +using NUnit.Framework; + +using System.Collections.Generic; +using System.Runtime.Intrinsics; + +namespace Ryujinx.Tests.Cpu +{ + [Category("SimdRegElemF")] // Tested: second half of 2018. + public sealed class CpuTestSimdRegElemF : CpuTest + { +#if SimdRegElemF + +#region "ValueSource (Types)" + private static IEnumerable _1S_F_() + { + yield return 0x00000000FF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x0000000080800000ul; // -Min Normal + yield return 0x00000000807FFFFFul; // -Max Subnormal + yield return 0x0000000080000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x000000007F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0000000000800000ul; // +Min Normal + yield return 0x00000000007FFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x0000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0x00000000FF800000ul; // -Infinity + yield return 0x000000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0x00000000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0x00000000FFBFFFFFul; // -SNaN (all ones payload) + yield return 0x000000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x000000007FBFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Grbg = TestContext.CurrentContext.Random.NextUInt(); + ulong Rnd1 = GenNormal_S(); + ulong Rnd2 = GenSubnormal_S(); + + yield return (Grbg << 32) | Rnd1; + yield return (Grbg << 32) | Rnd2; + } + } + + private static IEnumerable _2S_F_() + { + yield return 0xFF7FFFFFFF7FFFFFul; // -Max Normal (float.MinValue) + yield return 0x8080000080800000ul; // -Min Normal + yield return 0x807FFFFF807FFFFFul; // -Max Subnormal + yield return 0x8000000180000001ul; // -Min Subnormal (-float.Epsilon) + yield return 0x7F7FFFFF7F7FFFFFul; // +Max Normal (float.MaxValue) + yield return 0x0080000000800000ul; // +Min Normal + yield return 0x007FFFFF007FFFFFul; // +Max Subnormal + yield return 0x0000000100000001ul; // +Min Subnormal (float.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000080000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFF800000FF800000ul; // -Infinity + yield return 0x7F8000007F800000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFC00000FFC00000ul; // -QNaN (all zeros payload) (float.NaN) + yield return 0xFFBFFFFFFFBFFFFFul; // -SNaN (all ones payload) + yield return 0x7FC000007FC00000ul; // +QNaN (all zeros payload) (-float.NaN) (DefaultNaN) + yield return 0x7FBFFFFF7FBFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Rnd1 = GenNormal_S(); + ulong Rnd2 = GenSubnormal_S(); + + yield return (Rnd1 << 32) | Rnd1; + yield return (Rnd2 << 32) | Rnd2; + } + } + + private static IEnumerable _1D_F_() + { + yield return 0xFFEFFFFFFFFFFFFFul; // -Max Normal (double.MinValue) + yield return 0x8010000000000000ul; // -Min Normal + yield return 0x800FFFFFFFFFFFFFul; // -Max Subnormal + yield return 0x8000000000000001ul; // -Min Subnormal (-double.Epsilon) + yield return 0x7FEFFFFFFFFFFFFFul; // +Max Normal (double.MaxValue) + yield return 0x0010000000000000ul; // +Min Normal + yield return 0x000FFFFFFFFFFFFFul; // +Max Subnormal + yield return 0x0000000000000001ul; // +Min Subnormal (double.Epsilon) + + if (!NoZeros) + { + yield return 0x8000000000000000ul; // -Zero + yield return 0x0000000000000000ul; // +Zero + } + + if (!NoInfs) + { + yield return 0xFFF0000000000000ul; // -Infinity + yield return 0x7FF0000000000000ul; // +Infinity + } + + if (!NoNaNs) + { + yield return 0xFFF8000000000000ul; // -QNaN (all zeros payload) (double.NaN) + yield return 0xFFF7FFFFFFFFFFFFul; // -SNaN (all ones payload) + yield return 0x7FF8000000000000ul; // +QNaN (all zeros payload) (-double.NaN) (DefaultNaN) + yield return 0x7FF7FFFFFFFFFFFFul; // +SNaN (all ones payload) + } + + for (int Cnt = 1; Cnt <= RndCnt; Cnt++) + { + ulong Rnd1 = GenNormal_D(); + ulong Rnd2 = GenSubnormal_D(); + + yield return Rnd1; + yield return Rnd2; + } + } +#endregion + +#region "ValueSource (Opcodes)" + private static uint[] _F_Mla_Mls_Se_S_() + { + return new uint[] + { + 0x5F821020u, // FMLA S0, S1, V2.S[0] + 0x5F825020u // FMLS S0, S1, V2.S[0] + }; + } + + private static uint[] _F_Mla_Mls_Se_D_() + { + return new uint[] + { + 0x5FC21020u, // FMLA D0, D1, V2.D[0] + 0x5FC25020u // FMLS D0, D1, V2.D[0] + }; + } + + private static uint[] _F_Mla_Mls_Ve_2S_4S_() + { + return new uint[] + { + 0x0F801000u, // FMLA V0.2S, V0.2S, V0.S[0] + 0x0F805000u // FMLS V0.2S, V0.2S, V0.S[0] + }; + } + + private static uint[] _F_Mla_Mls_Ve_2D_() + { + return new uint[] + { + 0x4FC01000u, // FMLA V0.2D, V0.2D, V0.D[0] + 0x4FC05000u // FMLS V0.2D, V0.2D, V0.D[0] + }; + } + + private static uint[] _F_Mul_Mulx_Se_S_() + { + return new uint[] + { + 0x5F829020u, // FMUL S0, S1, V2.S[0] + 0x7F829020u // FMULX S0, S1, V2.S[0] + }; + } + + private static uint[] _F_Mul_Mulx_Se_D_() + { + return new uint[] + { + 0x5FC29020u, // FMUL D0, D1, V2.D[0] + 0x7FC29020u // FMULX D0, D1, V2.D[0] + }; + } + + private static uint[] _F_Mul_Mulx_Ve_2S_4S_() + { + return new uint[] + { + 0x0F809000u, // FMUL V0.2S, V0.2S, V0.S[0] + 0x2F809000u // FMULX V0.2S, V0.2S, V0.S[0] + }; + } + + private static uint[] _F_Mul_Mulx_Ve_2D_() + { + return new uint[] + { + 0x4FC09000u, // FMUL V0.2D, V0.2D, V0.D[0] + 0x6FC09000u // FMULX V0.2D, V0.2D, V0.D[0] + }; + } +#endregion + + private const int RndCnt = 2; + + private static readonly bool NoZeros = false; + private static readonly bool NoInfs = false; + private static readonly bool NoNaNs = false; + + [Test, Pairwise] [Explicit] // Fused. + public void F_Mla_Mls_Se_S([ValueSource("_F_Mla_Mls_Se_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong Z, + [ValueSource("_1S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index) + { + uint H = (Index >> 1) & 1; + uint L = Index & 1; + + Opcodes |= (L << 21) | (H << 11); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); + } + + [Test, Pairwise] [Explicit] // Fused. + public void F_Mla_Mls_Se_D([ValueSource("_F_Mla_Mls_Se_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B, + [Values(0u, 1u)] uint Index) + { + uint H = Index & 1; + + Opcodes |= H << 11; + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); + } + + [Test, Pairwise] [Explicit] // Fused. + public void F_Mla_Mls_Ve_2S_4S([ValueSource("_F_Mla_Mls_Ve_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + uint H = (Index >> 1) & 1; + uint L = Index & 1; + + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (L << 21) | (H << 11); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_S); + } + + [Test, Pairwise] [Explicit] // Fused. + public void F_Mla_Mls_Ve_2D([ValueSource("_F_Mla_Mls_Ve_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B, + [Values(0u, 1u)] uint Index) + { + uint H = Index & 1; + + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= H << 11; + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FPSR.IOC, FpSkips.IfUnderflow, FpTolerances.UpToOneUlps_D); + } + + [Test, Pairwise] [Explicit] + public void F_Mul_Mulx_Se_S([ValueSource("_F_Mul_Mulx_Se_S_")] uint Opcodes, + [ValueSource("_1S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index) + { + uint H = (Index >> 1) & 1; + uint L = Index & 1; + + Opcodes |= (L << 21) | (H << 11); + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] [Explicit] + public void F_Mul_Mulx_Se_D([ValueSource("_F_Mul_Mulx_Se_D_")] uint Opcodes, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B, + [Values(0u, 1u)] uint Index) + { + uint H = Index & 1; + + Opcodes |= H << 11; + + ulong Z = TestContext.CurrentContext.Random.NextULong(); + Vector128 V0 = MakeVectorE1(Z); + Vector128 V1 = MakeVectorE0(A); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] [Explicit] + public void F_Mul_Mulx_Ve_2S_4S([ValueSource("_F_Mul_Mulx_Ve_2S_4S_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_2S_F_")] ulong Z, + [ValueSource("_2S_F_")] ulong A, + [ValueSource("_2S_F_")] ulong B, + [Values(0u, 1u, 2u, 3u)] uint Index, + [Values(0b0u, 0b1u)] uint Q) // <2S, 4S> + { + uint H = (Index >> 1) & 1; + uint L = Index & 1; + + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= (L << 21) | (H << 11); + Opcodes |= ((Q & 1) << 30); + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A * Q); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } + + [Test, Pairwise] [Explicit] + public void F_Mul_Mulx_Ve_2D([ValueSource("_F_Mul_Mulx_Ve_2D_")] uint Opcodes, + [Values(0u)] uint Rd, + [Values(1u, 0u)] uint Rn, + [Values(2u, 0u)] uint Rm, + [ValueSource("_1D_F_")] ulong Z, + [ValueSource("_1D_F_")] ulong A, + [ValueSource("_1D_F_")] ulong B, + [Values(0u, 1u)] uint Index) + { + uint H = Index & 1; + + Opcodes |= ((Rm & 31) << 16) | ((Rn & 31) << 5) | ((Rd & 31) << 0); + Opcodes |= H << 11; + + Vector128 V0 = MakeVectorE0E1(Z, Z); + Vector128 V1 = MakeVectorE0E1(A, A); + Vector128 V2 = MakeVectorE0E1(B, B * H); + + int Fpcr = (int)TestContext.CurrentContext.Random.NextUInt() & (1 << (int)FPCR.DN); + + AThreadState ThreadState = SingleOpcode(Opcodes, V0: V0, V1: V1, V2: V2, Fpcr: Fpcr); + + CompareAgainstUnicorn(FpsrMask: FPSR.IOC); + } +#endif + } +}