814f75142e
* Implemented in IR the managed methods of the Saturating region ... ... of the SoftFallback class (the SatQ ones). The need to natively manage the Fpcr and Fpsr system registers is still a fact. Contributes to https://github.com/Ryujinx/Ryujinx/issues/2917 ; I will open another PR to implement in Intrinsics-branchless the methods of the Saturation region as well (the SatXXXToXXX ones). All instructions involved have been tested locally in both release and debug modes, in both lowcq and highcq. * Ptc.InternalVersion = 3665 * Addressed PR feedback. * Implemented in IR the managed methods of the ShlReg region of the SoftFallback class. It also includes the last two SatQ ones (following up on https://github.com/Ryujinx/Ryujinx/pull/3665). All instructions involved have been tested locally in both release and debug modes, in both lowcq and highcq. * Fpsr and Fpcr freed. Handling/isolation of Fpsr and Fpcr via register for IR and via memory for Tests and Threads, with synchronization to context exchanges (explicit for SoftFloat); without having to call managed methods. Thanks to the inlining work of the previous two PRs and others in this. Tests performed locally in both release and debug modes, in both lowcq and highcq, with FastFP to true and false (explicit FP tests included). Tested with the title Tony Hawk's PS. Depends on shlreg. * Update InstEmitSimdHelper.cs * De-magic Masks. Remove the Stride and Len flags; Fpsr.NZCV are A32 only, then moved to Fpscr: this leads to emitting less IR in reference to Get/Set Fpsr/Fpcr/Fpscr methods in reference to Mrs/Msr (A64) and Vmrs/Vmsr (A32) instructions. * Addressed PR feedback.
1620 lines
59 KiB
C#
1620 lines
59 KiB
C#
using ARMeilleure.Decoders;
|
|
using ARMeilleure.IntermediateRepresentation;
|
|
using ARMeilleure.State;
|
|
using ARMeilleure.Translation;
|
|
using System;
|
|
using System.Diagnostics;
|
|
using System.Reflection;
|
|
|
|
using static ARMeilleure.Instructions.InstEmitHelper;
|
|
using static ARMeilleure.Instructions.InstEmitSimdHelper;
|
|
using static ARMeilleure.IntermediateRepresentation.Operand.Factory;
|
|
|
|
namespace ARMeilleure.Instructions
|
|
{
|
|
using Func1I = Func<Operand, Operand>;
|
|
|
|
static partial class InstEmit
|
|
{
|
|
public static void Fcvt_S(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
if (op.Size == 0 && op.Opc == 1) // Single -> Double.
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), n);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP64, ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 1 && op.Opc == 0) // Double -> Single.
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
|
|
|
|
Operand res = context.ConvertToFP(OperandType.FP32, ne);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 0 && op.Opc == 3) // Single -> Half.
|
|
{
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
res = context.AddIntrinsic(Intrinsic.X86Pslldq, res, Const(14)); // VectorZeroUpper112()
|
|
res = context.AddIntrinsic(Intrinsic.X86Psrldq, res, Const(14));
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), 0);
|
|
|
|
context.StoreToContext();
|
|
Operand res = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
|
|
context.LoadFromContext();
|
|
|
|
res = context.ZeroExtend16(OperandType.I64, res);
|
|
|
|
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
|
|
}
|
|
}
|
|
else if (op.Size == 3 && op.Opc == 0) // Half -> Single.
|
|
{
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
|
|
res = context.VectorZeroUpper96(res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
|
|
|
|
context.StoreToContext();
|
|
Operand res = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
|
|
context.LoadFromContext();
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else if (op.Size == 1 && op.Opc == 3) // Double -> Half.
|
|
{
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtsd2ss, context.VectorZero(), n);
|
|
res = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, res, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP64, GetVec(op.Rn), 0);
|
|
|
|
context.StoreToContext();
|
|
Operand res = context.Call(typeof(SoftFloat64_16).GetMethod(nameof(SoftFloat64_16.FPConvert)), ne);
|
|
context.LoadFromContext();
|
|
|
|
res = context.ZeroExtend16(OperandType.I64, res);
|
|
|
|
context.Copy(GetVec(op.Rd), EmitVectorInsert(context, context.VectorZero(), res, 0, 1));
|
|
}
|
|
}
|
|
else if (op.Size == 3 && op.Opc == 1) // Half -> Double.
|
|
{
|
|
if (Optimizations.UseF16c)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, GetVec(op.Rn));
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtss2sd, context.VectorZero(), res);
|
|
res = context.VectorZeroUpper64(res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, 0, 1);
|
|
|
|
context.StoreToContext();
|
|
Operand res = context.Call(typeof(SoftFloat16_64).GetMethod(nameof(SoftFloat16_64.FPConvert)), ne);
|
|
context.LoadFromContext();
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
}
|
|
else // Invalid encoding.
|
|
{
|
|
Debug.Assert(false, $"type == {op.Size} && opc == {op.Opc}");
|
|
}
|
|
}
|
|
|
|
public static void Fcvtas_Gp(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
|
}
|
|
|
|
public static void Fcvtas_S(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: true);
|
|
}
|
|
|
|
public static void Fcvtas_V(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: true, scalar: false);
|
|
}
|
|
|
|
public static void Fcvtau_Gp(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1));
|
|
}
|
|
|
|
public static void Fcvtau_S(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: true);
|
|
}
|
|
|
|
public static void Fcvtau_V(ArmEmitterContext context)
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.AwayFromZero, op1), signed: false, scalar: false);
|
|
}
|
|
|
|
public static void Fcvtl_V(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (Optimizations.UseSse2 && sizeF == 1)
|
|
{
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtps2pd, res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else if (Optimizations.UseF16c && sizeF == 0)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Operand res = op.RegisterSize == RegisterSize.Simd128 ? context.AddIntrinsic(Intrinsic.X86Movhlps, n, n) : n;
|
|
res = context.AddIntrinsic(Intrinsic.X86Vcvtph2ps, res);
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else
|
|
{
|
|
Operand res = context.VectorZero();
|
|
|
|
int elems = 4 >> sizeF;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
if (sizeF == 0)
|
|
{
|
|
Operand ne = EmitVectorExtractZx(context, op.Rn, part + index, 1);
|
|
|
|
context.StoreToContext();
|
|
Operand e = context.Call(typeof(SoftFloat16_32).GetMethod(nameof(SoftFloat16_32.FPConvert)), ne);
|
|
context.LoadFromContext();
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand ne = context.VectorExtract(OperandType.FP32, GetVec(op.Rn), part + index);
|
|
|
|
Operand e = context.ConvertToFP(OperandType.FP64, ne);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtms_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtms_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsMinusInfinity, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1), signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtmu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsMinusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Floor), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtn_V(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (Optimizations.UseSse2 && sizeF == 1)
|
|
{
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtpd2ps, GetVec(op.Rn));
|
|
nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
|
|
|
|
Operand res = context.VectorZeroUpper64(d);
|
|
res = context.AddIntrinsic(movInst, res, nInt);
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
else if (Optimizations.UseF16c && sizeF == 0)
|
|
{
|
|
Debug.Assert(!Optimizations.ForceLegacySse);
|
|
|
|
Operand d = GetVec(op.Rd);
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
Intrinsic movInst = op.RegisterSize == RegisterSize.Simd128 ? Intrinsic.X86Movlhps : Intrinsic.X86Movhlps;
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Vcvtps2ph, n, Const(X86GetRoundControl(FPRoundingMode.ToNearest)));
|
|
nInt = context.AddIntrinsic(Intrinsic.X86Movlhps, nInt, nInt);
|
|
|
|
Operand res = context.VectorZeroUpper64(d);
|
|
res = context.AddIntrinsic(movInst, res, nInt);
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
else
|
|
{
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int elems = 4 >> sizeF;
|
|
|
|
int part = op.RegisterSize == RegisterSize.Simd128 ? elems : 0;
|
|
|
|
Operand d = GetVec(op.Rd);
|
|
|
|
Operand res = part == 0 ? context.VectorZero() : context.Copy(d);
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
context.StoreToContext();
|
|
Operand e = context.Call(typeof(SoftFloat32_16).GetMethod(nameof(SoftFloat32_16.FPConvert)), ne);
|
|
context.LoadFromContext();
|
|
|
|
e = context.ZeroExtend16(OperandType.I64, e);
|
|
|
|
res = EmitVectorInsert(context, res, e, part + index, 1);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand e = context.ConvertToFP(OperandType.FP32, ne);
|
|
|
|
res = context.VectorInsert(res, e, part + index);
|
|
}
|
|
}
|
|
|
|
context.Copy(d, res);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.ToNearest, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtns_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.ToNearest, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtnu_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtnu_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.ToNearest, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt(context, (op1) => EmitRoundMathCall(context, MidpointRounding.ToEven, op1), signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtps_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtpu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsPlusInfinity, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => EmitUnaryMathCall(context, nameof(Math.Ceiling), op1));
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_s_Gp(context, (op1) => op1);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvts_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtzs_Gp_Fixed(context);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzs_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtsOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_Gp(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvt_u_Gp(context, (op1) => op1);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41Fcvtu_Gp(context, FPRoundingMode.TowardsZero, isFixed: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtzu_Gp_Fixed(context);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Fcvtzu_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse41)
|
|
{
|
|
EmitSse41FcvtuOpF(context, FPRoundingMode.TowardsZero, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitFcvtz(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_Gp(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
res = context.SignExtend32(OperandType.I64, res);
|
|
}
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Scvtf_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
res = context.SignExtend32(OperandType.I64, res);
|
|
}
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: true);
|
|
|
|
res = EmitI2fFBitsMul(context, res, op.FBits);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Scvtf_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_S_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Scvtf_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2ScvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: true, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_Gp(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: false);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Ucvtf_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand res = GetIntOrZR(context, op.Rn);
|
|
|
|
res = EmitFPConvert(context, res, op.Size, signed: false);
|
|
|
|
res = EmitI2fFBitsMul(context, res, op.FBits);
|
|
|
|
context.Copy(GetVec(op.Rd), context.VectorInsert(context.VectorZero(), res, 0));
|
|
}
|
|
|
|
public static void Ucvtf_S(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_S_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: true);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: true);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_V(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
public static void Ucvtf_V_Fixed(ArmEmitterContext context)
|
|
{
|
|
if (Optimizations.UseSse2)
|
|
{
|
|
EmitSse2UcvtfOp(context, scalar: false);
|
|
}
|
|
else
|
|
{
|
|
EmitCvtf(context, signed: false, scalar: false);
|
|
}
|
|
}
|
|
|
|
private static void EmitFcvt(ArmEmitterContext context, Func1I emit, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, n, index);
|
|
|
|
Operand e = emit(ne);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
|
|
|
|
e = context.Call(info, e);
|
|
|
|
e = context.ZeroExtend32(OperandType.I64, e);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
|
|
e = context.Call(info, e);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, e, index, sizeI);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static void EmitFcvtz(ArmEmitterContext context, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
OperandType type = sizeF == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
int fBits = GetFBits(context);
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = context.VectorExtract(type, n, index);
|
|
|
|
Operand e = EmitF2iFBitsMul(context, ne, fBits);
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32));
|
|
|
|
e = context.Call(info, e);
|
|
|
|
e = context.ZeroExtend32(OperandType.I64, e);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
MethodInfo info = signed
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
|
|
e = context.Call(info, e);
|
|
}
|
|
|
|
res = EmitVectorInsert(context, res, e, index, sizeI);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static void EmitFcvt_s_Gp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitFcvt___Gp(context, emit, signed: true);
|
|
}
|
|
|
|
private static void EmitFcvt_u_Gp(ArmEmitterContext context, Func1I emit)
|
|
{
|
|
EmitFcvt___Gp(context, emit, signed: false);
|
|
}
|
|
|
|
private static void EmitFcvt___Gp(ArmEmitterContext context, Func1I emit, bool signed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
Operand res = signed
|
|
? EmitScalarFcvts(context, emit(ne), 0)
|
|
: EmitScalarFcvtu(context, emit(ne), 0);
|
|
|
|
SetIntOrZR(context, op.Rd, res);
|
|
}
|
|
|
|
private static void EmitFcvtzs_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
EmitFcvtz__Gp_Fixed(context, signed: true);
|
|
}
|
|
|
|
private static void EmitFcvtzu_Gp_Fixed(ArmEmitterContext context)
|
|
{
|
|
EmitFcvtz__Gp_Fixed(context, signed: false);
|
|
}
|
|
|
|
private static void EmitFcvtz__Gp_Fixed(ArmEmitterContext context, bool signed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
OperandType type = op.Size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
Operand ne = context.VectorExtract(type, GetVec(op.Rn), 0);
|
|
|
|
Operand res = signed
|
|
? EmitScalarFcvts(context, ne, op.FBits)
|
|
: EmitScalarFcvtu(context, ne, op.FBits);
|
|
|
|
SetIntOrZR(context, op.Rd, res);
|
|
}
|
|
|
|
private static void EmitCvtf(ArmEmitterContext context, bool signed, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand res = context.VectorZero();
|
|
|
|
int sizeF = op.Size & 1;
|
|
int sizeI = sizeF + 2;
|
|
|
|
int fBits = GetFBits(context);
|
|
|
|
int elems = !scalar ? op.GetBytesCount() >> sizeI : 1;
|
|
|
|
for (int index = 0; index < elems; index++)
|
|
{
|
|
Operand ne = EmitVectorLongExtract(context, op.Rn, index, sizeI);
|
|
|
|
Operand e = EmitFPConvert(context, ne, sizeF, signed);
|
|
|
|
e = EmitI2fFBitsMul(context, e, fBits);
|
|
|
|
res = context.VectorInsert(res, e, index);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
|
|
private static int GetFBits(ArmEmitterContext context)
|
|
{
|
|
if (context.CurrOp is OpCodeSimdShImm op)
|
|
{
|
|
return GetImmShr(op);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
private static Operand EmitFPConvert(ArmEmitterContext context, Operand value, int size, bool signed)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.I32 || value.Type == OperandType.I64);
|
|
Debug.Assert((uint)size < 2);
|
|
|
|
OperandType type = size == 0 ? OperandType.FP32 : OperandType.FP64;
|
|
|
|
if (signed)
|
|
{
|
|
return context.ConvertToFP(type, value);
|
|
}
|
|
else
|
|
{
|
|
return context.ConvertToFPUI(type, value);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitScalarFcvts(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
value = EmitF2iFBitsMul(context, value, fBits);
|
|
|
|
MethodInfo info;
|
|
|
|
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS32));
|
|
}
|
|
else
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToS64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToS64));
|
|
}
|
|
|
|
return context.Call(info, value);
|
|
}
|
|
|
|
private static Operand EmitScalarFcvtu(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
value = EmitF2iFBitsMul(context, value, fBits);
|
|
|
|
MethodInfo info;
|
|
|
|
if (context.CurrOp.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU32))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU32));
|
|
}
|
|
else
|
|
{
|
|
info = value.Type == OperandType.FP32
|
|
? typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF32ToU64))
|
|
: typeof(SoftFallback).GetMethod(nameof(SoftFallback.SatF64ToU64));
|
|
}
|
|
|
|
return context.Call(info, value);
|
|
}
|
|
|
|
private static Operand EmitF2iFBitsMul(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
if (fBits == 0)
|
|
{
|
|
return value;
|
|
}
|
|
|
|
if (value.Type == OperandType.FP32)
|
|
{
|
|
return context.Multiply(value, ConstF(MathF.Pow(2f, fBits)));
|
|
}
|
|
else /* if (value.Type == OperandType.FP64) */
|
|
{
|
|
return context.Multiply(value, ConstF(Math.Pow(2d, fBits)));
|
|
}
|
|
}
|
|
|
|
private static Operand EmitI2fFBitsMul(ArmEmitterContext context, Operand value, int fBits)
|
|
{
|
|
Debug.Assert(value.Type == OperandType.FP32 || value.Type == OperandType.FP64);
|
|
|
|
if (fBits == 0)
|
|
{
|
|
return value;
|
|
}
|
|
|
|
if (value.Type == OperandType.FP32)
|
|
{
|
|
return context.Multiply(value, ConstF(1f / MathF.Pow(2f, fBits)));
|
|
}
|
|
else /* if (value.Type == OperandType.FP64) */
|
|
{
|
|
return context.Multiply(value, ConstF(1d / Math.Pow(2d, fBits)));
|
|
}
|
|
}
|
|
|
|
public static Operand EmitSse2CvtDoubleToInt64OpF(ArmEmitterContext context, Operand opF, bool scalar)
|
|
{
|
|
Debug.Assert(opF.Type == OperandType.V128);
|
|
|
|
Operand longL = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opF); // opFL
|
|
Operand res = context.VectorCreateScalar(longL);
|
|
|
|
if (!scalar)
|
|
{
|
|
Operand opFH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, opF); // res doesn't matter.
|
|
Operand longH = context.AddIntrinsicLong (Intrinsic.X86Cvtsd2si, opFH);
|
|
Operand resH = context.VectorCreateScalar(longH);
|
|
res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private static Operand EmitSse2CvtInt64ToDoubleOp(ArmEmitterContext context, Operand op, bool scalar)
|
|
{
|
|
Debug.Assert(op.Type == OperandType.V128);
|
|
|
|
Operand longL = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, op); // opL
|
|
Operand res = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, context.VectorZero(), longL);
|
|
|
|
if (!scalar)
|
|
{
|
|
Operand opH = context.AddIntrinsic (Intrinsic.X86Movhlps, res, op); // res doesn't matter.
|
|
Operand longH = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, opH);
|
|
Operand resH = context.AddIntrinsic (Intrinsic.X86Cvtsi2sd, res, longH); // res doesn't matter.
|
|
res = context.AddIntrinsic (Intrinsic.X86Movlhps, res, resH);
|
|
}
|
|
|
|
return res;
|
|
}
|
|
|
|
private static void EmitSse2ScvtfOp(ArmEmitterContext context, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 - fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand res = EmitSse2CvtInt64ToDoubleOp(context, n, scalar);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse2UcvtfOp(ArmEmitterContext context, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand mask = scalar // 65536.000f (1 << 16)
|
|
? X86GetScalar (context, 0x47800000)
|
|
: X86GetAllElements(context, 0x47800000);
|
|
|
|
Operand res = context.AddIntrinsic(Intrinsic.X86Psrld, n, Const(16));
|
|
res = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res);
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic(Intrinsic.X86Pslld, n, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Psrld, res2, Const(16));
|
|
res2 = context.AddIntrinsic(Intrinsic.X86Cvtdq2ps, res2);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Addps, res, res2);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == 1f / MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 - fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulps, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper96(res);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand mask = scalar // 4294967296.0000000d (1L << 32)
|
|
? X86GetScalar (context, 0x41F0000000000000L)
|
|
: X86GetAllElements(context, 0x41F0000000000000L);
|
|
|
|
Operand res = context.AddIntrinsic (Intrinsic.X86Psrlq, n, Const(32));
|
|
res = EmitSse2CvtInt64ToDoubleOp(context, res, scalar);
|
|
res = context.AddIntrinsic (Intrinsic.X86Mulpd, res, mask);
|
|
|
|
Operand res2 = context.AddIntrinsic (Intrinsic.X86Psllq, n, Const(32));
|
|
res2 = context.AddIntrinsic (Intrinsic.X86Psrlq, res2, Const(32));
|
|
res2 = EmitSse2CvtInt64ToDoubleOp(context, res2, scalar);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Addpd, res, res2);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == 1d / Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L - fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
res = context.AddIntrinsic(Intrinsic.X86Mulpd, res, fpScaledMask);
|
|
}
|
|
|
|
if (scalar)
|
|
{
|
|
res = context.VectorZeroUpper64(res);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), res);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41FcvtsOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 + fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
|
|
? X86GetScalar (context, 0x4F000000)
|
|
: X86GetAllElements(context, 0x4F000000);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt, nRes);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper96(dRes);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
|
|
? X86GetScalar (context, 0x43E0000000000000L)
|
|
: X86GetAllElements(context, 0x43E0000000000000L);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong, nRes);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41FcvtuOpF(ArmEmitterContext context, FPRoundingMode roundMode, bool scalar)
|
|
{
|
|
OpCodeSimd op = (OpCodeSimd)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
// sizeF == ((OpCodeSimdShImm)op).Size - 2
|
|
int sizeF = op.Size & 1;
|
|
|
|
if (sizeF == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, fBits)
|
|
int fpScaled = 0x3F800000 + fBits * 0x800000;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulps, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundps, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand fpMaxValMask = scalar // 2.14748365E9f (2147483648)
|
|
? X86GetScalar (context, 0x4F000000)
|
|
: X86GetAllElements(context, 0x4F000000);
|
|
|
|
Operand nInt = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subps, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nInt2 = context.AddIntrinsic(Intrinsic.X86Cvtps2dq, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpps, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nInt2, nRes);
|
|
dRes = context.AddIntrinsic(Intrinsic.X86Paddd, dRes, nInt);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper96(dRes);
|
|
}
|
|
else if (op.RegisterSize == RegisterSize.Simd64)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
else /* if (sizeF == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (op is OpCodeSimdShImm fixedOp)
|
|
{
|
|
int fBits = GetImmShr(fixedOp);
|
|
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, fBits)
|
|
long fpScaled = 0x3FF0000000000000L + fBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = scalar
|
|
? X86GetScalar (context, fpScaled)
|
|
: X86GetAllElements(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulpd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundpd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand fpMaxValMask = scalar // 9.2233720368547760E18d (9223372036854775808)
|
|
? X86GetScalar (context, 0x43E0000000000000L)
|
|
: X86GetAllElements(context, 0x43E0000000000000L);
|
|
|
|
Operand nLong = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subpd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nLong2 = EmitSse2CvtDoubleToInt64OpF(context, nRes, scalar);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmppd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand dRes = context.AddIntrinsic(Intrinsic.X86Pxor, nLong2, nRes);
|
|
dRes = context.AddIntrinsic(Intrinsic.X86Paddq, dRes, nLong);
|
|
|
|
if (scalar)
|
|
{
|
|
dRes = context.VectorZeroUpper64(dRes);
|
|
}
|
|
|
|
context.Copy(GetVec(op.Rd), dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41Fcvts_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
if (op.Size == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
|
|
int fpScaled = 0x3F800000 + op.FBits * 0x800000;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
int fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x4F000000 // 2.14748365E9f (2147483648)
|
|
: 0x5F000000; // 9.223372E18f (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int64)
|
|
{
|
|
nInt = context.SignExtend32(OperandType.I64, nInt);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nInt);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
else /* if (op.Size == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
|
|
long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
long fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
|
|
: 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong, nLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
}
|
|
|
|
private static void EmitSse41Fcvtu_Gp(ArmEmitterContext context, FPRoundingMode roundMode, bool isFixed)
|
|
{
|
|
OpCodeSimdCvt op = (OpCodeSimdCvt)context.CurrOp;
|
|
|
|
Operand n = GetVec(op.Rn);
|
|
|
|
if (op.Size == 0)
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int32BitsToSingle(fpScaled) == MathF.Pow(2f, op.FBits)
|
|
int fpScaled = 0x3F800000 + op.FBits * 0x800000;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulss, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundss, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
int fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x4F000000 // 2.14748365E9f (2147483648)
|
|
: 0x5F000000; // 9.223372E18f (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subss, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtss2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtss2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpss, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nInt = context.AddIntrinsicInt(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int64)
|
|
{
|
|
nInt = context.SignExtend32(OperandType.I64, nInt);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nInt);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
else /* if (op.Size == 1) */
|
|
{
|
|
Operand nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, n, n, Const((int)CmpCondition.OrderedQ));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, n);
|
|
|
|
if (isFixed)
|
|
{
|
|
// BitConverter.Int64BitsToDouble(fpScaled) == Math.Pow(2d, op.FBits)
|
|
long fpScaled = 0x3FF0000000000000L + op.FBits * 0x10000000000000L;
|
|
|
|
Operand fpScaledMask = X86GetScalar(context, fpScaled);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Mulsd, nRes, fpScaledMask);
|
|
}
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Roundsd, nRes, Const(X86GetRoundControl(roundMode)));
|
|
|
|
Operand zero = context.VectorZero();
|
|
|
|
Operand nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
long fpMaxVal = op.RegisterSize == RegisterSize.Int32
|
|
? 0x41E0000000000000L // 2147483648.0000000d (2147483648)
|
|
: 0x43E0000000000000L; // 9.2233720368547760E18d (9223372036854775808)
|
|
|
|
Operand fpMaxValMask = X86GetScalar(context, fpMaxVal);
|
|
|
|
Operand nIntOrLong = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Subsd, nRes, fpMaxValMask);
|
|
|
|
nCmp = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, zero, Const((int)CmpCondition.NotLessThanOrEqual));
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Pand, nRes, nCmp);
|
|
|
|
Operand nIntOrLong2 = op.RegisterSize == RegisterSize.Int32
|
|
? context.AddIntrinsicInt (Intrinsic.X86Cvtsd2si, nRes)
|
|
: context.AddIntrinsicLong(Intrinsic.X86Cvtsd2si, nRes);
|
|
|
|
nRes = context.AddIntrinsic(Intrinsic.X86Cmpsd, nRes, fpMaxValMask, Const((int)CmpCondition.NotLessThan));
|
|
|
|
Operand nLong = context.AddIntrinsicLong(Intrinsic.X86Cvtsi2si, nRes);
|
|
|
|
if (op.RegisterSize == RegisterSize.Int32)
|
|
{
|
|
nLong = context.ConvertI64ToI32(nLong);
|
|
}
|
|
|
|
Operand dRes = context.BitwiseExclusiveOr(nIntOrLong2, nLong);
|
|
dRes = context.Add(dRes, nIntOrLong);
|
|
|
|
SetIntOrZR(context, op.Rd, dRes);
|
|
}
|
|
}
|
|
|
|
private static Operand EmitVectorLongExtract(ArmEmitterContext context, int reg, int index, int size)
|
|
{
|
|
OperandType type = size == 3 ? OperandType.I64 : OperandType.I32;
|
|
|
|
return context.VectorExtract(type, GetVec(reg), index);
|
|
}
|
|
}
|
|
}
|