From 3edb66f389ac279bdcde26c5682aa39b9bf5f853 Mon Sep 17 00:00:00 2001 From: gdkchan Date: Sun, 4 Mar 2018 14:09:59 -0300 Subject: [PATCH] Improve CPU initial translation speeds (#50) * Add background translation to the CPU * Do not use a separate thread for translation, implement 2 tiers translation * Remove unnecessary usings * Lower MinCallCountForReJit * Remove unused variable --- ChocolArm64/ATranslatedSub.cs | 104 +++++++++++------- ChocolArm64/ATranslatedSubType.cs | 9 ++ ChocolArm64/ATranslator.cs | 81 +++++++++++--- ChocolArm64/Decoder/ADecoder.cs | 16 ++- ChocolArm64/Instruction/AInstEmitException.cs | 13 +++ ChocolArm64/Instruction/AInstEmitFlow.cs | 81 ++++++++++++-- ChocolArm64/Translation/AILEmitter.cs | 42 +++---- ChocolArm64/Translation/AILEmitterCtx.cs | 58 +++++++--- ChocolArm64/Translation/ALocalAlloc.cs | 18 +-- .../OsHle/Services/Set/ServiceSetSys.cs | 4 +- 10 files changed, 319 insertions(+), 107 deletions(-) create mode 100644 ChocolArm64/ATranslatedSubType.cs diff --git a/ChocolArm64/ATranslatedSub.cs b/ChocolArm64/ATranslatedSub.cs index 71a6793a..414038ab 100644 --- a/ChocolArm64/ATranslatedSub.cs +++ b/ChocolArm64/ATranslatedSub.cs @@ -2,6 +2,7 @@ using ChocolArm64.Memory; using ChocolArm64.State; using System; using System.Collections.Generic; +using System.Collections.ObjectModel; using System.Reflection; using System.Reflection.Emit; @@ -13,35 +14,47 @@ namespace ChocolArm64 private AA64Subroutine ExecDelegate; - private bool HasDelegate; - - public static Type[] FixedArgTypes { get; private set; } - public static int StateArgIdx { get; private set; } public static int MemoryArgIdx { get; private set; } + public static Type[] FixedArgTypes { get; private set; } + public DynamicMethod Method { get; private set; } - public HashSet SubCalls { get; private set; } + public ReadOnlyCollection Params { get; private set; } - public List Params { get; private set; } + private HashSet Callees; - public bool NeedsReJit { get; private set; } + private ATranslatedSubType Type; - public ATranslatedSub() + private int CallCount; + + private bool NeedsReJit; + + private int MinCallCountForReJit = 250; + + public ATranslatedSub(DynamicMethod Method, List Params, HashSet Callees) { - SubCalls = new HashSet(); - } + if (Method == null) + { + throw new ArgumentNullException(nameof(Method)); + } - public ATranslatedSub(DynamicMethod Method, List Params) : this() - { if (Params == null) { throw new ArgumentNullException(nameof(Params)); } - this.Method = Method; - this.Params = Params; + if (Callees == null) + { + throw new ArgumentNullException(nameof(Callees)); + } + + this.Method = Method; + this.Params = Params.AsReadOnly(); + this.Callees = Callees; + + PrepareDelegate(); } static ATranslatedSub() @@ -69,36 +82,53 @@ namespace ChocolArm64 } } - public long Execute(AThreadState ThreadState, AMemory Memory) + private void PrepareDelegate() { - if (!HasDelegate) + string Name = $"{Method.Name}_Dispatch"; + + DynamicMethod Mthd = new DynamicMethod(Name, typeof(long), FixedArgTypes); + + ILGenerator Generator = Mthd.GetILGenerator(); + + Generator.EmitLdargSeq(FixedArgTypes.Length); + + foreach (ARegister Reg in Params) { - string Name = $"{Method.Name}_Dispatch"; + Generator.EmitLdarg(StateArgIdx); - DynamicMethod Mthd = new DynamicMethod(Name, typeof(long), FixedArgTypes); - - ILGenerator Generator = Mthd.GetILGenerator(); - - Generator.EmitLdargSeq(FixedArgTypes.Length); - - foreach (ARegister Reg in Params) - { - Generator.EmitLdarg(StateArgIdx); - - Generator.Emit(OpCodes.Ldfld, Reg.GetField()); - } - - Generator.Emit(OpCodes.Call, Method); - Generator.Emit(OpCodes.Ret); - - ExecDelegate = (AA64Subroutine)Mthd.CreateDelegate(typeof(AA64Subroutine)); - - HasDelegate = true; + Generator.Emit(OpCodes.Ldfld, Reg.GetField()); } + Generator.Emit(OpCodes.Call, Method); + Generator.Emit(OpCodes.Ret); + + ExecDelegate = (AA64Subroutine)Mthd.CreateDelegate(typeof(AA64Subroutine)); + } + + public bool ShouldReJit() + { + if (Type == ATranslatedSubType.SubTier0) + { + if (CallCount < MinCallCountForReJit) + { + CallCount++; + } + + return CallCount == MinCallCountForReJit; + } + + return Type == ATranslatedSubType.SubTier1 && NeedsReJit; + } + + public long Execute(AThreadState ThreadState, AMemory Memory) + { return ExecDelegate(ThreadState, Memory); } - public void MarkForReJit() => NeedsReJit = true; + public void SetType(ATranslatedSubType Type) => this.Type = Type; + + public bool HasCallee(long Position) => Callees.Contains(Position); + + public void MarkForReJit() => NeedsReJit = true; } } \ No newline at end of file diff --git a/ChocolArm64/ATranslatedSubType.cs b/ChocolArm64/ATranslatedSubType.cs new file mode 100644 index 00000000..e9f3e0bf --- /dev/null +++ b/ChocolArm64/ATranslatedSubType.cs @@ -0,0 +1,9 @@ +namespace ChocolArm64 +{ + enum ATranslatedSubType + { + SubBlock, + SubTier0, + SubTier1 + } +} \ No newline at end of file diff --git a/ChocolArm64/ATranslator.cs b/ChocolArm64/ATranslator.cs index 2daf7bbc..ab434e22 100644 --- a/ChocolArm64/ATranslator.cs +++ b/ChocolArm64/ATranslator.cs @@ -7,11 +7,14 @@ using System; using System.Collections.Concurrent; using System.Collections.Generic; using System.Reflection.Emit; +using System.Threading; namespace ChocolArm64 { public class ATranslator { + private HashSet SubBlocks; + private ConcurrentDictionary CachedSubs; private ConcurrentDictionary SymbolTable; @@ -24,6 +27,8 @@ namespace ChocolArm64 public ATranslator(IReadOnlyDictionary SymbolTable = null) { + SubBlocks = new HashSet(); + CachedSubs = new ConcurrentDictionary(); if (SymbolTable != null) @@ -38,9 +43,9 @@ namespace ChocolArm64 KeepRunning = true; } - public void StopExecution() => KeepRunning = false; + internal void StopExecution() => KeepRunning = false; - public void ExecuteSubroutine(AThread Thread, long Position) + internal void ExecuteSubroutine(AThread Thread, long Position) { do { @@ -54,9 +59,14 @@ namespace ChocolArm64 CpuTrace?.Invoke(this, new ACpuTraceEventArgs(Position, SubName)); } - if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub) || Sub.NeedsReJit) + if (!CachedSubs.TryGetValue(Position, out ATranslatedSub Sub)) { - Sub = TranslateSubroutine(Thread.Memory, Position); + Sub = TranslateTier0(Thread.Memory, Position); + } + + if (Sub.ShouldReJit()) + { + TranslateTier1(Thread.Memory, Position); } Position = Sub.Execute(Thread.ThreadState, Thread.Memory); @@ -86,19 +96,57 @@ namespace ChocolArm64 return CachedSubs.ContainsKey(Position); } - private ATranslatedSub TranslateSubroutine(AMemory Memory, long Position) + private ATranslatedSub TranslateTier0(AMemory Memory, long Position) + { + ABlock Block = ADecoder.DecodeBasicBlock(this, Memory, Position); + + ABlock[] Graph = new ABlock[] { Block }; + + string SubName = GetSubName(Position); + + AILEmitterCtx Context = new AILEmitterCtx(this, Graph, Block, SubName); + + do + { + Context.EmitOpCode(); + } + while (Context.AdvanceOpCode()); + + ATranslatedSub Subroutine = Context.GetSubroutine(); + + if (SubBlocks.Contains(Position)) + { + SubBlocks.Remove(Position); + + Subroutine.SetType(ATranslatedSubType.SubBlock); + } + else + { + Subroutine.SetType(ATranslatedSubType.SubTier0); + } + + CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine); + + AOpCode LastOp = Block.GetLastOp(); + + if (LastOp.Emitter != AInstEmit.Ret && + LastOp.Emitter != AInstEmit.Br) + { + SubBlocks.Add(LastOp.Position + 4); + } + + return Subroutine; + } + + private void TranslateTier1(AMemory Memory, long Position) { (ABlock[] Graph, ABlock Root) Cfg = ADecoder.DecodeSubroutine(this, Memory, Position); - string SubName = SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}"); + string SubName = GetSubName(Position); PropagateName(Cfg.Graph, SubName); - AILEmitterCtx Context = new AILEmitterCtx( - this, - Cfg.Graph, - Cfg.Root, - SubName); + AILEmitterCtx Context = new AILEmitterCtx(this, Cfg.Graph, Cfg.Root, SubName); if (Context.CurrBlock.Position != Position) { @@ -115,7 +163,7 @@ namespace ChocolArm64 //since we can now call it directly which is faster. foreach (ATranslatedSub TS in CachedSubs.Values) { - if (TS.SubCalls.Contains(Position)) + if (TS.HasCallee(Position)) { TS.MarkForReJit(); } @@ -123,9 +171,14 @@ namespace ChocolArm64 ATranslatedSub Subroutine = Context.GetSubroutine(); - CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine); + Subroutine.SetType(ATranslatedSubType.SubTier1); - return Subroutine; + CachedSubs.AddOrUpdate(Position, Subroutine, (Key, OldVal) => Subroutine); + } + + private string GetSubName(long Position) + { + return SymbolTable.GetOrAdd(Position, $"Sub{Position:x16}"); } private void PropagateName(ABlock[] Graph, string Name) diff --git a/ChocolArm64/Decoder/ADecoder.cs b/ChocolArm64/Decoder/ADecoder.cs index 44302290..2375c185 100644 --- a/ChocolArm64/Decoder/ADecoder.cs +++ b/ChocolArm64/Decoder/ADecoder.cs @@ -18,6 +18,18 @@ namespace ChocolArm64.Decoder OpActivators = new ConcurrentDictionary(); } + public static ABlock DecodeBasicBlock( + ATranslator Translator, + AMemory Memory, + long Start) + { + ABlock Block = new ABlock(Start); + + FillBlock(Memory, Block); + + return Block; + } + public static (ABlock[] Graph, ABlock Root) DecodeSubroutine( ATranslator Translator, AMemory Memory, @@ -72,8 +84,8 @@ namespace ChocolArm64.Decoder } } - if ((!(LastOp is AOpCodeBImmAl) && - !(LastOp is AOpCodeBReg)) || HasCachedSub) + if (!((LastOp is AOpCodeBImmAl) || + (LastOp is AOpCodeBReg)) || HasCachedSub) { Current.Next = Enqueue(Current.EndPosition); } diff --git a/ChocolArm64/Instruction/AInstEmitException.cs b/ChocolArm64/Instruction/AInstEmitException.cs index 209ba56f..fe348edd 100644 --- a/ChocolArm64/Instruction/AInstEmitException.cs +++ b/ChocolArm64/Instruction/AInstEmitException.cs @@ -2,6 +2,7 @@ using ChocolArm64.Decoder; using ChocolArm64.State; using ChocolArm64.Translation; using System.Reflection; +using System.Reflection.Emit; namespace ChocolArm64.Instruction { @@ -37,6 +38,12 @@ namespace ChocolArm64.Instruction { Context.EmitLoadState(Context.CurrBlock.Next); } + else + { + Context.EmitLdc_I8(Op.Position + 4); + + Context.Emit(OpCodes.Ret); + } } public static void Und(AILEmitterCtx Context) @@ -60,6 +67,12 @@ namespace ChocolArm64.Instruction { Context.EmitLoadState(Context.CurrBlock.Next); } + else + { + Context.EmitLdc_I8(Op.Position + 4); + + Context.Emit(OpCodes.Ret); + } } } } \ No newline at end of file diff --git a/ChocolArm64/Instruction/AInstEmitFlow.cs b/ChocolArm64/Instruction/AInstEmitFlow.cs index be68aa6c..91262834 100644 --- a/ChocolArm64/Instruction/AInstEmitFlow.cs +++ b/ChocolArm64/Instruction/AInstEmitFlow.cs @@ -11,14 +11,24 @@ namespace ChocolArm64.Instruction { AOpCodeBImmAl Op = (AOpCodeBImmAl)Context.CurrOp; - Context.Emit(OpCodes.Br, Context.GetLabel(Op.Imm)); + if (Context.CurrBlock.Branch != null) + { + Context.Emit(OpCodes.Br, Context.GetLabel(Op.Imm)); + } + else + { + Context.EmitStoreState(); + Context.EmitLdc_I8(Op.Imm); + + Context.Emit(OpCodes.Ret); + } } public static void B_Cond(AILEmitterCtx Context) { AOpCodeBImmCond Op = (AOpCodeBImmCond)Context.CurrOp; - Context.EmitCondBranch(Context.GetLabel(Op.Imm), Op.Cond); + EmitBranch(Context, Op.Cond); } public static void Bl(AILEmitterCtx Context) @@ -48,10 +58,7 @@ namespace ChocolArm64.Instruction Context.Emit(OpCodes.Pop); - if (Context.CurrBlock.Next != null) - { - Context.EmitLoadState(Context.CurrBlock.Next); - } + Context.EmitLoadState(Context.CurrBlock.Next); } else { @@ -93,7 +100,7 @@ namespace ChocolArm64.Instruction Context.EmitLdintzr(Op.Rt); Context.EmitLdc_I(0); - Context.Emit(ILOp, Context.GetLabel(Op.Imm)); + EmitBranch(Context, ILOp); } public static void Ret(AILEmitterCtx Context) @@ -118,7 +125,65 @@ namespace ChocolArm64.Instruction Context.EmitLdc_I(0); - Context.Emit(ILOp, Context.GetLabel(Op.Imm)); + EmitBranch(Context, ILOp); + } + + private static void EmitBranch(AILEmitterCtx Context, ACond Cond) + { + AOpCodeBImm Op = (AOpCodeBImm)Context.CurrOp; + + if (Context.CurrBlock.Next != null && + Context.CurrBlock.Branch != null) + { + Context.EmitCondBranch(Context.GetLabel(Op.Imm), Cond); + } + else + { + Context.EmitStoreState(); + + AILLabel LblTaken = new AILLabel(); + + Context.EmitCondBranch(LblTaken, Cond); + + Context.EmitLdc_I8(Op.Position + 4); + + Context.Emit(OpCodes.Ret); + + Context.MarkLabel(LblTaken); + + Context.EmitLdc_I8(Op.Imm); + + Context.Emit(OpCodes.Ret); + } + } + + private static void EmitBranch(AILEmitterCtx Context, OpCode ILOp) + { + AOpCodeBImm Op = (AOpCodeBImm)Context.CurrOp; + + if (Context.CurrBlock.Next != null && + Context.CurrBlock.Branch != null) + { + Context.Emit(ILOp, Context.GetLabel(Op.Imm)); + } + else + { + Context.EmitStoreState(); + + AILLabel LblTaken = new AILLabel(); + + Context.Emit(ILOp, LblTaken); + + Context.EmitLdc_I8(Op.Position + 4); + + Context.Emit(OpCodes.Ret); + + Context.MarkLabel(LblTaken); + + Context.EmitLdc_I8(Op.Imm); + + Context.Emit(OpCodes.Ret); + } } } } \ No newline at end of file diff --git a/ChocolArm64/Translation/AILEmitter.cs b/ChocolArm64/Translation/AILEmitter.cs index 8f6e1210..af37a6c7 100644 --- a/ChocolArm64/Translation/AILEmitter.cs +++ b/ChocolArm64/Translation/AILEmitter.cs @@ -58,11 +58,13 @@ namespace ChocolArm64.Translation this.Root = ILBlocks[Array.IndexOf(Graph, Root)]; } - public ATranslatedSub GetSubroutine() + public AILBlock GetILBlock(int Index) => ILBlocks[Index]; + + public ATranslatedSub GetSubroutine(HashSet Callees) { LocalAlloc = new ALocalAlloc(ILBlocks, Root); - InitSubroutine(); + InitSubroutine(Callees); InitLocals(); foreach (AILBlock ILBlock in ILBlocks) @@ -73,24 +75,7 @@ namespace ChocolArm64.Translation return Subroutine; } - public AILBlock GetILBlock(int Index) => ILBlocks[Index]; - - private void InitLocals() - { - int ParamsStart = ATranslatedSub.FixedArgTypes.Length; - - Locals = new Dictionary(); - - for (int Index = 0; Index < Subroutine.Params.Count; Index++) - { - ARegister Reg = Subroutine.Params[Index]; - - Generator.EmitLdarg(Index + ParamsStart); - Generator.EmitStloc(GetLocalIndex(Reg)); - } - } - - private void InitSubroutine() + private void InitSubroutine(HashSet Callees) { List Params = new List(); @@ -114,9 +99,24 @@ namespace ChocolArm64.Translation Generator = Mthd.GetILGenerator(); - Subroutine = new ATranslatedSub(Mthd, Params); + Subroutine = new ATranslatedSub(Mthd, Params, Callees); } + private void InitLocals() + { + int ParamsStart = ATranslatedSub.FixedArgTypes.Length; + + Locals = new Dictionary(); + + for (int Index = 0; Index < Subroutine.Params.Count; Index++) + { + ARegister Reg = Subroutine.Params[Index]; + + Generator.EmitLdarg(Index + ParamsStart); + Generator.EmitStloc(GetLocalIndex(Reg)); + } + } + private Type[] GetParamTypes(IList Params) { Type[] FixedArgs = ATranslatedSub.FixedArgTypes; diff --git a/ChocolArm64/Translation/AILEmitterCtx.cs b/ChocolArm64/Translation/AILEmitterCtx.cs index ffcfa851..46659469 100644 --- a/ChocolArm64/Translation/AILEmitterCtx.cs +++ b/ChocolArm64/Translation/AILEmitterCtx.cs @@ -12,14 +12,9 @@ namespace ChocolArm64.Translation { private ATranslator Translator; - private Dictionary Labels; + private HashSet Callees; - private AILEmitter Emitter; - - private AILBlock ILBlock; - - private AOpCode OptOpLastCompare; - private AOpCode OptOpLastFlagSet; + private Dictionary Labels; private int BlkIndex; private int OpcIndex; @@ -29,6 +24,13 @@ namespace ChocolArm64.Translation public ABlock CurrBlock => Graph[BlkIndex]; public AOpCode CurrOp => Graph[BlkIndex].OpCodes[OpcIndex]; + private AILEmitter Emitter; + + private AILBlock ILBlock; + + private AOpCode OptOpLastCompare; + private AOpCode OptOpLastFlagSet; + //This is the index of the temporary register, used to store temporary //values needed by some functions, since IL doesn't have a swap instruction. //You can use any value here as long it doesn't conflict with the indices @@ -45,10 +47,27 @@ namespace ChocolArm64.Translation ABlock Root, string SubName) { + if (Translator == null) + { + throw new ArgumentNullException(nameof(Translator)); + } + + if (Graph == null) + { + throw new ArgumentNullException(nameof(Graph)); + } + + if (Root == null) + { + throw new ArgumentNullException(nameof(Root)); + } + this.Translator = Translator; this.Graph = Graph; this.Root = Root; + Callees = new HashSet(); + Labels = new Dictionary(); Emitter = new AILEmitter(Graph, Root, SubName); @@ -57,23 +76,27 @@ namespace ChocolArm64.Translation OpcIndex = -1; - if (!AdvanceOpCode()) + if (Graph.Length == 0 || !AdvanceOpCode()) { throw new ArgumentException(nameof(Graph)); } } - public ATranslatedSub GetSubroutine() => Emitter.GetSubroutine(); + public ATranslatedSub GetSubroutine() + { + return Emitter.GetSubroutine(Callees); + } public bool AdvanceOpCode() { + if (OpcIndex + 1 == CurrBlock.OpCodes.Count && + BlkIndex + 1 == Graph.Length) + { + return false; + } + while (++OpcIndex >= (CurrBlock?.OpCodes.Count ?? 0)) { - if (BlkIndex + 1 >= Graph.Length) - { - return false; - } - BlkIndex++; OpcIndex = -1; @@ -100,6 +123,13 @@ namespace ChocolArm64.Translation public bool TryOptEmitSubroutineCall() { + Callees.Add(((AOpCodeBImm)CurrOp).Imm); + + if (CurrBlock.Next == null) + { + return false; + } + if (!Translator.TryGetCachedSub(CurrOp, out ATranslatedSub Sub)) { return false; diff --git a/ChocolArm64/Translation/ALocalAlloc.cs b/ChocolArm64/Translation/ALocalAlloc.cs index f23af9c7..8e904780 100644 --- a/ChocolArm64/Translation/ALocalAlloc.cs +++ b/ChocolArm64/Translation/ALocalAlloc.cs @@ -67,14 +67,15 @@ namespace ChocolArm64.Translation public long VecOutputs; } - private const int MaxOptGraphLength = 55; + private const int MaxOptGraphLength = 40; public ALocalAlloc(AILBlock[] Graph, AILBlock Root) { IntPaths = new Dictionary(); VecPaths = new Dictionary(); - if (Graph.Length < MaxOptGraphLength) + if (Graph.Length > 1 && + Graph.Length < MaxOptGraphLength) { InitializeOptimal(Graph, Root); } @@ -179,10 +180,8 @@ namespace ChocolArm64.Translation { //This is WAY faster than InitializeOptimal, but results in //uneeded loads and stores, so the resulting code will be slower. - long IntInputs = 0; - long IntOutputs = 0; - long VecInputs = 0; - long VecOutputs = 0; + long IntInputs = 0, IntOutputs = 0; + long VecInputs = 0, VecOutputs = 0; foreach (AILBlock Block in Graph) { @@ -196,8 +195,11 @@ namespace ChocolArm64.Translation //in those cases if we attempt to write an output registers that was //not written, we will be just writing zero and messing up the old register value. //So we just need to ensure that all outputs are loaded. - IntInputs |= IntOutputs; - VecInputs |= VecOutputs; + if (Graph.Length > 1) + { + IntInputs |= IntOutputs; + VecInputs |= VecOutputs; + } foreach (AILBlock Block in Graph) { diff --git a/Ryujinx.Core/OsHle/Services/Set/ServiceSetSys.cs b/Ryujinx.Core/OsHle/Services/Set/ServiceSetSys.cs index 41c5d8b3..dee6573d 100644 --- a/Ryujinx.Core/OsHle/Services/Set/ServiceSetSys.cs +++ b/Ryujinx.Core/OsHle/Services/Set/ServiceSetSys.cs @@ -1,6 +1,4 @@ -using ChocolArm64.Memory; -using Ryujinx.Core.OsHle.Ipc; -using System; +using Ryujinx.Core.OsHle.Ipc; using System.Collections.Generic; namespace Ryujinx.Core.OsHle.IpcServices.Set