Skip to content

Commit

Permalink
Update Runtime to support Apple Silicon
Browse files Browse the repository at this point in the history
- Update entry point call for Apple arm64 function calling convention
- New Assembler routines for function calls
  • Loading branch information
rhuanjl committed Mar 22, 2023
1 parent 015b05d commit edb1ee9
Show file tree
Hide file tree
Showing 12 changed files with 412 additions and 7 deletions.
5 changes: 4 additions & 1 deletion lib/Runtime/Debug/TTEventLog.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Copyright (c) ChakraCore Project Contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "RuntimeDebugPch.h"
Expand Down Expand Up @@ -2816,7 +2817,9 @@ namespace TTD
TTDAssert(wcscmp(_u("x86"), archString.Contents) == 0, "Mismatch in arch between record and replay!!!");
#elif defined(_M_X64)
TTDAssert(wcscmp(_u("x64"), archString.Contents) == 0, "Mismatch in arch between record and replay!!!");
#elif defined(_M_ARM)
#elif defined(_M_ARM) // #TODO investigate why this is checking for "arm64" instead of "arm"
TTDAssert(wcscmp(_u("arm64"), archString.Contents) == 0, "Mismatch in arch between record and replay!!!");
#elif defined(_M_ARM64)
TTDAssert(wcscmp(_u("arm64"), archString.Contents) == 0, "Mismatch in arch between record and replay!!!");
#else
TTDAssert(false, "Unknown arch!!!");
Expand Down
4 changes: 4 additions & 0 deletions lib/Runtime/Language/Arguments.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Copyright (c) ChakraCore Project Contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#pragma once
Expand Down Expand Up @@ -82,6 +83,9 @@ inline int _count_args(const T1&, const T2&, const T3&, const T4&, const T5&, Js
// xplat-todo: fix me ARM
#define CALL_ENTRYPOINT_NOASSERT(entryPoint, function, callInfo, ...) \
entryPoint(function, callInfo, ##__VA_ARGS__)
#elif defined (_ARM64_)
// arm64 variant: `function` and `callInfo` are intentionally passed twice, once in the two
// leading argument positions and once more ahead of the caller-supplied arguments.
// NOTE(review): presumably this targets the Apple arm64 calling convention, where variadic
// arguments are passed on the stack, so that the callee also finds function/callInfo in the
// stack argument area -- confirm against the entry point thunks.
// Caution: the `function` and `callInfo` macro arguments are expanded (and therefore evaluated) twice.
#define CALL_ENTRYPOINT_NOASSERT(entryPoint, function, callInfo, ...) \
entryPoint(function, callInfo, function, callInfo, ##__VA_ARGS__)
#else
#error CALL_ENTRYPOINT_NOASSERT not yet implemented
#endif
Expand Down
6 changes: 6 additions & 0 deletions lib/Runtime/Language/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,12 @@ elseif(CC_TARGETS_X86)
i386/AsmJsJitTemplate.cpp
i386/StackFrame.cpp
)
elseif(CC_TARGETS_ARM64)
set (CRL_SOURCE_FILES ${CRL_SOURCE_FILES}
arm64/StackFrame.cpp
arm64/arm64_Thunks.S
arm64/arm64_CallEhFrame.S
)
elseif(CC_TARGETS_ARM)
set (CRL_SOURCE_FILES ${CRL_SOURCE_FILES}
arm/StackFrame.cpp
Expand Down
16 changes: 12 additions & 4 deletions lib/Runtime/Language/InterpreterStackFrame.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft Corporation and contributors. All rights reserved.
// Copyright (c) 2021 ChakraCore Project Contributors. All rights reserved.
// Copyright (c) ChakraCore Project Contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------

Expand Down Expand Up @@ -1814,13 +1814,13 @@ namespace Js
}
#endif

#if !defined(_M_ARM64)
#if defined(_M_ARM64) && defined(_WIN32)
// Language\arm64\arm64_Thunks.asm
#else
// Reinterprets the address of the first named parameter (`function`) as a
// JavascriptCallStackLayout* and forwards it to InterpreterThunk, returning its result.
// `callInfo` and the variadic arguments are not referenced by name here.
// NOTE(review): this presumably relies on `function`, `callInfo` and the variadic arguments
// being contiguous in memory starting at &function -- confirm for every ABI that compiles
// this definition.
Var InterpreterStackFrame::StaticInterpreterThunk(RecyclableObject* function, CallInfo callInfo, ...)
{
return InterpreterThunk((JavascriptCallStackLayout*)&function);
}
#else
// Language\arm64\arm64_Thunks.asm
#endif
#pragma optimize("", on)

Expand Down Expand Up @@ -6352,7 +6352,11 @@ namespace Js
// For ARM we need to make sure that pipeline is synchronized with memory/cache for newly jitted code.
// Note: this does not seem to affect perf, but if it was, we could add a boolean isCalled to EntryPointInfo
// and do ISB only for 1st time this entry point is called (potential working set regression though).
#if defined(_InstructionSynchronizationBarrier)
_InstructionSynchronizationBarrier();
#else
asm("isb");
#endif
#endif
uint newOffset = ::Math::PointerCastToIntegral<uint>(
CALL_ENTRYPOINT_NOASSERT(address, function, CallInfo(CallFlags_InternalFrame, 1), this));
Expand Down Expand Up @@ -6386,7 +6390,11 @@ namespace Js
// For ARM we need to make sure that pipeline is synchronized with memory/cache for newly jitted code.
// Note: this does not seem to affect perf, but if it was, we could add a boolean isCalled to EntryPointInfo
// and do ISB only for 1st time this entry point is called (potential working set regression though).
#if defined(_InstructionSynchronizationBarrier)
_InstructionSynchronizationBarrier();
#else
asm("isb");
#endif
#endif
uint newOffset = ::Math::PointerCastToIntegral<uint>(
CALL_ENTRYPOINT_NOASSERT(address, function, CallInfo(CallFlags_InternalFrame, 1), this));
Expand Down
3 changes: 2 additions & 1 deletion lib/Runtime/Language/JavascriptStackWalker.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//-------------------------------------------------------------------------------------------------------
// Copyright (C) Microsoft. All rights reserved.
// Copyright (c) ChakraCore Project Contributors. All rights reserved.
// Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
//-------------------------------------------------------------------------------------------------------
#include "RuntimeLanguagePch.h"
Expand Down Expand Up @@ -278,7 +279,7 @@ namespace Js
}
else
#endif
if (this->GetCurrentFunction()->GetFunctionInfo()->IsCoroutine())
if (this->GetCurrentFunction()->GetFunctionInfo()->IsCoroutine())
{
JavascriptGenerator* gen = VarTo<JavascriptGenerator>(this->GetCurrentArgv()[JavascriptFunctionArgIndex_This]);
return gen->GetArguments().Values;
Expand Down
136 changes: 136 additions & 0 deletions lib/Runtime/Language/arm64/arm64_CallEhFrame.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
;-------------------------------------------------------------------------------------------------------
; Copyright (C) Microsoft. All rights reserved.
; Copyright (c) ChakraCore Project Contributors. All rights reserved.
; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
;-------------------------------------------------------------------------------------------------------

;
; arm64_CallEhFrame() and arm64_CallCatch() both thunk into jitted code at the
; start of an EH region. The purpose is to restore the frame pointer (fp)
; and locals pointer (x28) to the appropriate values for executing the parent
; function and to create a local frame that can be unwound using the parent
; function's pdata. The parent's frame looks like this:
;
;-------------------
; {x0-x7} -- homed parameters
; lr -- address from which parent was called
; fp -- saved frame pointer, pointed to by current fp
; arg obj
; {x19-x28} -- non-volatile registers: all of them are saved
; {d8-d15} -- non-volatile double registers: all of them are saved
; locals area -- pointed to by x28
; pointer to non-volatile register area above
; stack args
;-------------------
;
; The reason for the "pointer to non-volatile register area" is to allow the
; unwinder to deallocate the locals area regardless of its size. So this thunk can skip
; the allocation of the locals area altogether, and unwinding still works.
; The unwind pseudo-codes for the above prolog look like:
;
; 1. Deallocate stack args (sp now points to "pointer to non-volatile register area")
; 2. Restore xN (xN now points to first saved register)
; 3. Copy xN to sp (sp now points to first saved register)
; 4. Restore {d8-d15} (non-volatile double registers restored)
; 5. Restore {x19-x28} (non-volatile registers restored, sp points to saved fp)
; 6. Restore fp
; 7. Load lr into pc and deallocate remaining stack.
;
; The prologs for the assembly thunks allocate a frame that can be unwound by executing
; the above steps, although we don't allocate a locals area and don't know the size of the
; stack args. The caller doesn't return to this thunk; it executes its own epilog and
; returns to the caller of the thunk (one of the runtime try helpers).


#include "unixasmmacros.inc"

.global C_FUNC(arm64_CallEhFrame)
.global C_FUNC(arm64_CallCatch)

.macro STANDARD_PROLOG

;
; Shared prolog for arm64_CallEhFrame and arm64_CallCatch.
;
; Generate a prolog that will match the original function's, with all
; parameters homed and all non-volatile registers saved:
;
; Size Offset
; ---- ------
; 64 176 Homed parameters
; 16 160 Saved FP/LR
; 16 144 ArgOut / stack function list
; 80 64 Saved x19-x28
; 64 0 Saved d8-d15
; = 240 total
;
; Only the d8-d15, x19-x28 and fp/lr slots are written by this macro. The
; homed-parameter area (offset 176) and the ArgOut slot (offset 144) are
; reserved by the 240-byte allocation but nothing is stored into them here.
;
; The try/catch/finally blocks will jump to the epilog code skipping
; the instruction that deallocates the locals, in order to allow these
; thunks to skip re-allocating locals space.
;

; Params:
; x0 -- thunk target
; x1 -- frame pointer
; x2 -- locals pointer
; x3 -- size of stack args area
; x4 -- exception object (for arm64_CallCatch only)

; NOTE(review): the offsets below (16..168) are relative to sp after it has
; been lowered by 240, so the first save is expected to pre-decrement sp.
; Confirm that PROLOG_SAVE_REG_PAIR with a negative offset performs that
; write-back in unixasmmacros.inc (arm64_Thunks.S uses the _INDEXED variant
; for its frame allocation).
PROLOG_SAVE_REG_PAIR d8, d9, -240
PROLOG_SAVE_REG_PAIR d10, d11, 16
PROLOG_SAVE_REG_PAIR d12, d13, 32
PROLOG_SAVE_REG_PAIR d14, d15, 48
PROLOG_SAVE_REG_PAIR x19, x20, 64
PROLOG_SAVE_REG_PAIR x21, x22, 80
PROLOG_SAVE_REG_PAIR x23, x24, 96
PROLOG_SAVE_REG_PAIR x25, x26, 112
PROLOG_SAVE_REG_PAIR x27, x28, 128
PROLOG_SAVE_REG fp, 160 ; TODO: verify that this works the same as PROLOG_SAVE_REG_PAIR_NO_FP
PROLOG_SAVE_REG lr, 168

; Size the rest of the frame from the caller-supplied pointers so that sp ends
; up where the original function would have left it. x15 is used as scratch.
sub x15, x1, x2 ; x15 = frame pointer minus locals pointer
sub x15, x15, #160 ; x15 -= space we already allocated
add x15, x15, x3 ; x15 += argout area = same stack allocation as original function
; The lsr/lsl pair below truncates x15 to a multiple of 16 before it is
; subtracted from sp.
lsr x15, x15, #4 ; x15 /= 16
sub sp, sp, x15, lsl #4 ; allocate the stack

.endm



; arm64_CallEhFrame: runs STANDARD_PROLOG, installs the supplied locals pointer
; and frame pointer, then jumps to the thunk target.
; Params (as consumed below and by STANDARD_PROLOG):
; x0 -- thunk target
; x1 -- frame pointer
; x2 -- locals pointer
; x3 -- size of stack args area
NESTED_ENTRY arm64_CallEhFrame, _TEXT, NoHandler

STANDARD_PROLOG

; Set up the locals pointer and frame pointer
mov x28, x2
mov fp, x1

; Thunk to the jitted code (and don't return)
; A plain br is used, so lr is not updated and no return path into this thunk is created.
br x0

NESTED_END arm64_CallEhFrame



; arm64_CallCatch() is similar to arm64_CallEhFrame() except that we also pass the catch object to the jitted code
; (the exception object arrives in x4 and is handed to the target in x1).

NESTED_ENTRY arm64_CallCatch, _TEXT, NoHandler

; Params:
; x0 -- thunk target
; x1 -- frame pointer
; x2 -- locals pointer
; x3 -- size of stack args area
; x4 -- exception object

STANDARD_PROLOG

; Set up the locals pointer and frame pointer and catch object handler
; Order matters: x1 still holds the frame pointer until it has been copied
; into fp, and only then is x1 overwritten with the exception object.
mov x28, x2
mov fp, x1
mov x1, x4

; Thunk to the jitted code (and don't return)
; A plain br is used, so lr is not updated and no return path into this thunk is created.
br x0

NESTED_END arm64_CallCatch

96 changes: 96 additions & 0 deletions lib/Runtime/Language/arm64/arm64_Thunks.S
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
;-------------------------------------------------------------------------------------------------------
; Copyright (C) Microsoft. All rights reserved.
; Copyright (c) ChakraCore Project Contributors. All rights reserved.
; Licensed under the MIT license. See LICENSE.txt file in the project root for full license information.
;-------------------------------------------------------------------------------------------------------

#include "unixasmmacros.inc"

.global C_FUNC(_ZN2Js13ScriptContext31ProfileModeDeferredParsingThunkEPNS_16RecyclableObjectENS_8CallInfoEz)
.global C_FUNC(_ZN2Js13ScriptContext35ProfileModeDeferredDeserializeThunkEPNS_16RecyclableObjectENS_8CallInfoEz)

#ifdef _ENABLE_DYNAMIC_THUNKS

.global C_FUNC(_ZN2Js21InterpreterStackFrame28DelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz)
.global C_FUNC(_ZN2Js18DynamicProfileInfo29EnsureDynamicProfileInfoThunkEPNS_16RecyclableObjectENS_8CallInfoEz)

;;============================================================================================================
;; InterpreterStackFrame::DelayDynamicInterpreterThunk
;;============================================================================================================
;Var InterpreterStackFrame::DelayDynamicInterpreterThunk(RecyclableObject* function, CallInfo callInfo, ...)
;
; Calls InterpreterStackFrame::EnsureDynamicInterpreterThunk with the incoming x0, then
; tail-jumps to the entry point it returns, with the original x0/x1 reloaded.
; NOTE(review): only x0 and x1 are preserved across the call; x2-x7 are not saved here.
; Presumably the remaining arguments are not passed in registers on the targeted ABI -- confirm.

NESTED_ENTRY _ZN2Js21InterpreterStackFrame28DelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 ; save fp/lr; the frame is 32 bytes (released by the matching epilog)
stp x0, x1, [sp, #16] ; save the incoming x0/x1 (function, callInfo) in the upper half of the frame

bl C_FUNC(_ZN2Js21InterpreterStackFrame29EnsureDynamicInterpreterThunkEPNS_14ScriptFunctionE) ; call InterpreterStackFrame::EnsureDynamicInterpreterThunk
mov x16, x0 ; back up entryPoint in x16

ldp x0, x1, [sp, #16] ; reload the saved x0/x1 for the tail call

EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
br x16 ; jump (tail call) to new entryPoint

NESTED_END _ZN2Js21InterpreterStackFrame28DelayDynamicInterpreterThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT

;;============================================================================================================
;; DynamicProfileInfo::EnsureDynamicProfileInfoThunk
;;============================================================================================================
;Var DynamicProfileInfo::EnsureDynamicProfileInfoThunk(RecyclableObject* function, CallInfo callInfo, ...)
;
; Calls DynamicProfileInfo::EnsureDynamicProfileInfo with the incoming x0, then
; tail-jumps to the entry point it returns, with the original x0/x1 reloaded.
; NOTE(review): only x0 and x1 are preserved across the call; x2-x7 are not saved here.
; Presumably the remaining arguments are not passed in registers on the targeted ABI -- confirm.
NESTED_ENTRY _ZN2Js18DynamicProfileInfo29EnsureDynamicProfileInfoThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 ; save fp/lr; the frame is 32 bytes (released by the matching epilog)
stp x0, x1, [sp, #16] ; save the incoming x0/x1 (function, callInfo) in the upper half of the frame

bl C_FUNC(_ZN2Js18DynamicProfileInfo24EnsureDynamicProfileInfoEPNS_14ScriptFunctionE) ; call DynamicProfileInfo::EnsureDynamicProfileInfo
mov x16, x0 ; back up entryPoint in x16

ldp x0, x1, [sp, #16] ; reload the saved x0/x1 for the tail call

EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
br x16 ; jump (tail call) to new entryPoint

NESTED_END _ZN2Js18DynamicProfileInfo29EnsureDynamicProfileInfoThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT

#endif

;;============================================================================================================
;; ScriptContext::ProfileModeDeferredParsingThunk
;;============================================================================================================
;; Var ScriptContext::ProfileModeDeferredParsingThunk(RecyclableObject* function, CallInfo callInfo, ...)
;;
;; Calls ScriptContext::ProfileModeDeferredParse with the address of the saved function
;; pointer, then tail-jumps to the entry point it returns. x0 is reloaded from that same
;; slot afterwards, so any replacement written through the pointer is what the target sees.
;; NOTE(review): only x0 and x1 are preserved across the call; x2-x7 are not saved here.
;; Presumably the remaining arguments are not passed in registers on the targeted ABI -- confirm.
NESTED_ENTRY _ZN2Js13ScriptContext31ProfileModeDeferredParsingThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 ; save fp/lr; the frame is 32 bytes (released by the matching epilog)
stp x0, x1, [sp, #16] ; save the incoming x0/x1 (function, callInfo) in the upper half of the frame

mov x0, sp ; Pass the address of the saved x0 (the function) in case it needs to be boxed
add x0, x0, #16 ; x0 was stored at sp + 16 by the stp above, so add that offset to sp here.
bl C_FUNC(_ZN2Js13ScriptContext24ProfileModeDeferredParseEPPNS_14ScriptFunctionE) ; call ScriptContext::ProfileModeDeferredParse
mov x16, x0 ; back up entryPoint in x16

ldp x0, x1, [sp, #16] ; reload x0/x1 from the frame (x0 comes from the slot whose address was passed above)

EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
br x16 ; jump (tail call) to new entryPoint

NESTED_END _ZN2Js13ScriptContext31ProfileModeDeferredParsingThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT

;;============================================================================================================
;; ScriptContext::ProfileModeDeferredDeserializeThunk
;;============================================================================================================
;; Var ScriptContext::ProfileModeDeferredDeserializeThunk(RecyclableObject* function, CallInfo callInfo, ...)
;;
;; Calls ScriptContext::ProfileModeDeferredDeserialize with the incoming x0, then
;; tail-jumps to the entry point it returns, with the original x0/x1 reloaded.
;; NOTE(review): only x0 and x1 are preserved across the call; x2-x7 are not saved here.
;; Presumably the remaining arguments are not passed in registers on the targeted ABI -- confirm.
NESTED_ENTRY _ZN2Js13ScriptContext35ProfileModeDeferredDeserializeThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT, NoHandler

PROLOG_SAVE_REG_PAIR_INDEXED fp, lr, -32 ; save fp/lr; the frame is 32 bytes (released by the matching epilog)
stp x0, x1, [sp, #16] ; save the incoming x0/x1 (function, callInfo) in the upper half of the frame

bl C_FUNC(_ZN2Js13ScriptContext30ProfileModeDeferredDeserializeEPNS_14ScriptFunctionE) ; call ScriptContext::ProfileModeDeferredDeserialize
mov x16, x0 ; back up entryPoint in x16

ldp x0, x1, [sp, #16] ; reload the saved x0/x1 for the tail call

EPILOG_RESTORE_REG_PAIR_INDEXED fp, lr, 32
br x16 ; jump (tail call) to new entryPoint

NESTED_END _ZN2Js13ScriptContext35ProfileModeDeferredDeserializeThunkEPNS_16RecyclableObjectENS_8CallInfoEz, _TEXT
6 changes: 6 additions & 0 deletions lib/Runtime/Library/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,12 @@ elseif(CC_TARGETS_ARM)
set (CRLIB_SOURCE_CODES ${CRLIB_SOURCE_CODES}
arm/arm_JavascriptFunctionA.S
)
elseif(CC_TARGETS_ARM64)
set (CRLIB_SOURCE_CODES ${CRLIB_SOURCE_CODES}
arm64/arm64_CallFunction.S
arm64/arm64_DeferredDeserializeThunk.S
arm64/arm64_DeferredParsingThunk.S
)
endif()

add_library (Chakra.Runtime.Library OBJECT ${CRLIB_SOURCE_CODES})
Expand Down
Loading

0 comments on commit edb1ee9

Please # to comment.