DeerEngine/Deer/vendor/angelScript/src/as_callfunc_arm64_xcode.S

//
// AngelCode Scripting Library
// Copyright (c) 2020-2024 Andreas Jonsson
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any
// damages arising from the use of this software.
//
// Permission is granted to anyone to use this software for any
// purpose, including commercial applications, and to alter it and
// redistribute it freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you
// must not claim that you wrote the original software. If you use
// this software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and
// must not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
// The original version of this library can be located at:
// http://www.angelcode.com/angelscript/
//
// Andreas Jonsson
// andreas@angelcode.com
//
// Assembly routines for the ARM64/AArch64 (AAPCS64) call convention, Apple/Xcode
// variant (Mach-O symbols carry a leading underscore)
// Written by Max Waine in July 2020, based on as_callfunc_arm_msvc.asm,
// with assistance & guidance provided by Sir Kane
// Assemble with Clang's integrated assembler or GCC/GAS
#if !defined(AS_MAX_PORTABILITY)
#if defined(__aarch64__)
.global _GetHFAReturnDouble
.global _GetHFAReturnFloat
.global _CallARM64Ret128
.global _CallARM64RetInMemory
.global _CallARM64Double
.global _CallARM64Float
.global _CallARM64
.p2align 2
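// _GetHFAReturnDouble: copies a homogeneous floating-point aggregate (HFA) of
// doubles returned in d0-d3 out to memory. x0/x1 hold the destination pointers
// and x2 holds the aggregate size in bytes; the adr/sub/br trick below jumps
// into the store sequence so that exactly returnSize/8 of the trailing stores
// execute before the ret.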
_GetHFAReturnDouble:
adr x9, LBL_populateDoubles
sub x9, x9, x2, lsr 1 // x9 -= returnSize >> 1; (/2 because double is 2x instruction size)
br x9
str d3, [x0, #0x18]
str d2, [x0, #0x10]
str d1, [x1]
str d0, [x0]
LBL_populateDoubles:
ret
.p2align 2
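// _GetHFAReturnFloat: same idea for an HFA of floats returned in s0-s3.
// Each str is 4 bytes, as is each float, so the backwards offset from
// LBL_populateFloats is simply returnSize itself.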
_GetHFAReturnFloat:
adr x9, LBL_populateFloats
sub x9, x9, x2 // x9 -= returnSize; (already 4 bytes per return)
br x9
str s3, [x1, #0x4]
str s2, [x1]
str s1, [x0, #0x4]
str s0, [x0]
LBL_populateFloats:
ret
//[returnType] _CallARM64[type](
// const asQWORD *gpRegArgs, asQWORD numGPRegArgs,
// const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
// const asQWORD *stackArgs, asQWORD numStackArgs,
// asFUNCTION_t func
//)
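// Under AAPCS64 these arguments arrive as:
//   x0 = gpRegArgs,    x1 = numGPRegArgs,
//   x2 = floatRegArgs, x3 = numFloatRegArgs,
//   x4 = stackArgs,    x5 = numStackArgs,
//   x6 = func
// _CallARM64Double, _CallARM64Float and _CallARM64 share one body; only the
// register the caller reads the return value from (d0, s0 or x0) differs.
//
// A hypothetical C-side declaration, for illustration only (the real
// prototypes live in the AngelScript C++ sources):
//
//   extern "C" asQWORD CallARM64(
//       const asQWORD *gpRegArgs,    asQWORD numGPRegArgs,
//       const asQWORD *floatRegArgs, asQWORD numFloatRegArgs,
//       const asQWORD *stackArgs,    asQWORD numStackArgs,
//       asFUNCTION_t func);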
.p2align 2
_CallARM64Double:
_CallARM64Float:
_CallARM64:
.cfi_startproc
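// Prologue: save fp/lr and x20. x20 is callee-saved, so the size of the
// stack-argument block kept in it survives the call to the target function.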
sub sp, sp, #0x20
.cfi_def_cfa_offset 0x20
stp fp, lr, [sp,#0x10]
str x20, [sp,#0x08]
add fp, sp, #0x10
.cfi_def_cfa x29, 0x10
.cfi_offset x30, -0x08
.cfi_offset x29, -0x10
.cfi_offset x20, -0x18
mov x20, #0
cbz x5, LBL_stackArgsLoopEnd
// Round the count up to a multiple of 2, then multiply by 8, giving a size aligned to 16
add x20, x5, #1
lsl x20, x20, #3
and x20, x20, #-0x10
// Multiply count by 8
lsl x10, x5, #3
sub sp, sp, x20
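// Copy the stack arguments 16 bytes at a time. The post-indexed stp walks sp
// back up as it copies, so sp ends the loop where it started; the reserved
// block of x20 bytes is lowered again right before the call.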
LBL_stackArgsLoopStart:
ldp x9,x11, [x4],#16
stp x9,x11, [sp],#16
subs x10, x10, #16
bgt LBL_stackArgsLoopStart
LBL_stackArgsLoopEnd:
// Calculate how far to jump forward, skipping the float register loads that aren't needed
adr x9, LBL_populateFloatRegisterArgsEnd
sub x9, x9, x3, lsl 2 // x9 -= numFloatRegArgs * 4
br x9
ldr d7, [x2, #0x38]
ldr d6, [x2, #0x30]
ldr d5, [x2, #0x28]
ldr d4, [x2, #0x20]
ldr d3, [x2, #0x18]
ldr d2, [x2, #0x10]
ldr d1, [x2, #0x08]
ldr d0, [x2]
LBL_populateFloatRegisterArgsEnd:
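// Stash the target function pointer in x15, a scratch register that is not
// used for argument passing, before x6 is overwritten below.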
mov x15, x6
// Calculate how far to jump forward, skipping the GP register loads that aren't needed
adr x9, LBL_populateGPRegisterArgsEnd
sub x9, x9, x1, lsl 2 // x9 -= numGPRegArgs * 4
br x9
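// x0 is both the gpRegArgs base pointer and the first argument register, so it
// is loaded last.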
ldr x7, [x0, #0x38]
ldr x6, [x0, #0x30]
ldr x5, [x0, #0x28]
ldr x4, [x0, #0x20]
ldr x3, [x0, #0x18]
ldr x2, [x0, #0x10]
ldr x1, [x0, #0x08]
ldr x0, [x0]
LBL_populateGPRegisterArgsEnd:
// Actually call function
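// Lower sp so the copied stack arguments sit at [sp], call through x15, then
// release the stack-argument block again.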
sub sp, sp, x20
blr x15
add sp, sp, x20
ldr x20, [sp,#0x08]
ldp fp, lr, [sp, #0x10]
add sp, sp, #0x20
ret
.cfi_endproc
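// _CallARM64Ret128: takes the same arguments as _CallARM64 plus, as its 7th
// argument (x6), a pointer that receives the upper 64 bits of a 128-bit
// return value. The wrapper shifts the function pointer from x7 down to x6 so
// _CallARM64 sees its usual signature, stores x1 through the saved pointer
// after the call, and returns the lower 64 bits in x0.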
.p2align 2
_CallARM64Ret128:
.cfi_startproc
sub sp, sp, #0x20
.cfi_def_cfa_offset 0x20
stp fp, lr, [sp,#0x10]
str x20, [sp,#0x08]
add fp, sp, #0x10
.cfi_def_cfa x29, 0x10
.cfi_offset x30, -0x08
.cfi_offset x29, -0x10
.cfi_offset x20, -0x18
mov x20, x6
mov x6, x7
mov x7, #0
bl _CallARM64
str x1, [x20]
ldr x20, [sp,#0x08]
ldp fp, lr, [sp, #0x10]
add sp, sp, #0x20
ret
.cfi_endproc
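// _CallARM64RetInMemory: the 7th argument (x6) is the address of the buffer
// for a struct returned in memory. It is moved into x8, the AAPCS64 indirect
// result register, the function pointer is shifted from x7 to x6, and x8 is
// copied to x0 as the return value after the call.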
.p2align 2
_CallARM64RetInMemory:
.cfi_startproc
stp fp, lr, [sp,#-0x10]!
.cfi_def_cfa_offset 0x10
mov fp, sp
.cfi_def_cfa x29, 0x10
.cfi_offset x30, -0x08
.cfi_offset x29, -0x10
mov x8, x6
mov x6, x7
mov x7, #0
bl _CallARM64
mov x0, x8
ldp fp, lr, [sp], #0x10
ret
.cfi_endproc
.subsections_via_symbols
#endif /* __aarch64__ */
#endif /* !AS_MAX_PORTABILITY */