[Context] RFR: Add support for ARM aarch64 architecture

Hi, I hope this is the right place to post this. The following patch adds support for ARMs aarch64 architecture on Linux (there is existing iOS support for aarch64). I have run the context regression test on this on an aarch64 board running Ubuntu 14.04 with gcc 4.9.1 and it passes all OK. Could someone please review this patch and if OK push it? Many thanks, Ed. --- CUT HERE --- commit bd34672217ff3bcee185f9c7383179342ec2fe9b Author: Edward Nevill <edward.nevill@linaro.org> Date: Mon Feb 23 14:37:45 2015 +0000 Add support for ARM aarch64 architecture diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 80d1c46..edd4400 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -253,6 +253,14 @@ alias asm_context_sources <toolset>darwin ; +alias asm_context_sources + : [ make asm/make_arm64_elf_gas.o : asm/make_arm64_elf_gas.S : @gas ] + [ make asm/jump_arm64_elf_gas.o : asm/jump_arm64_elf_gas.S : @gas ] + : <address-model>64 + <architecture>arm + <binary-format>elf + ; + # MIPS # MIPS/O32/ELF alias asm_context_sources diff --git a/config/arm.cpp b/config/arm.cpp index 6a83737..6eece0b 100644 --- a/config/arm.cpp +++ b/config/arm.cpp @@ -6,7 +6,7 @@ // accompanying file LICENSE_1_0.txt or copy at // http://www.boost.org/LICENSE_1_0.txt) -#if !defined(__arm__) && !defined(__thumb__) && \ +#if !defined(__aarch64__) && !defined(__arm__) && !defined(__thumb__) && \ !defined(__TARGET_ARCH_ARM) && !defined(__TARGET_ARCH_THUMB) && \ !defined(_ARM) && !defined(_M_ARM) #error "Not ARM" diff --git a/src/asm/jump_arm64_elf_gas.S b/src/asm/jump_arm64_elf_gas.S new file mode 100644 index 0000000..3e0e2c2 --- /dev/null +++ b/src/asm/jump_arm64_elf_gas.S @@ -0,0 +1,128 @@ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * 
------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + + .cpu generic+fp+simd + .text + .align 2 
+ .global jump_fcontext + .type jump_fcontext, %function +jump_fcontext: + # prepare stack for GP + FPU + sub sp, sp, #0xb0 + +# Because gcc may save integer registers in fp registers across a +# function call we cannot skip saving the fp registers. +# +# Do not reinstate this test unless you fully understand what you +# are doing. +# +# # test if fpu env should be preserved +# cmp w3, #0 +# b.eq 1f + + # save d8 - d15 + stp d8, d9, [sp, #0x00] + stp d10, d11, [sp, #0x10] + stp d12, d13, [sp, #0x20] + stp d14, d15, [sp, #0x30] + +1: + + # save x19-x30 + stp x19, x20, [sp, #0x40] + stp x21, x22, [sp, #0x50] + stp x23, x24, [sp, #0x60] + stp x25, x26, [sp, #0x70] + stp x27, x28, [sp, #0x80] + stp x29, x30, [sp, #0x90] + + # save LR as PC + str x30, [sp, #0xa0] + + # store RSP (pointing to context-data) in first argument (x0). + # STR cannot have sp as a target register + mov x4, sp + str x4, [x0] + + # restore RSP (pointing to context-data) from A2 (x1) + mov sp, x1 + +# # test if fpu env should be preserved +# cmp w3, #0 +# b.eq 2f + + # load d8 - d15 + ldp d8, d9, [x0, #0x00] + ldp d10, d11, [x0, #0x10] + ldp d12, d13, [x0, #0x20] + ldp d14, d15, [x0, #0x30] + +2: + + # load x19-x30 + ldp x19, x20, [sp, #0x40] + ldp x21, x22, [sp, #0x50] + ldp x23, x24, [sp, #0x60] + ldp x25, x26, [sp, #0x70] + ldp x27, x28, [sp, #0x80] + ldp x29, x30, [sp, #0x90] + + # use third arg as return value after jump + # and as first arg in context function + mov x0, x2 + + # load pc + ldr x4, [sp, #0xa0] + + # restore stack from GP + FPU + add sp, sp, #0xb0 + + ret x4 + .size jump_fcontext,.-jump_fcontext + +# Mark that we don't need executable stack. 
+.section .note.GNU-stack,"",%progbits diff --git a/src/asm/make_arm64_elf_gas.S b/src/asm/make_arm64_elf_gas.S new file mode 100644 index 0000000..5a6914f --- /dev/null +++ b/src/asm/make_arm64_elf_gas.S @@ -0,0 +1,80 @@ +/******************************************************* + * * + * ------------------------------------------------- * + * | 0 | 1 | 2 | 3 | 4 | 5 | 6 | 7 | * + * ------------------------------------------------- * + * | 0x0 | 0x4 | 0x8 | 0xc | 0x10| 0x14| 0x18| 0x1c| * + * ------------------------------------------------- * + * | d8 | d9 | d10 | d11 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | * + * ------------------------------------------------- * + * | 0x20| 0x24| 0x28| 0x2c| 0x30| 0x34| 0x38| 0x3c| * + * ------------------------------------------------- * + * | d12 | d13 | d14 | d15 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | * + * ------------------------------------------------- * + * | 0x40| 0x44| 0x48| 0x4c| 0x50| 0x54| 0x58| 0x5c| * + * ------------------------------------------------- * + * | x19 | x20 | x21 | x22 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | * + * ------------------------------------------------- * + * | 0x60| 0x64| 0x68| 0x6c| 0x70| 0x74| 0x78| 0x7c| * + * ------------------------------------------------- * + * | x23 | x24 | x25 | x26 | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | * + * ------------------------------------------------- * + * | 0x80| 0x84| 0x88| 0x8c| 0x90| 0x94| 0x98| 0x9c| * + * ------------------------------------------------- * + * | x27 | 
x28 | FP | LR | * + * ------------------------------------------------- * + * ------------------------------------------------- * + * | 40 | 41 | 42 | 43 | | | * + * ------------------------------------------------- * + * | 0xa0| 0xa4| 0xa8| 0xac| | | * + * ------------------------------------------------- * + * | PC | align | | | * + * ------------------------------------------------- * + * * + *******************************************************/ + + .cpu generic+fp+simd + .text + .align 2 + .global make_fcontext + .type make_fcontext, %function +make_fcontext: + # shift address in x0 (allocated stack) to lower 16 byte boundary + and x0, x0, ~0xF + + # reserve space for context-data on context-stack + sub x0, x0, #0xb0 + + # third arg of make_fcontext() == address of context-function + # store address as a PC to jump in + str x2, [x0, #0xa0] + + # save address of finish as return-address for context-function + # will be entered after context-function returns (LR register) + adr x1, finish + str x1, [x0, #0x98] + + ret x30 // return pointer to context-data (x0) + +finish: + # exit code is zero + mov x0, #0 + # exit application + bl _exit + + .size make_fcontext,.-make_fcontext + +# Mark that we don't need executable stack. +.section .note.GNU-stack,"",%progbits

thx for the patches - I've updated branch develop. Could you verify that the code is running on your arm64 system (if it is working I'll merge to master).

Hi Oliver, Thanks for looking at this so quickly. The develop branch does not build because when I build <abi> is set to 'sysv' not 'aapcs'. I have patched it as in the patch below by just changing <abi>aapcs to <abi>sysv in build/Jamfile.v2. I have not changed the name of the .S files so they are now somewhat inappropriately named *_aapcs_*. There was also a problem with jump_fcontext in that it was restoring the FP context from x0, not from SP. Interestingly I based my patch on the existing iOS code which also has this bug. I have not fixed it in the iOS code as I have no way of testing anything on iOS. Built and testing on arm64, ubuntu 14.04, gcc 4.9.1. Thanks for your help with this, Ed. --- CUT HERE --- commit d19baee73f90de01375af69a9f8621799c649964 Author: Edward Nevill <edward.nevill@linaro.org> Date: Mon Feb 23 17:32:49 2015 +0000 Fix build for aarch64. Restore FP registers from sp, not x0 diff --git a/build/Jamfile.v2 b/build/Jamfile.v2 index 4bf3fce..00ee44d 100644 --- a/build/Jamfile.v2 +++ b/build/Jamfile.v2 @@ -227,7 +227,7 @@ alias asm_context_sources alias asm_context_sources : [ make asm/make_arm64_aapcs_elf_gas.o : asm/make_arm64_aapcs_elf_gas.S : @gas64 ] [ make asm/jump_arm64_aapcs_elf_gas.o : asm/jump_arm64_aapcs_elf_gas.S : @gas64 ] - : <abi>aapcs + : <abi>sysv <address-model>64 <architecture>arm <binary-format>elf @@ -236,7 +236,7 @@ alias asm_context_sources alias asm_context_sources : asm/make_arm64_aapcs_elf_gas.S asm/jump_arm64_aapcs_elf_gas.S - : <abi>aapcs + : <abi>sysv <address-model>64 <architecture>arm <binary-format>elf @@ -246,7 +246,7 @@ alias asm_context_sources alias asm_context_sources : asm/make_arm64_aapcs_elf_gas.S asm/jump_arm64_aapcs_elf_gas.S - : <abi>aapcs + : <abi>sysv <address-model>64 <architecture>arm <binary-format>elf diff --git a/src/asm/jump_arm64_aapcs_elf_gas.S b/src/asm/jump_arm64_aapcs_elf_gas.S index 46e029d..645d84d 100644 --- a/src/asm/jump_arm64_aapcs_elf_gas.S +++ b/src/asm/jump_arm64_aapcs_elf_gas.S @@ 
-101,10 +101,10 @@ jump_fcontext: # b.eq 2f # load d8 - d15 - ldp d8, d9, [x0, #0x00] - ldp d10, d11, [x0, #0x10] - ldp d12, d13, [x0, #0x20] - ldp d14, d15, [x0, #0x30] + ldp d8, d9, [sp, #0x00] + ldp d10, d11, [sp, #0x10] + ldp d12, d13, [sp, #0x20] + ldp d14, d15, [sp, #0x30] 2: # load x19-x30 --- CUT HERE --- On 23 February 2015 at 15:25, Oliver Kowalke <oliver.kowalke@gmail.com> wrote:
thx for the patches - I've updated branch develop. Could you verify that the code is running on your arm64 system (if it is working I'll merge to master).
_______________________________________________ Unsubscribe & other changes: http://lists.boost.org/mailman/listinfo.cgi/boost

2015-02-23 18:44 GMT+01:00 Edward Nevill <edward.nevill@linaro.org>:
The develop branch does not build because when I build <abi> is set to 'sysv' not 'aapcs'.
The problem is that boost.build does not evaluate os.platform to ARM; the reason is that tools/build/src/engine/jam.h tests for __arm__, which is not set in the case of the 64-bit ARM architecture. I'll post a patch.
There was also a problem with jump_fcontext in that it was restoring the FP context from x0, not from SP. Interestingly I based my patch on the existing iOS code which also has this bug. I have not fixed it in the iOS code as I have no way of testing anything on iOS.
strange - I got the information that it is working on iOS (I don't own Apple systems).

On Tue, 2015-02-24 at 08:16 +0100, Oliver Kowalke wrote:
2015-02-23 18:44 GMT+01:00 Edward Nevill <edward.nevill@linaro.org>: The problem is that boost.build does not evaluate os.platform to ARM; the reason is that tools/build/src/engine/jam.h tests for __arm__, which is not set in the case of the 64-bit ARM architecture. I'll post a patch.
OK, Thx.
There was also a problem with jump_fcontext in that it was restoring the FP context from x0, not from SP. Interestingly I based my patch on the existing iOS code which also has this bug. I have not fixed it in the iOS code as I have no way of testing anything on iOS.
strange - I got the information that it is working on iOS (I don't own Apple systems).
I think it is broken. It may be that it is just never exercised with the 'save floating point registers' option. Here is what it does: #if (defined(__VFP_FP__) && !defined(__SOFTFP__)) ; test if fpu env should be preserved cmp w3, #0 b.eq 1f ; save d8 - d15 stp d8, d9, [x0, #0x00] stp d10, d11, [x0, #0x10] stp d12, d13, [x0, #0x20] stp d14, d15, [x0, #0x30] 1: #endif ; save x19-x30 stp x19, x20, [sp, #0x40] stp x21, x22, [sp, #0x50] stp x23, x24, [sp, #0x60] stp x25, x26, [sp, #0x70] stp x27, x28, [sp, #0x80] Note that in the case of the integer registers it is saving them to [sp, #XXX] whereas in the case of the fp registers it is to [x0, #XXX]. On entry to jump_fcontext x0 contains a pointer to an fcontext_t (i.e. a pointer to a single longword which is to receive the new fcontext_t). Writing to [x0, #NNN] is just writing off the end of this into whatever data happens to be there. It may be that this works on iOS because the code is never compiled due to the conditionalisation. All the best, Ed.

2015-02-24 10:24 GMT+01:00 Edward Nevill <edward.nevill@linaro.org>:
On Tue, 2015-02-24 at 08:16 +0100, Oliver Kowalke wrote:
2015-02-23 18:44 GMT+01:00 Edward Nevill <edward.nevill@linaro.org>: The problem is that boost.build does not evaluate os.platform to ARM; the reason is that tools/build/src/engine/jam.h tests for __arm__, which is not set in the case of the 64-bit ARM architecture. I'll post a patch.
OK, Thx.
the patch was merged into boost.build - could you fetch the new version (file src/engine/jam.h <https://github.com/boostorg/build/pull/58/files#diff-c88fe8afebc632d0bef2bd5985137af2>) and check if boost.context (please fetch) builds and runs as expected?

On Wed, 2015-02-25 at 10:27 +0100, Oliver Kowalke wrote:
the patch was merged into boost.build - could you fetch the new version (file src/engine/jam.h <https://github.com/boostorg/build/pull/58/files#diff-c88fe8afebc632d0bef2bd5985137af2>) and check if boost.context (please fetch) builds and runs as expected?
_______________________________________________ Unsubscribe & other changes: http://lists.boost.org/mailman/listinfo.cgi/boost
Hi Oliver, I have pulled the jam.h patch and pulled the latest boost.context (develop branch), however it fails to build with the following error. gcc.compile.asm ../../../bin.v2/libs/context/build/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/asm/jump_arm64_aapcs_elf_gas.o ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S: Assembler messages: ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S:86: Error: operand 1 should be a floating-point register -- `stp fp,lr,[sp,#0x90]' ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S:89: Error: operand 1 should be an integer register -- `str lr,[sp,#0xa0]' ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S:116: Error: operand 1 should be a floating-point register -- `ldp fp,lr,[sp,#0x90]' The assembler does not recognise the names 'fp', or 'lr'. Instead you must use x29, or x30. I thought I had done this already in my original patch. Apologies if I made a mistake. Patch below:- With this patch it now reports gcc.compile.asm ../../../bin.v2/libs/context/build/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/asm/jump_arm64_aapcs_elf_gas.o gcc.archive ../../../bin.v2/libs/context/build/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/libboost_context.a gcc.link ../../../bin.v2/libs/context/test/test_context.test/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/test_context testing.capture-output ../../../bin.v2/libs/context/test/test_context.test/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/test_context.run **passed** ../../../bin.v2/libs/context/test/test_context.test/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/test_context.test ...updated 6 targets... All the best, Ed. 
--- CUT HERE --- commit f5905ba4672c78fc5b250e990a9103beb226baeb Author: Edward Nevill <edward.nevill@linaro.org> Date: Wed Feb 25 11:45:23 2015 +0000 Correct register names in arm64 diff --git a/src/asm/jump_arm64_aapcs_elf_gas.S b/src/asm/jump_arm64_aapcs_elf_gas.S index 889008f..09bd7b5 100644 --- a/src/asm/jump_arm64_aapcs_elf_gas.S +++ b/src/asm/jump_arm64_aapcs_elf_gas.S @@ -83,10 +83,10 @@ jump_fcontext: stp x23, x24, [sp, #0x60] stp x25, x26, [sp, #0x70] stp x27, x28, [sp, #0x80] - stp fp, lr, [sp, #0x90] + stp x29, x30, [sp, #0x90] # save LR as PC - str lr, [sp, #0xa0] + str x30, [sp, #0xa0] # store RSP (pointing to context-data) in first argument (x0). # STR cannot have sp as a target register @@ -113,7 +113,7 @@ jump_fcontext: ldp x23, x24, [sp, #0x60] ldp x25, x26, [sp, #0x70] ldp x27, x28, [sp, #0x80] - ldp fp, lr, [sp, #0x90] + ldp x29, x30, [sp, #0x90] # use third arg as return value after jump # and as first arg in context function --- CUT HERE ---

2015-02-25 12:48 GMT+01:00 Edward Nevill <edward.nevill@linaro.org>:
gcc.compile.asm ../../../bin.v2/libs/context/build/gcc-4.9.1/debug/address-model-64/architecture-arm/link-static/threading-multi/asm/jump_arm64_aapcs_elf_gas.o ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S: Assembler messages: ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S:86: Error: operand 1 should be a floating-point register -- `stp fp,lr,[sp,#0x90]' ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S:89: Error: operand 1 should be an integer register -- `str lr,[sp,#0xa0]' ../../../libs/context/src/asm/jump_arm64_aapcs_elf_gas.S:116: Error: operand 1 should be a floating-point register -- `ldp fp,lr,[sp,#0x90]'
The assembler does not recognise the names 'fp', or 'lr'. Instead you must use x29, or x30. I thought I had done this already in my original patch. Apologies if I made a mistake.
I've renamed the code - I assumed that the alias names for registers LR and FP are still valid on 64-bit (to be consistent with the other implementations) :^(
participants (2)
-
Edward Nevill
-
Oliver Kowalke