Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
Add branch protections for AArch64 (BTI/PAC) in assembly code used by
:option:`-X perf_jit <-X>` (Linux perf profiler integration).
4 changes: 4 additions & 0 deletions Python/asm_trampoline.S
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#include "asm_trampoline_aarch64.h"

.text
#if defined(__APPLE__)
.globl __Py_trampoline_func_start
Expand Down Expand Up @@ -29,10 +31,12 @@ _Py_trampoline_func_start:
#if defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
// ARM64 little endian, 64bit ABI
// generate with aarch64-linux-gnu-gcc 12.1
SIGN_LR
stp x29, x30, [sp, -16]!
mov x29, sp
blr x3
ldp x29, x30, [sp], 16
VERIFY_LR
ret
#endif
#ifdef __riscv
Expand Down
56 changes: 56 additions & 0 deletions Python/asm_trampoline_aarch64.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
#ifndef ASM_TRAMPOLINE_AARCH_64_H_
#define ASM_TRAMPOLINE_AARCH_64_H_

/*
* References:
* - https://developer.arm.com/documentation/101028/0012/5--Feature-test-macros
* - https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst
*/

#if defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1
#define BTI_J hint 36 /* bti j: for jumps, IE br instructions */
#define BTI_C hint 34 /* bti c: for calls, IE bl instructions */
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This LGTM, still need to test, but just a few comments:

  1. Do we need to support binutils older than 2.31? If we don't we can just use the mnemonics.
  2. Do we need it added conditionally? We can just add the instructions, they're binary compatible with older machines so we can just do the PAC path and we get both. It will just NOP on unsupported machines. So if this can take the cost of a NOP, we can simplify this quite a bit.

Don't let my comments derail this, they can always be done in follow ups IMHO.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only OS that still has glibc < 2.31 is RHEL8. While the buildbots do not compile Python >= 3.12 on RHEL8, someone trying to compile Python 3.13 or higher there would not be able to build it. Granted, RHEL8 doesn't enable -mbranch-protection by default, but I'd rather keep the old way of declaring PAC and BTI at least until RHEL8 goes out of support.

Regarding the if/else conditions: while they can be simplified, the assembly code maps more or less one-to-one onto the C code that generates the DWARF unwinding info for the non-frame-pointer case, and simplifying them would make visual comparison between the two a tad more cumbersome.

#define GNU_PROPERTY_AARCH64_BTI 1 /* bit 0 GNU Notes is for BTI support */
#else
#define BTI_J
#define BTI_C
#define GNU_PROPERTY_AARCH64_BTI 0
#endif

/*
 * Return-address signing (PAC).
 *
 * SIGN_LR is placed at the trampoline entry and VERIFY_LR just before its
 * return. Both are spelled as raw `hint` numbers rather than mnemonics so
 * that assemblers which do not know the PAC mnemonics still accept them.
 *
 * __ARM_FEATURE_PAC_DEFAULT is tested as a bit mask: bit 0 selects the
 * A key, bit 1 the B key. When PAC is not enabled, SIGN_LR degrades to
 * BTI_C (itself empty when BTI is off) and VERIFY_LR expands to nothing.
 */
#if defined(__ARM_FEATURE_PAC_DEFAULT)
#if __ARM_FEATURE_PAC_DEFAULT & 1
#define SIGN_LR hint 25 /* paciasp: sign with the A key */
#define VERIFY_LR hint 29 /* autiasp: verify with the A key */
#elif __ARM_FEATURE_PAC_DEFAULT & 2
#define SIGN_LR hint 27 /* pacibsp: sign with the B key */
#define VERIFY_LR hint 31 /* autibsp: verify with the B key */
#else
/*
 * Without this branch SIGN_LR/VERIFY_LR would stay undefined and the
 * trampoline would fail with an obscure "unknown mnemonic" error while
 * the PAC note bit below would still be advertised.
 */
#error "__ARM_FEATURE_PAC_DEFAULT is defined but selects neither the A key nor the B key"
#endif
#define GNU_PROPERTY_AARCH64_POINTER_AUTH 2 /* bit 1 GNU Notes is for PAC support */
#else
#define SIGN_LR BTI_C
#define VERIFY_LR
#define GNU_PROPERTY_AARCH64_POINTER_AUTH 0
#endif

/*
 * Guarded Control Stack (GCS): contributes bit 2 (value 4) to the GNU
 * property note only when __ARM_FEATURE_GCS_DEFAULT is defined as 1;
 * otherwise it contributes nothing.
 */
#if !defined(__ARM_FEATURE_GCS_DEFAULT) || __ARM_FEATURE_GCS_DEFAULT != 1
#define GNU_PROPERTY_AARCH64_GCS 0
#else
#define GNU_PROPERTY_AARCH64_GCS 4 /* bit 2 GNU Notes is for GCS support */
#endif

/*
 * Add the BTI, PAC and GCS support to GNU Notes section.
 *
 * Emitted only when at least one of the three feature flags above is
 * non-zero. Layout, in order: name size, descriptor size, note type, the
 * "GNU" name, then one property made of its type, its data size, the
 * feature bit mask and 4 bytes of padding (0x10 descriptor bytes in total).
 */
#if GNU_PROPERTY_AARCH64_BTI != 0 || GNU_PROPERTY_AARCH64_POINTER_AUTH != 0 || GNU_PROPERTY_AARCH64_GCS != 0
.pushsection .note.gnu.property, "a"; /* Start a new allocatable section */
.balign 8; /* align it on an 8 byte boundary */
.long 4; /* size of "GNU\0" */
.long 0x10; /* size of descriptor */
.long 0x5; /* NT_GNU_PROPERTY_TYPE_0 */
.asciz "GNU";
.long 0xc0000000; /* GNU_PROPERTY_AARCH64_FEATURE_1_AND */
.long 4; /* Four bytes of data */
.long (GNU_PROPERTY_AARCH64_BTI|GNU_PROPERTY_AARCH64_POINTER_AUTH|GNU_PROPERTY_AARCH64_GCS); /* bitwise OR of the enabled BTI, PAC and GCS flags */
.long 0; /* padding for 8 byte alignment */
.popsection; /* end the section */
#endif

#endif
13 changes: 13 additions & 0 deletions Python/jit_unwind.c
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ enum {
DWRF_CFA_offset_extended_sf = 0x11, // Extended signed offset
DWRF_CFA_advance_loc = 0x40, // Advance location counter
DWRF_CFA_offset = 0x80, // Simple offset instruction
#if defined(__aarch64__)
DWRF_CFA_AARCH64_negate_ra_state = 0x2d, // Toggle return address signing state
#endif
DWRF_CFA_restore = 0xc0 // Restore register
};

Expand Down Expand Up @@ -562,6 +565,13 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
DWRF_UV(8); // New offset: SP + 8
#elif defined(__aarch64__) && defined(__AARCH64EL__) && !defined(__ILP32__)
/* AArch64 calling convention unwinding rules */
#if defined(__ARM_FEATURE_PAC_DEFAULT) || \
(defined(__ARM_FEATURE_BTI_DEFAULT) && __ARM_FEATURE_BTI_DEFAULT == 1)
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance past SIGN_LR (4 bytes)
#endif
#if defined(__ARM_FEATURE_PAC_DEFAULT)
DWRF_U8(DWRF_CFA_AARCH64_negate_ra_state); // Saved LR is PAC-signed from here
#endif
DWRF_U8(DWRF_CFA_advance_loc | 1); // Advance by 1 instruction (4 bytes)
DWRF_U8(DWRF_CFA_def_cfa_offset); // CFA = SP + 16
DWRF_UV(16); // Stack pointer moved by 16 bytes
Expand All @@ -570,6 +580,9 @@ static void elf_init_ehframe_perf(ELFObjectContext* ctx) {
DWRF_U8(DWRF_CFA_offset | DWRF_REG_RA); // x30 (link register) saved
DWRF_UV(1); // At CFA-8 (1 * 8 = 8 bytes from CFA)
DWRF_U8(DWRF_CFA_advance_loc | 3); // Advance by 3 instructions (12 bytes)
#if defined(__ARM_FEATURE_PAC_DEFAULT)
DWRF_U8(DWRF_CFA_AARCH64_negate_ra_state); // LR is authenticated, no longer PAC-signed
#endif
DWRF_U8(DWRF_CFA_def_cfa_register); // CFA = FP (x29) + 16
DWRF_UV(DWRF_REG_FP);
DWRF_U8(DWRF_CFA_restore | DWRF_REG_RA); // Restore x30 - NO DWRF_UV() after this!
Expand Down
Loading