From 74b27d8eedc7a4c0e8276345107790e6b3c023cb Mon Sep 17 00:00:00 2001
From: Richard Sandiford
Date: Wed, 23 Sep 2020 19:25:04 +0100
Subject: [PATCH] aarch64: Prevent canary address being spilled to stack

This patch fixes the equivalent of arm bug PR85434/CVE-2018-12886
for aarch64: under high register pressure, the -fstack-protector
code might spill the address of the canary onto the stack and reload
it at the test site, giving an attacker the opportunity to change
the expected canary value.

This would happen in two cases:

- when generating PIC for -mstack-protector-guard=global (tested by
  stack-protector-6.c).  This is a direct analogue of PR85434, which
  was also about PIC for the global case.

- when using -mstack-protector-guard=sysreg.

The two problems were really separate bugs and caused by separate
code, but it was more convenient to fix them together.

The post-patch code still spills _GLOBAL_OFFSET_TABLE_ for
stack-protector-6.c, which is a more general problem.  However,
it no longer spills the canary address itself.

The patch also fixes an ICE when using -mstack-protector-guard=sysreg
with ILP32: even if the register read is SImode, the address
calculation itself should still be DImode.

gcc/
	* config/aarch64/aarch64-protos.h (aarch64_salt_type): New enum.
	(aarch64_stack_protect_canary_mem): Declare.
	* config/aarch64/aarch64.md (UNSPEC_SALT_ADDR): New unspec.
	(stack_protect_set): Forward to stack_protect_combined_set.
	(stack_protect_combined_set): New pattern.  Use
	aarch64_stack_protect_canary_mem.
	(reg_stack_protect_address_<mode>): Add a salt operand.
	(stack_protect_test): Forward to stack_protect_combined_test.
	(stack_protect_combined_test): New pattern.  Use
	aarch64_stack_protect_canary_mem.
	* config/aarch64/aarch64.c (strip_salt): New function.
	(strip_offset_and_salt): Likewise.
	(tls_symbolic_operand_type): Use strip_offset_and_salt.
	(aarch64_stack_protect_canary_mem): New function.
	(aarch64_cannot_force_const_mem): Use strip_offset_and_salt.
	(aarch64_classify_address): Likewise.
	(aarch64_symbolic_address_p): Likewise.
	(aarch64_print_operand): Likewise.
	(aarch64_output_addr_const_extra): New function.
	(aarch64_tls_symbol_p): Use strip_salt.
	(aarch64_classify_symbol): Likewise.
	(aarch64_legitimate_pic_operand_p): Use strip_offset_and_salt.
	(aarch64_legitimate_constant_p): Likewise.
	(aarch64_mov_operand_p): Use strip_salt.
	(TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA): Override.

gcc/testsuite/
	* gcc.target/aarch64/stack-protector-5.c: New test.
	* gcc.target/aarch64/stack-protector-6.c: Likewise.
	* gcc.target/aarch64/stack-protector-7.c: Likewise.
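
For illustration only (not part of the patch itself): in the SSP_GLOBAL
case, aarch64_stack_protect_canary_mem wraps the canary symbol in a
salted UNSPEC before re-adding any offset, so the SET and TEST
expansions use two distinct address constants, and the SET address can
no longer be reused (and therefore spilled and reloaded) for the TEST.
The salted address has roughly this shape (the exact RTL dump may
differ):

  (const:DI (unspec:DI [(symbol_ref:DI ("__stack_chk_guard"))
                        (const_int 0)]        ; AARCH64_SALT_SSP_SET
                       UNSPEC_SALT_ADDR))

with (const_int 1) (AARCH64_SALT_SSP_TEST) used at the test site.
aarch64_output_addr_const_extra prints the UNSPEC as the underlying
symbol, so the salt never appears in the assembly output.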
---
 gcc/config/aarch64/aarch64-protos.h           |  20 +++
 gcc/config/aarch64/aarch64.c                  | 164 +++++++++++++-----
 gcc/config/aarch64/aarch64.md                 |  85 ++++-----
 .../gcc.target/aarch64/stack-protector-5.c    |  23 +++
 .../gcc.target/aarch64/stack-protector-6.c    |   8 +
 .../gcc.target/aarch64/stack-protector-7.c    |  25 +++
 6 files changed, 228 insertions(+), 97 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-5.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-6.c
 create mode 100644 gcc/testsuite/gcc.target/aarch64/stack-protector-7.c

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index c7e828d940f..302e09b202f 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -136,6 +136,25 @@ enum aarch64_addr_query_type {
   ADDR_QUERY_ANY
 };
 
+/* Enumerates values that can be arbitrarily mixed into a calculation
+   in order to make the result of the calculation unique to its use case.
+
+   AARCH64_SALT_SSP_SET
+   AARCH64_SALT_SSP_TEST
+      Used when calculating the address of the stack protection canary value.
+      There is a separate value for setting and testing the canary, meaning
+      that these two operations produce unique addresses: they are different
+      from each other, and from all other address calculations.
+
+      The main purpose of this is to prevent the SET address being spilled
+      to the stack and reloaded for the TEST, since that would give an
+      attacker the opportunity to change the address of the expected
+      canary value.  */
+enum aarch64_salt_type {
+  AARCH64_SALT_SSP_SET,
+  AARCH64_SALT_SSP_TEST
+};
+
 /* A set of tuning parameters contains references to size and time
    cost models and vectors for address cost calculations, register
    move costs and memory move costs.  */
@@ -608,6 +627,7 @@ opt_machine_mode aarch64_ptrue_all_mode (rtx);
 rtx aarch64_convert_sve_data_to_pred (rtx, machine_mode, rtx);
 rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
 void aarch64_expand_mov_immediate (rtx, rtx);
+rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
 rtx aarch64_ptrue_reg (machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
 bool aarch64_sve_pred_dominates_p (rtx *, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index b251f3947e2..491fc582dab 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1935,6 +1935,29 @@ aarch64_sve_abi (void)
   return sve_abi;
 }
 
+/* If X is an UNSPEC_SALT_ADDR expression, return the address that it
+   wraps, otherwise return X itself.  */
+
+static rtx
+strip_salt (rtx x)
+{
+  rtx search = x;
+  if (GET_CODE (search) == CONST)
+    search = XEXP (search, 0);
+  if (GET_CODE (search) == UNSPEC && XINT (search, 1) == UNSPEC_SALT_ADDR)
+    x = XVECEXP (search, 0, 0);
+  return x;
+}
+
+/* Like strip_offset, but also strip any UNSPEC_SALT_ADDR from the
+   expression.  */
+
+static rtx
+strip_offset_and_salt (rtx addr, poly_int64 *offset)
+{
+  return strip_salt (strip_offset (addr, offset));
+}
+
 /* Generate code to enable conditional branches in functions over 1 MiB.  */
 const char *
 aarch64_gen_far_branch (rtx * operands, int pos_label, const char * dest,
@@ -2932,14 +2955,9 @@ static enum tls_model
 tls_symbolic_operand_type (rtx addr)
 {
   enum tls_model tls_kind = TLS_MODEL_NONE;
-  if (GET_CODE (addr) == CONST)
-    {
-      poly_int64 addend;
-      rtx sym = strip_offset (addr, &addend);
-      if (GET_CODE (sym) == SYMBOL_REF)
-	tls_kind = SYMBOL_REF_TLS_MODEL (sym);
-    }
-  else if (GET_CODE (addr) == SYMBOL_REF)
+  poly_int64 offset;
+  addr = strip_offset_and_salt (addr, &offset);
+  if (GET_CODE (addr) == SYMBOL_REF)
     tls_kind = SYMBOL_REF_TLS_MODEL (addr);
 
   return tls_kind;
@@ -5239,6 +5257,48 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
 				  as_a <scalar_int_mode> (mode));
 }
 
+/* Return the MEM rtx that provides the canary value that should be used
+   for stack-smashing protection.  MODE is the mode of the memory.
+   For SSP_GLOBAL, DECL_RTL is the MEM rtx for the canary variable
+   (__stack_chk_guard), otherwise it has no useful value.  SALT_TYPE
+   indicates whether the caller is performing a SET or a TEST operation.  */
+
+rtx
+aarch64_stack_protect_canary_mem (machine_mode mode, rtx decl_rtl,
+				  aarch64_salt_type salt_type)
+{
+  rtx addr;
+  if (aarch64_stack_protector_guard == SSP_GLOBAL)
+    {
+      gcc_assert (MEM_P (decl_rtl));
+      addr = XEXP (decl_rtl, 0);
+      poly_int64 offset;
+      rtx base = strip_offset_and_salt (addr, &offset);
+      if (!SYMBOL_REF_P (base))
+	return decl_rtl;
+
+      rtvec v = gen_rtvec (2, base, GEN_INT (salt_type));
+      addr = gen_rtx_UNSPEC (Pmode, v, UNSPEC_SALT_ADDR);
+      addr = gen_rtx_CONST (Pmode, addr);
+      addr = plus_constant (Pmode, addr, offset);
+    }
+  else
+    {
+      /* Calculate the address from the system register.  */
+      rtx salt = GEN_INT (salt_type);
+      addr = gen_reg_rtx (mode);
+      if (mode == DImode)
+	emit_insn (gen_reg_stack_protect_address_di (addr, salt));
+      else
+	{
+	  emit_insn (gen_reg_stack_protect_address_si (addr, salt));
+	  addr = convert_memory_address (Pmode, addr);
+	}
+      addr = plus_constant (Pmode, addr, aarch64_stack_protector_guard_offset);
+    }
+  return gen_rtx_MEM (mode, force_reg (Pmode, addr));
+}
+
 /* Emit an SVE predicated move from SRC to DEST.  PRED is a predicate
    that is known to contain PTRUE.  */
 
@@ -8677,8 +8737,6 @@ aarch64_move_imm (HOST_WIDE_INT val, machine_mode mode)
 static bool
 aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
 {
-  rtx base, offset;
-
   if (GET_CODE (x) == HIGH)
     return true;
 
@@ -8688,10 +8746,12 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
     if (GET_CODE (*iter) == CONST_POLY_INT)
       return true;
 
-  split_const (x, &base, &offset);
+  poly_int64 offset;
+  rtx base = strip_offset_and_salt (x, &offset);
   if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
     {
-      if (aarch64_classify_symbol (base, INTVAL (offset))
+      /* We checked for POLY_INT_CST offsets above.  */
+      if (aarch64_classify_symbol (base, offset.to_constant ())
 	  != SYMBOL_FORCE_TO_MEM)
 	return true;
       else
@@ -9217,9 +9277,8 @@ aarch64_classify_address (struct aarch64_address_info *info,
 	  && GET_MODE_SIZE (mode).is_constant (&const_size)
 	  && const_size >= 4)
 	{
-	  rtx sym, addend;
-
-	  split_const (x, &sym, &addend);
+	  poly_int64 offset;
+	  rtx sym = strip_offset_and_salt (x, &offset);
 	  return ((GET_CODE (sym) == LABEL_REF
 		   || (GET_CODE (sym) == SYMBOL_REF
 		       && CONSTANT_POOL_ADDRESS_P (sym)
@@ -9234,10 +9293,12 @@
       if (allow_reg_index_p
 	  && aarch64_base_register_rtx_p (info->base, strict_p))
 	{
-	  rtx sym, offs;
-	  split_const (info->offset, &sym, &offs);
+	  poly_int64 offset;
+	  HOST_WIDE_INT const_offset;
+	  rtx sym = strip_offset_and_salt (info->offset, &offset);
 	  if (GET_CODE (sym) == SYMBOL_REF
-	      && (aarch64_classify_symbol (sym, INTVAL (offs))
+	      && offset.is_constant (&const_offset)
+	      && (aarch64_classify_symbol (sym, const_offset)
 		  == SYMBOL_SMALL_ABSOLUTE))
 	    {
 	      /* The symbol and offset must be aligned to the access size.  */
@@ -9263,7 +9324,7 @@
 	      if (known_eq (ref_size, 0))
 		ref_size = GET_MODE_SIZE (DImode);
 
-	      return (multiple_p (INTVAL (offs), ref_size)
+	      return (multiple_p (const_offset, ref_size)
 		      && multiple_p (align / BITS_PER_UNIT, ref_size));
 	    }
 	}
@@ -9295,9 +9356,8 @@ aarch64_address_valid_for_prefetch_p (rtx x, bool strict_p)
 bool
 aarch64_symbolic_address_p (rtx x)
 {
-  rtx offset;
-
-  split_const (x, &x, &offset);
+  poly_int64 offset;
+  x = strip_offset_and_salt (x, &offset);
   return GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF;
 }
 
@@ -10028,27 +10088,16 @@ aarch64_print_operand (FILE *f, rtx x, int code)
   switch (code)
     {
     case 'c':
-      switch (GET_CODE (x))
+      if (CONST_INT_P (x))
+	fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
+      else
 	{
-	case CONST_INT:
-	  fprintf (f, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
-	  break;
-
-	case SYMBOL_REF:
-	  output_addr_const (f, x);
-	  break;
-
-	case CONST:
-	  if (GET_CODE (XEXP (x, 0)) == PLUS
-	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
-	    {
-	      output_addr_const (f, x);
-	      break;
-	    }
-	  /* Fall through.  */
-
-	default:
-	  output_operand_lossage ("unsupported operand for code '%c'", code);
+	  poly_int64 offset;
+	  rtx base = strip_offset_and_salt (x, &offset);
+	  if (SYMBOL_REF_P (base))
+	    output_addr_const (f, x);
+	  else
+	    output_operand_lossage ("unsupported operand for code '%c'", code);
 	}
       break;
 
@@ -10623,6 +10672,19 @@ aarch64_print_operand_address (FILE *f, machine_mode mode, rtx x)
     output_addr_const (f, x);
 }
 
+/* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA.  */
+
+static bool
+aarch64_output_addr_const_extra (FILE *file, rtx x)
+{
+  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SALT_ADDR)
+    {
+      output_addr_const (file, XVECEXP (x, 0, 0));
+      return true;
+    }
+  return false;
+}
+
 bool
 aarch64_label_mentioned_p (rtx x)
 {
@@ -15932,6 +15994,7 @@ aarch64_tls_symbol_p (rtx x)
   if (! TARGET_HAVE_TLS)
     return false;
 
+  x = strip_salt (x);
   if (GET_CODE (x) != SYMBOL_REF)
     return false;
 
@@ -15987,6 +16050,8 @@ aarch64_classify_tls_symbol (rtx x)
 enum aarch64_symbol_type
 aarch64_classify_symbol (rtx x, HOST_WIDE_INT offset)
 {
+  x = strip_salt (x);
+
   if (GET_CODE (x) == LABEL_REF)
     {
       switch (aarch64_cmodel)
@@ -16086,11 +16151,10 @@ aarch64_constant_address_p (rtx x)
 bool
 aarch64_legitimate_pic_operand_p (rtx x)
 {
-  if (GET_CODE (x) == SYMBOL_REF
-      || (GET_CODE (x) == CONST
-	  && GET_CODE (XEXP (x, 0)) == PLUS
-	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
-    return false;
+  poly_int64 offset;
+  x = strip_offset_and_salt (x, &offset);
+  if (GET_CODE (x) == SYMBOL_REF)
+    return false;
 
   return true;
 }
@@ -16136,7 +16200,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
   /* If an offset is being added to something else, we need to allow the
      base to be moved into the destination register, meaning that there
      are no free temporaries for the offset.  */
-  x = strip_offset (x, &offset);
+  x = strip_offset_and_salt (x, &offset);
   if (!offset.is_constant () && aarch64_offset_temporaries (true, offset) > 0)
     return false;
 
@@ -18035,6 +18099,7 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
       return aarch64_simd_valid_immediate (x, NULL);
     }
 
+  x = strip_salt (x);
   if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
     return true;
 
@@ -23890,6 +23955,9 @@ aarch64_libgcc_floating_mode_supported_p
 #undef TARGET_PRINT_OPERAND_ADDRESS
 #define TARGET_PRINT_OPERAND_ADDRESS aarch64_print_operand_address
 
+#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
+#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA aarch64_output_addr_const_extra
+
 #undef TARGET_OPTAB_SUPPORTED_P
 #define TARGET_OPTAB_SUPPORTED_P aarch64_optab_supported_p
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index dbc6b1db176..19ec9e33f9f 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -281,6 +281,7 @@
     UNSPEC_GEN_TAG_RND	; Generate a random 4-bit MTE tag.
     UNSPEC_TAG_SPACE	; Translate address to MTE tag address space.
     UNSPEC_LD1RO
+    UNSPEC_SALT_ADDR
 ])
 
 (define_c_enum "unspecv" [
@@ -6881,43 +6882,37 @@
   DONE;
 })
 
-;; Named patterns for stack smashing protection.
+;; Defined for -mstack-protector-guard=sysreg, which goes through this
+;; pattern rather than stack_protect_combined_set.  Our implementation
+;; of the latter can handle both.
 (define_expand "stack_protect_set"
   [(match_operand 0 "memory_operand")
-   (match_operand 1 "memory_operand")]
+   (match_operand 1 "")]
   ""
 {
-  machine_mode mode = GET_MODE (operands[0]);
-  if (aarch64_stack_protector_guard != SSP_GLOBAL)
-    {
-      /* Generate access through the system register.  */
-      rtx tmp_reg = gen_reg_rtx (mode);
-      if (mode == DImode)
-	{
-	  emit_insn (gen_reg_stack_protect_address_di (tmp_reg));
-	  emit_insn (gen_adddi3 (tmp_reg, tmp_reg,
-				 GEN_INT (aarch64_stack_protector_guard_offset)));
-	}
-      else
-	{
-	  emit_insn (gen_reg_stack_protect_address_si (tmp_reg));
-	  emit_insn (gen_addsi3 (tmp_reg, tmp_reg,
-				 GEN_INT (aarch64_stack_protector_guard_offset)));
+  emit_insn (gen_stack_protect_combined_set (operands[0], operands[1]));
+  DONE;
+})
-	}
-      operands[1] = gen_rtx_MEM (mode, tmp_reg);
-    }
-
+(define_expand "stack_protect_combined_set"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "")]
+  ""
+{
+  machine_mode mode = GET_MODE (operands[0]);
+  operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1],
+						  AARCH64_SALT_SSP_SET);
   emit_insn ((mode == DImode
	      ? gen_stack_protect_set_di
	      : gen_stack_protect_set_si) (operands[0],
					   operands[1]));
   DONE;
 })
 
+;; Operand 1 is either AARCH64_SALT_SSP_SET or AARCH64_SALT_SSP_TEST.
 (define_insn "reg_stack_protect_address_<mode>"
   [(set (match_operand:PTR 0 "register_operand" "=r")
-	(unspec:PTR [(const_int 0)]
-		    UNSPEC_SSP_SYSREG))]
+	(unspec:PTR [(match_operand 1 "const_int_operand")]
+		    UNSPEC_SSP_SYSREG))]
   "aarch64_stack_protector_guard != SSP_GLOBAL"
   {
     char buf[150];
@@ -6940,37 +6935,29 @@
   [(set_attr "length" "12")
    (set_attr "type" "multiple")])
 
+;; Defined for -mstack-protector-guard=sysreg, which goes through this
+;; pattern rather than stack_protect_combined_test.  Our implementation
+;; of the latter can handle both.
 (define_expand "stack_protect_test"
   [(match_operand 0 "memory_operand")
-   (match_operand 1 "memory_operand")
+   (match_operand 1 "")
    (match_operand 2)]
   ""
 {
-  machine_mode mode = GET_MODE (operands[0]);
-
-  if (aarch64_stack_protector_guard != SSP_GLOBAL)
-    {
-      /* Generate access through the system register.  The
-	 sequence we want here is the access
-	 of the stack offset to come with
-	 mrs scratch_reg,
-	 add scratch_reg, scratch_reg, :lo12:offset.  */
-      rtx tmp_reg = gen_reg_rtx (mode);
-      if (mode == DImode)
-	{
-	  emit_insn (gen_reg_stack_protect_address_di (tmp_reg));
-	  emit_insn (gen_adddi3 (tmp_reg, tmp_reg,
-				 GEN_INT (aarch64_stack_protector_guard_offset)));
-	}
-      else
-	{
-	  emit_insn (gen_reg_stack_protect_address_si (tmp_reg));
-	  emit_insn (gen_addsi3 (tmp_reg, tmp_reg,
-				 GEN_INT (aarch64_stack_protector_guard_offset)));
+  emit_insn (gen_stack_protect_combined_test (operands[0], operands[1],
+					      operands[2]));
+  DONE;
+})
-	}
-      operands[1] = gen_rtx_MEM (mode, tmp_reg);
-    }
+(define_expand "stack_protect_combined_test"
+  [(match_operand 0 "memory_operand")
+   (match_operand 1 "")
+   (match_operand 2)]
+  ""
+{
+  machine_mode mode = GET_MODE (operands[0]);
+  operands[1] = aarch64_stack_protect_canary_mem (mode, operands[1],
+						  AARCH64_SALT_SSP_TEST);
   emit_insn ((mode == DImode
	      ? gen_stack_protect_test_di
	      : gen_stack_protect_test_si) (operands[0],
					    operands[1]));
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-5.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-5.c
new file mode 100644
index 00000000000..a9cd53b2eac
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-5.c
@@ -0,0 +1,23 @@
+/* { dg-do compile } */
+/* { dg-options "-fstack-protector-all -O2" } */
+
+void __attribute__ ((noipa))
+f (void)
+{
+  volatile int x;
+  asm volatile ("" :::
+		"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+		"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+		"x24", "x25", "x26", "x27", "x28", "x30");
+}
+
+/* The register clobbers above should not generate any single LDRs or STRs;
+   all registers should be saved and restored in pairs.  The only STRs
+   should therefore be those associated with the stack protector
+   tests themselves.
+
+   Make sure the address of the canary value is not spilled and reloaded,
+   since that would give the attacker an opportunity to change the
+   canary value.  */
+/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-6.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-6.c
new file mode 100644
index 00000000000..e2ac0885eba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-6.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target fpic } */
+/* { dg-options "-fstack-protector-all -O2 -fpic" } */
+
+#include "stack-protector-5.c"
+
+/* See the comment in stack-protector-5.c.  */
+/* { dg-final { scan-assembler-times {\tldr\t[^\n]*__stack_chk_guard} 2 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/stack-protector-7.c b/gcc/testsuite/gcc.target/aarch64/stack-protector-7.c
new file mode 100644
index 00000000000..e644768fe5e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/stack-protector-7.c
@@ -0,0 +1,25 @@
+/* { dg-do compile } */
+/* { dg-options "-fstack-protector-all -mstack-protector-guard=sysreg -mstack-protector-guard-offset=16 -mstack-protector-guard-reg=tpidr_el0 -O2" } */
+
+void __attribute__ ((noipa))
+f (void)
+{
+  volatile int x;
+  asm volatile ("" :::
+		"x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
+		"x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
+		"x16", "x17", "x18", "x19", "x20", "x21", "x22", "x23",
+		"x24", "x25", "x26", "x27", "x28", "x30");
+}
+
+/* The register clobbers above should not generate any single LDRs or STRs;
+   all registers should be saved and restored in pairs.  The only LDRs and
+   STRs should therefore be those associated with the stack protector
+   tests themselves.
+
+   Make sure the address of the canary value (tpidr_el0 + 16) is not
+   spilled and reloaded, since that would give the attacker an opportunity
+   to change the canary value.  */
+/* { dg-final { scan-assembler-times {\tmrs\t} 2 } } */
+/* { dg-final { scan-assembler-times {\tstr\t} 1 } } */
+/* { dg-final { scan-assembler-times {\tldr\t} 3 } } */
-- 
2.30.2