From 0ab81d9cc73303c376a0014774ab6058d40a25a0 Mon Sep 17 00:00:00 2001 From: Mihail Ionescu Date: Wed, 15 Jan 2020 11:35:21 +0000 Subject: [PATCH] [PATCH, GCC/ARM, 8/10] Do lazy store & load inline when calling nscall function This patch adds two new patterns for the VLSTM and VLLDM instructions. cmse_nonsecure_call_inline_register_clear is then modified to generate VLSTM and VLLDM respectively before and after calls to functions with the cmse_nonsecure_call attribute in order to have lazy saving, clearing and restoring of VFP registers. Since these instructions do not do writeback of the base register, the stack is adjusted prior to the lazy store and after the lazy load with appropriate frame debug notes to describe the effect on the CFA register. As with CLRM, VSCCLRM and VSTR/VLDR, the instruction is modeled as an unspecified operation to the memory pointed to by the base register. *** gcc/ChangeLog *** 2020-01-16 Mihail-Calin Ionescu 2020-01-16 Thomas Preud'homme * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): Declare early. (cmse_nonsecure_call_inline_register_clear): Define new lazy_fpclear variable as true when floating-point ABI is not hard. Replace check against TARGET_HARD_FLOAT_ABI by checks against lazy_fpclear. Generate VLSTM and VLLDM instruction respectively before and after a function call to cmse_nonsecure_call function. * config/arm/unspecs.md (VUNSPEC_VLSTM): Define unspec. (VUNSPEC_VLLDM): Likewise. * config/arm/vfp.md (lazy_store_multiple_insn): New define_insn. (lazy_load_multiple_insn): Likewise. *** gcc/testsuite/ChangeLog *** 2020-01-16 Mihail-Calin Ionescu 2020-01-16 Thomas Preud'homme * gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13.c: Add check for VLSTM and VLLDM. * gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7.c: Likewise. * gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8.c: Likewise. * gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13.c: Likewise. * gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7.c: Likewise. 
* gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8.c: Likewise. * gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c: Likewise. * gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c: Likewise. --- gcc/ChangeLog | 14 +++++++ gcc/config/arm/arm.c | 40 +++++++++++++++++-- gcc/config/arm/unspecs.md | 4 ++ gcc/config/arm/vfp.md | 24 +++++++++++ gcc/testsuite/ChangeLog | 13 ++++++ .../arm/cmse/mainline/8_1m/soft/cmse-13.c | 2 + .../arm/cmse/mainline/8_1m/soft/cmse-7.c | 2 + .../arm/cmse/mainline/8_1m/soft/cmse-8.c | 2 + .../arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c | 2 + .../arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c | 2 + .../arm/cmse/mainline/8_1m/softfp/cmse-13.c | 2 + .../arm/cmse/mainline/8_1m/softfp/cmse-7.c | 2 + .../arm/cmse/mainline/8_1m/softfp/cmse-8.c | 2 + 13 files changed, 107 insertions(+), 4 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 11938ef2432..fac36a41deb 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,6 +1,20 @@ 2020-01-16 Mihail-Calin Ionescu 2020-01-16 Thomas Preud'homme + * config/arm/arm.c (arm_add_cfa_adjust_cfa_note): Declare early. + (cmse_nonsecure_call_inline_register_clear): Define new lazy_fpclear + variable as true when floating-point ABI is not hard. Replace + check against TARGET_HARD_FLOAT_ABI by checks against lazy_fpclear. + Generate VLSTM and VLLDM instruction respectively before and + after a function call to cmse_nonsecure_call function. + * config/arm/unspecs.md (VUNSPEC_VLSTM): Define unspec. + (VUNSPEC_VLLDM): Likewise. + * config/arm/vfp.md (lazy_store_multiple_insn): New define_insn. + (lazy_load_multiple_insn): Likewise. + +2020-01-16 Mihail-Calin Ionescu +2020-01-16 Thomas Preud'homme + * config/arm/arm.c (vfp_emit_fstmd): Declare early. (arm_emit_vfp_multi_reg_pop): Likewise. 
(cmse_nonsecure_call_inline_register_clear): Abstract number of VFP diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 2cb2b8ec84b..5e93f52f856 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -186,6 +186,7 @@ static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t); static int arm_memory_move_cost (machine_mode, reg_class_t, bool); static void emit_constant_insn (rtx cond, rtx pattern); static rtx_insn *emit_set_insn (rtx, rtx); +static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx); static rtx emit_multi_reg_push (unsigned long, unsigned long); static void arm_emit_multi_reg_pop (unsigned long); static int vfp_emit_fstmd (int, int); @@ -18283,6 +18284,9 @@ cmse_nonsecure_call_inline_register_clear (void) FOR_BB_INSNS (bb, insn) { bool clear_callee_saved = TARGET_HAVE_FPCXT_CMSE; + /* frame = VFP regs + FPSCR + VPR. */ + unsigned lazy_store_stack_frame_size + = (LAST_VFP_REGNUM - FIRST_VFP_REGNUM + 1 + 2) * UNITS_PER_WORD; unsigned long callee_saved_mask = ((1 << (LAST_HI_REGNUM + 1)) - 1) & ~((1 << (LAST_ARG_REGNUM + 1)) - 1); @@ -18300,7 +18304,7 @@ cmse_nonsecure_call_inline_register_clear (void) CUMULATIVE_ARGS args_so_far_v; cumulative_args_t args_so_far; tree arg_type, fntype; - bool first_param = true; + bool first_param = true, lazy_fpclear = !TARGET_HARD_FLOAT_ABI; function_args_iterator args_iter; uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U}; @@ -18334,7 +18338,7 @@ cmse_nonsecure_call_inline_register_clear (void) -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the lazy store and loads which clear both caller- and callee-saved registers. */ - if (TARGET_HARD_FLOAT_ABI) + if (!lazy_fpclear) { auto_sbitmap float_bitmap (maxregno + 1); @@ -18418,8 +18422,23 @@ cmse_nonsecure_call_inline_register_clear (void) disabled for pop (see below). */ RTX_FRAME_RELATED_P (push_insn) = 0; + /* Lazy store multiple. 
*/ + if (lazy_fpclear) + { + rtx imm; + rtx_insn *add_insn; + + imm = gen_int_mode (- lazy_store_stack_frame_size, SImode); + add_insn = emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, imm)); + arm_add_cfa_adjust_cfa_note (add_insn, + - lazy_store_stack_frame_size, + stack_pointer_rtx, + stack_pointer_rtx); + emit_insn (gen_lazy_store_multiple_insn (stack_pointer_rtx)); + } /* Save VFP callee-saved registers. */ - if (TARGET_HARD_FLOAT_ABI) + else { vfp_emit_fstmd (D7_VFP_REGNUM + 1, (max_fp_regno - D7_VFP_REGNUM) / 2); @@ -18445,8 +18464,21 @@ cmse_nonsecure_call_inline_register_clear (void) start_sequence (); + /* Lazy load multiple done as part of libcall in Armv8-M. */ + if (lazy_fpclear) + { + rtx imm = gen_int_mode (lazy_store_stack_frame_size, SImode); + emit_insn (gen_lazy_load_multiple_insn (stack_pointer_rtx)); + rtx_insn *add_insn = + emit_insn (gen_addsi3 (stack_pointer_rtx, + stack_pointer_rtx, imm)); + arm_add_cfa_adjust_cfa_note (add_insn, + lazy_store_stack_frame_size, + stack_pointer_rtx, + stack_pointer_rtx); + } /* Restore VFP callee-saved registers. */ - if (TARGET_HARD_FLOAT_ABI) + else { int nb_callee_saved_vfp_regs = (max_fp_regno - D7_VFP_REGNUM) / 2; diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 22a14919230..8f4a705f43e 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -243,6 +243,10 @@ VUNSPEC_CLRM_APSR ; Represent the clearing of APSR with clrm instruction. VUNSPEC_VSCCLRM_VPR ; Represent the clearing of VPR with vscclrm ; instruction. + VUNSPEC_VLSTM ; Represent the lazy store multiple with vlstm + ; instruction. + VUNSPEC_VLLDM ; Represent the lazy load multiple with vlldm + ; instruction. ]) ;; Enumerators for NEON unspecs. 
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md index 92e8d8fbd8f..930ef462947 100644 --- a/gcc/config/arm/vfp.md +++ b/gcc/config/arm/vfp.md @@ -1673,6 +1673,30 @@ (set_attr "type" "mov_reg")] ) +(define_insn "lazy_store_multiple_insn" + [(set (match_operand:SI 0 "s_register_operand" "+&rk") + (post_dec:SI (match_dup 0))) + (unspec_volatile [(const_int 0) + (mem:SI (post_dec:SI (match_dup 0)))] + VUNSPEC_VLSTM)] + "use_cmse && reload_completed" + "vlstm%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "type" "store_4")] +) + +(define_insn "lazy_load_multiple_insn" + [(set (match_operand:SI 0 "s_register_operand" "+&rk") + (post_inc:SI (match_dup 0))) + (unspec_volatile:SI [(const_int 0) + (mem:SI (match_dup 0))] + VUNSPEC_VLLDM)] + "use_cmse && reload_completed" + "vlldm%?\\t%0" + [(set_attr "predicable" "yes") + (set_attr "type" "load_4")] +) + (define_insn_and_split "*cmpsf_split_vfp" [(set (reg:CCFP CC_REGNUM) (compare:CCFP (match_operand:SF 0 "s_register_operand" "t") diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 24ebfcb913c..6653b728a15 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,6 +1,19 @@ 2020-01-16 Mihail-Calin Ionescu 2020-01-16 Thomas Preud'homme + * gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13.c: Add check for VLSTM and + VLLDM. + * gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7.c: Likewise. + * gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8.c: Likewise. + * gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13.c: Likewise. + * gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7.c: Likewise. + * gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8.c: Likewise. + * gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c: Likewise. + * gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c: Likewise. + +2020-01-16 Mihail-Calin Ionescu +2020-01-16 Thomas Preud'homme + * gcc.target/arm/cmse/mainline/8_1m/hard-sp/cmse-13.c: Add check for VPUSH and VPOP and update expectation for VSCCLRM. 
* gcc.target/arm/cmse/mainline/8_1m/hard-sp/cmse-7.c: Likewise. diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13.c index 07a6719b4f1..52d22427de7 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-13.c @@ -11,7 +11,9 @@ /* { dg-final { scan-assembler-not "mov\tr2, r4" } } */ /* { dg-final { scan-assembler-not "mov\tr3, r4" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r1, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* { dg-final { scan-assembler-not "vmov" } } */ /* { dg-final { scan-assembler-not "vmsr" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7.c index ca2961ac18c..40026d5ee1c 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-7.c @@ -8,7 +8,9 @@ /* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ /* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* { dg-final { scan-assembler-not "vmov" } } */ /* { dg-final { scan-assembler-not "vmsr" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8.c 
b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8.c index 7a1abb51fcf..6edc1f6ed7e 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/soft/cmse-8.c @@ -10,7 +10,9 @@ /* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ /* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r2, r3, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* { dg-final { scan-assembler-not "vmov" } } */ /* { dg-final { scan-assembler-not "vmsr" } } */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c index 90aadffb7aa..8d05576add9 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-7.c @@ -9,7 +9,9 @@ /* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ /* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* Now we check that we use the correct intrinsic to call. 
*/ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c index 28f2e86dfaa..1f0a1474278 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp-sp/cmse-8.c @@ -11,7 +11,9 @@ /* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ /* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r2, r3, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* Now we check that we use the correct intrinsic to call. */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13.c index 15d3b682c79..84279418108 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-13.c @@ -12,7 +12,9 @@ /* { dg-final { scan-assembler-not "mov\tr2, r4" } } */ /* { dg-final { scan-assembler-not "mov\tr3, r4" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r1, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* Now we check that we use the correct intrinsic to call. 
*/ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7.c index 3d48859028a..38c9d545703 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-7.c @@ -9,7 +9,9 @@ /* { dg-final { scan-assembler "lsrs\tr4, r4, #1" } } */ /* { dg-final { scan-assembler "lsls\tr4, r4, #1" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r0, r1, r2, r3, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* Now we check that we use the correct intrinsic to call. */ diff --git a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8.c b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8.c index 0e2dcae3692..6a17bd322fc 100644 --- a/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8.c +++ b/gcc/testsuite/gcc.target/arm/cmse/mainline/8_1m/softfp/cmse-8.c @@ -11,7 +11,9 @@ /* { dg-final { scan-assembler-not "mov\tr0, r4" } } */ /* { dg-final { scan-assembler-not "mov\tr1, r4" } } */ /* { dg-final { scan-assembler "push\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ +/* { dg-final { scan-assembler "vlstm\tsp" } } */ /* { dg-final { scan-assembler "clrm\t\{r2, r3, r5, r6, r7, r8, r9, r10, fp, ip, APSR\}" } } */ +/* { dg-final { scan-assembler "vlldm\tsp" } } */ /* { dg-final { scan-assembler "pop\t\{r4, r5, r6, r7, r8, r9, r10, fp\}" } } */ /* Now we check that we use the correct intrinsic to call. */ -- 2.30.2