From 43e06d03ef96cfb4ddce46edc29cc7c7b82bf2f0 Mon Sep 17 00:00:00 2001 From: Ramana Radhakrishnan Date: Mon, 10 Aug 2015 10:06:28 +0000 Subject: [PATCH] [AArch64] Recommit correct version for improving TLS descriptor pattern 2015-08-06 Ramana Radhakrishnan Jiong Wang gcc/ * config/aarch64/aarch64.md (tlsdesc_small_pseudo_<mode>): New pattern. * config/aarch64/aarch64.h (reg_class): New enumeration FIXED_REG0. (REG_CLASS_NAMES): Likewise. (REG_CLASS_CONTENTS): Likewise. * config/aarch64/aarch64.c (aarch64_class_max_nregs): Likewise. (aarch64_register_move_cost): Likewise. (aarch64_load_symref_appropriately): Invoke the newly added pattern if possible. * config/aarch64/constraints.md (Uc0): New constraint. gcc/testsuite/ * gcc.target/aarch64/tlsdesc_hoist.c: New testcase. From-SVN: r226757 --- gcc/config/aarch64/aarch64.c | 34 ++++++++++++++----- gcc/config/aarch64/aarch64.h | 3 ++ gcc/config/aarch64/aarch64.md | 19 +++++++++++ gcc/config/aarch64/constraints.md | 3 ++ .../gcc.target/aarch64/tlsdesc_hoist.c | 22 ++++++++++++ 5 files changed, 73 insertions(+), 8 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index e991a494cfc..35f063ea19b 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1044,22 +1044,39 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm, { machine_mode mode = GET_MODE (dest); rtx x0 = gen_rtx_REG (mode, R0_REGNUM); + rtx offset; rtx tp; gcc_assert (mode == Pmode || mode == ptr_mode); - /* In ILP32, the got entry is always of SImode size. Unlike - small GOT, the dest is fixed at reg 0.
*/ - if (TARGET_ILP32) - emit_insn (gen_tlsdesc_small_si (imm)); + if (can_create_pseudo_p ()) + { + rtx reg = gen_reg_rtx (mode); + + if (TARGET_ILP32) + emit_insn (gen_tlsdesc_small_pseudo_si (reg, imm)); + else + emit_insn (gen_tlsdesc_small_pseudo_di (reg, imm)); + + offset = reg; + } else - emit_insn (gen_tlsdesc_small_di (imm)); + { + /* In ILP32, the got entry is always of SImode size. Unlike + small GOT, the dest is fixed at reg 0. */ + if (TARGET_ILP32) + emit_insn (gen_tlsdesc_small_si (imm)); + else + emit_insn (gen_tlsdesc_small_di (imm)); + + offset = x0; + } tp = aarch64_load_tp (NULL); if (mode != Pmode) tp = gen_lowpart (mode, tp); - emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0))); + emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, offset))); set_unique_reg_note (get_last_insn (), REG_EQUIV, imm); return; } @@ -5105,6 +5122,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode) aarch64_vector_mode_p (mode) ? (GET_MODE_SIZE (mode) + UNITS_PER_VREG - 1) / UNITS_PER_VREG : (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; + case FIXED_REG0: case STACK_REG: return 1; @@ -6973,10 +6991,10 @@ aarch64_register_move_cost (machine_mode mode, = aarch64_tune_params.regmove_cost; /* Caller save and pointer regs are equivalent to GENERAL_REGS. */ - if (to == CALLER_SAVE_REGS || to == POINTER_REGS) + if (to == CALLER_SAVE_REGS || to == POINTER_REGS || to == FIXED_REG0) to = GENERAL_REGS; - if (from == CALLER_SAVE_REGS || from == POINTER_REGS) + if (from == CALLER_SAVE_REGS || from == POINTER_REGS || from == FIXED_REG0) from = GENERAL_REGS; /* Moving between GPR and stack cost is the same as GP2GP. 
*/ diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h index 721927ffdc7..43ff895c072 100644 --- a/gcc/config/aarch64/aarch64.h +++ b/gcc/config/aarch64/aarch64.h @@ -401,6 +401,7 @@ extern unsigned aarch64_architecture_version; enum reg_class { NO_REGS, + FIXED_REG0, CALLER_SAVE_REGS, GENERAL_REGS, STACK_REG, @@ -416,6 +417,7 @@ enum reg_class #define REG_CLASS_NAMES \ { \ "NO_REGS", \ + "FIXED_REG0", \ "CALLER_SAVE_REGS", \ "GENERAL_REGS", \ "STACK_REG", \ @@ -428,6 +430,7 @@ enum reg_class #define REG_CLASS_CONTENTS \ { \ { 0x00000000, 0x00000000, 0x00000000 }, /* NO_REGS */ \ + { 0x00000001, 0x00000000, 0x00000000 }, /* FIXED_REG0 */ \ { 0x0007ffff, 0x00000000, 0x00000000 }, /* CALLER_SAVE_REGS */ \ { 0x7fffffff, 0x00000000, 0x00000003 }, /* GENERAL_REGS */ \ { 0x80000000, 0x00000000, 0x00000000 }, /* STACK_REG */ \ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 7f997532895..35255e91a95 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -4549,6 +4549,25 @@ [(set_attr "type" "call") (set_attr "length" "16")]) +;; The same as tlsdesc_small_<mode> with hard register hiding. +;; The first operand is actually x0, while we wrap it under a dedicated +;; register class so that before register allocation, it's seen as pseudo +;; register. The reason for doing this is we don't expose hard register X0 +;; as the destination of set as it will cause trouble for RTL loop iv. +;; RTL loop iv will abort ongoing optimization once it finds there is hard reg +;; as destination of set.
+(define_insn "tlsdesc_small_pseudo_<mode>" + [(set (match_operand:PTR 0 "register_operand" "=Uc0") + (unspec:PTR [(match_operand 1 "aarch64_valid_symref" "S")] + UNSPEC_TLSDESC)) + (clobber (reg:DI LR_REGNUM)) + (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:DI 2 "=r"))] + "TARGET_TLS_DESC" + "adrp\\t%0, %A1\;ldr\\t%2, [%0, #%L1]\;add\\t%0, %0, %L1\;.tlsdesccall\\t%1\;blr\\t%2" + [(set_attr "type" "call") + (set_attr "length" "16")]) + (define_insn "stack_tie" [(set (mem:BLK (scratch)) (unspec:BLK [(match_operand:DI 0 "register_operand" "rk") diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md index 9dc21089154..7b410e74c64 100644 --- a/gcc/config/aarch64/constraints.md +++ b/gcc/config/aarch64/constraints.md @@ -24,6 +24,9 @@ (define_register_constraint "Ucs" "CALLER_SAVE_REGS" "@internal The caller save registers.") +(define_register_constraint "Uc0" "FIXED_REG0" + "@internal Represent X0/W0.") + (define_register_constraint "w" "FP_REGS" "Floating point and SIMD vector registers.") diff --git a/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c b/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c new file mode 100644 index 00000000000..a1fd3b0f09f --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/tlsdesc_hoist.c @@ -0,0 +1,22 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target tls_native } */ +/* { dg-options "-O2 -fpic -fdump-rtl-loop2_invariant" } */ +/* { dg-skip-if "-mcmodel=large, no support for -fpic" { aarch64-*-* } { "-mcmodel=large" } { "" } } */ + +int cal (int, int); +__thread int tls_data; + +int +foo (int bound) +{ + int i = 0; + int sum = 0; + + for (i; i < bound; i++) + sum = cal (sum, tls_data); + + return sum; +} + +/* Insn sequences for TLS descriptor should be hoisted out of the loop. */ +/* { dg-final { scan-rtl-dump "Decided" "loop2_invariant" } } */ -- 2.30.2