From d4f9e81976066e1d67c8dd5ddaf24ebe3b0695ed Mon Sep 17 00:00:00 2001 From: Richard Sandiford Date: Wed, 30 Sep 2020 11:52:06 +0100 Subject: [PATCH] aarch64: Tweak movti and movtf patterns movti lacked an way of zeroing an FPR, meaning that we'd do: mov x0, 0 mov x1, 0 fmov d0, x0 fmov v0.d[1], x1 instead of just: movi v0.2d, #0 movtf had the opposite problem for GPRs: we'd generate: movi v0.2d, #0 fmov x0, d0 fmov x1, v0.d[1] instead of just: mov x0, 0 mov x1, 0 Also, there was an unnecessary earlyclobber on the GPR<-GPR movtf alternative (but not the movti one). The splitter handles overlap correctly. The TF splitter used aarch64_reg_or_imm, but the _imm part only accepts integer constants, not floating-point ones. The patch changes it to nonmemory_operand instead. gcc/ * config/aarch64/aarch64.c (aarch64_split_128bit_move_p): Add a function comment. Tighten check for FP moves. * config/aarch64/aarch64.md (*movti_aarch64): Add a w<-Z alternative. (*movtf_aarch64): Handle r<-Y like r<-r. Remove unnecessary earlyclobber. Change splitter predicate from aarch64_reg_or_imm to nonmemory_operand. gcc/testsuite/ * gcc.target/aarch64/movtf_1.c: New test. * gcc.target/aarch64/movti_1.c: Likewise. --- gcc/config/aarch64/aarch64.c | 9 ++- gcc/config/aarch64/aarch64.md | 17 +++-- gcc/testsuite/gcc.target/aarch64/movtf_1.c | 87 ++++++++++++++++++++++ gcc/testsuite/gcc.target/aarch64/movti_1.c | 87 ++++++++++++++++++++++ 4 files changed, 190 insertions(+), 10 deletions(-) create mode 100644 gcc/testsuite/gcc.target/aarch64/movtf_1.c create mode 100644 gcc/testsuite/gcc.target/aarch64/movti_1.c diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index 491fc582dab..9e88438b3c3 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -3422,11 +3422,16 @@ aarch64_split_128bit_move (rtx dst, rtx src) } } +/* Return true if we should split a move from 128-bit value SRC + to 128-bit register DEST. */ + bool aarch64_split_128bit_move_p (rtx dst, rtx src) { - return (! REG_P (src) - || ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src)))); + if (FP_REGNUM_P (REGNO (dst))) + return REG_P (src) && !FP_REGNUM_P (REGNO (src)); + /* All moves to GPRs need to be split. */ + return true; } /* Split a complex SIMD combine. */ diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index 19ec9e33f9f..78fe7c43a00 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -1361,13 +1361,14 @@ (define_insn "*movti_aarch64" [(set (match_operand:TI 0 - "nonimmediate_operand" "= r,w, r,w,r,m,m,w,m") + "nonimmediate_operand" "= r,w,w, r,w,r,m,m,w,m") (match_operand:TI 1 - "aarch64_movti_operand" " rUti,r, w,w,m,r,Z,m,w"))] + "aarch64_movti_operand" " rUti,Z,r, w,w,m,r,Z,m,w"))] "(register_operand (operands[0], TImode) || aarch64_reg_or_zero (operands[1], TImode))" "@ # + movi\\t%0.2d, #0 # # mov\\t%0.16b, %1.16b @@ -1376,11 +1377,11 @@ stp\\txzr, xzr, %0 ldr\\t%q0, %1 str\\t%q1, %0" - [(set_attr "type" "multiple,f_mcr,f_mrc,neon_logic_q, \ + [(set_attr "type" "multiple,neon_move,f_mcr,f_mrc,neon_logic_q, \ load_16,store_16,store_16,\ load_16,store_16") - (set_attr "length" "8,8,8,4,4,4,4,4,4") - (set_attr "arch" "*,*,*,simd,*,*,*,fp,fp")] + (set_attr "length" "8,4,8,8,4,4,4,4,4,4") + (set_attr "arch" "*,simd,*,*,simd,*,*,*,fp,fp")] ) ;; Split a TImode register-register or register-immediate move into @@ -1511,9 +1512,9 @@ (define_insn "*movtf_aarch64" [(set (match_operand:TF 0 - "nonimmediate_operand" "=w,?&r,w ,?r,w,?w,w,m,?r,m ,m") + "nonimmediate_operand" "=w,?r ,w ,?r,w,?w,w,m,?r,m ,m") (match_operand:TF 1 - "general_operand" " w,?r, ?r,w ,Y,Y ,m,w,m ,?r,Y"))] + "general_operand" " w,?rY,?r,w ,Y,Y ,m,w,m ,?r,Y"))] "TARGET_FLOAT && (register_operand (operands[0], TFmode) || aarch64_reg_or_fp_zero (operands[1], TFmode))" "@ @@ -1536,7 +1537,7 @@ (define_split [(set (match_operand:TF 0 "register_operand" "") - (match_operand:TF 1 "aarch64_reg_or_imm" ""))] + (match_operand:TF 1 "nonmemory_operand" ""))] "reload_completed && aarch64_split_128bit_move_p (operands[0], operands[1])" [(const_int 0)] { diff --git a/gcc/testsuite/gcc.target/aarch64/movtf_1.c b/gcc/testsuite/gcc.target/aarch64/movtf_1.c new file mode 100644 index 00000000000..570de931389 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/movtf_1.c @@ -0,0 +1,87 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** zero_q: +** movi v0.2d, #0 +** ret +*/ +void +zero_q () +{ + register _Float128 q0 asm ("q0"); + q0 = 0; + asm volatile ("" :: "w" (q0)); +} + +/* +** load_q: +** ldr q0, \[x0\] +** ret +*/ +void +load_q (_Float128 *ptr) +{ + register _Float128 q0 asm ("q0"); + q0 = *ptr; + asm volatile ("" :: "w" (q0)); +} + +/* +** store_q: +** str q0, \[x0\] +** ret +*/ +void +store_q (_Float128 *ptr) +{ + register _Float128 q0 asm ("q0"); + asm volatile ("" : "=w" (q0)); + *ptr = q0; +} + +/* +** zero_x: +** ( +** mov x0, #?0 +** mov x1, #?0 +** | +** mov x1, #?0 +** mov x0, #?0 +** ) +** ret +*/ +void +zero_x () +{ + register _Float128 x0 asm ("x0"); + x0 = 0; + asm volatile ("" :: "r" (x0)); +} + +/* +** load_x: +** ldp x2, x3, \[x0\] +** ret +*/ +void +load_x (_Float128 *ptr) +{ + register _Float128 x2 asm ("x2"); + x2 = *ptr; + asm volatile ("" :: "r" (x2)); +} + +/* +** store_x: +** stp x2, x3, \[x0\] +** ret +*/ +void +store_x (_Float128 *ptr) +{ + register _Float128 x2 asm ("x2"); + asm volatile ("" : "=r" (x2)); + *ptr = x2; +} diff --git a/gcc/testsuite/gcc.target/aarch64/movti_1.c b/gcc/testsuite/gcc.target/aarch64/movti_1.c new file mode 100644 index 00000000000..160e1acd281 --- /dev/null +++ b/gcc/testsuite/gcc.target/aarch64/movti_1.c @@ -0,0 +1,87 @@ +/* { dg-do compile } */ +/* { dg-options "-O" } */ +/* { dg-final { check-function-bodies "**" "" } } */ + +/* +** zero_q: +** movi v0.2d, #0 +** ret +*/ +void +zero_q () +{ + register __int128_t q0 asm ("q0"); + q0 = 0; + asm volatile ("" :: "w" (q0)); +} + +/* +** load_q: +** ldr q0, \[x0\] +** ret +*/ +void +load_q (__int128_t *ptr) +{ + register __int128_t q0 asm ("q0"); + q0 = *ptr; + asm volatile ("" :: "w" (q0)); +} + +/* +** store_q: +** str q0, \[x0\] +** ret +*/ +void +store_q (__int128_t *ptr) +{ + register __int128_t q0 asm ("q0"); + asm volatile ("" : "=w" (q0)); + *ptr = q0; +} + +/* +** zero_x: +** ( +** mov x0, #?0 +** mov x1, #?0 +** | +** mov x1, #?0 +** mov x0, #?0 +** ) +** ret +*/ +void +zero_x () +{ + register __int128_t x0 asm ("x0"); + x0 = 0; + asm volatile ("" :: "r" (x0)); +} + +/* +** load_x: +** ldp x2, x3, \[x0\] +** ret +*/ +void +load_x (__int128_t *ptr) +{ + register __int128_t x2 asm ("x2"); + x2 = *ptr; + asm volatile ("" :: "r" (x2)); +} + +/* +** store_x: +** stp x2, x3, \[x0\] +** ret +*/ +void +store_x (__int128_t *ptr) +{ + register __int128_t x2 asm ("x2"); + asm volatile ("" : "=r" (x2)); + *ptr = x2; +} -- 2.30.2