From 41dab855dce20d5d7042c9330dd8124d0ece19c0 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 6 Jun 2019 13:59:07 +0000 Subject: [PATCH] [AArch64] PR tree-optimization/90332: Implement vec_init<M><N> where N is a vector mode This patch fixes the failing gcc.dg/vect/slp-reduc-sad-2.c testcase on aarch64 by implementing a vec_init optab that can handle two half-width vectors producing a full-width one by concatenating them. In the gcc.dg/vect/slp-reduc-sad-2.c case it's a V8QI reg concatenated with a V8QI const_vector of zeroes. This can be implemented efficiently using the aarch64_combinez pattern that just loads a D-register to make use of the implicit zero-extending semantics of that load. Otherwise it concatenates the two vectors using aarch64_simd_combine. With this patch I'm seeing the effect from richi's original patch that added gcc.dg/vect/slp-reduc-sad-2.c on aarch64 and 525.x264_r improves by about 1.5%. PR tree-optimization/90332 * config/aarch64/aarch64.c (aarch64_expand_vector_init): Handle VALS containing two vectors. * config/aarch64/aarch64-simd.md (*aarch64_combinez): Rename to... (@aarch64_combinez): ... This. (*aarch64_combinez_be): Rename to... (@aarch64_combinez_be): ... This. (vec_init): New define_expand. * config/aarch64/iterators.md (Vhalf): Handle V8HF. From-SVN: r272002 --- gcc/ChangeLog | 13 ++++++++++ gcc/config/aarch64/aarch64-simd.md | 13 ++++++++-- gcc/config/aarch64/aarch64.c | 39 ++++++++++++++++++++++++++++++ gcc/config/aarch64/iterators.md | 1 + 4 files changed, 64 insertions(+), 2 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 41e0c17f33c..ae15b05c65f 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,16 @@ +2019-06-06 Kyrylo Tkachov + + PR tree-optimization/90332 + * config/aarch64/aarch64.c (aarch64_expand_vector_init): + Handle VALS containing two vectors. + * config/aarch64/aarch64-simd.md (*aarch64_combinez): Rename + to... + (@aarch64_combinez): ... This. + (*aarch64_combinez_be): Rename to... 
+ (@aarch64_combinez_be): ... This. + (vec_init): New define_expand. + * config/aarch64/iterators.md (Vhalf): Handle V8HF. + 2019-06-06 Jozef Lawrynowicz * config/msp430/msp430.md (ashlhi3): Use the const_variant of shift diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index b648e9e7916..df8bf1d9778 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -3216,7 +3216,7 @@ ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. -(define_insn "*aarch64_combinez" +(define_insn "@aarch64_combinez" [(set (match_operand: 0 "register_operand" "=w,w,w") (vec_concat: (match_operand:VDC 1 "general_operand" "w,?r,m") @@ -3230,7 +3230,7 @@ (set_attr "arch" "simd,fp,simd")] ) -(define_insn "*aarch64_combinez_be" +(define_insn "@aarch64_combinez_be" [(set (match_operand: 0 "register_operand" "=w,w,w") (vec_concat: (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero") @@ -5959,6 +5959,15 @@ DONE; }) +(define_expand "vec_init" + [(match_operand:VQ_NO2E 0 "register_operand" "") + (match_operand 1 "" "")] + "TARGET_SIMD" +{ + aarch64_expand_vector_init (operands[0], operands[1]); + DONE; +}) + (define_insn "*aarch64_simd_ld1r" [(set (match_operand:VALL_F16 0 "register_operand" "=w") (vec_duplicate:VALL_F16 diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index b3c42e210c5..9a035dd9ed8 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -15106,6 +15106,45 @@ aarch64_expand_vector_init (rtx target, rtx vals) rtx v0 = XVECEXP (vals, 0, 0); bool all_same = true; + /* This is a special vec_init<M><N> where N is not an element mode but a + vector mode with half the elements of M. We expect to find two entries + of mode N in VALS and we must put their concatenation into TARGET. 
*/ + if (XVECLEN (vals, 0) == 2 && VECTOR_MODE_P (GET_MODE (XVECEXP (vals, 0, 0)))) + { + gcc_assert (known_eq (GET_MODE_SIZE (mode), + 2 * GET_MODE_SIZE (GET_MODE (XVECEXP (vals, 0, 0))))); + rtx lo = XVECEXP (vals, 0, 0); + rtx hi = XVECEXP (vals, 0, 1); + machine_mode narrow_mode = GET_MODE (lo); + gcc_assert (GET_MODE_INNER (narrow_mode) == inner_mode); + gcc_assert (narrow_mode == GET_MODE (hi)); + + /* When we want to concatenate a half-width vector with zeroes we can + use the aarch64_combinez[_be] patterns. Just make sure that the + zeroes are in the right half. */ + if (BYTES_BIG_ENDIAN + && aarch64_simd_imm_zero (lo, narrow_mode) + && general_operand (hi, narrow_mode)) + emit_insn (gen_aarch64_combinez_be (narrow_mode, target, hi, lo)); + else if (!BYTES_BIG_ENDIAN + && aarch64_simd_imm_zero (hi, narrow_mode) + && general_operand (lo, narrow_mode)) + emit_insn (gen_aarch64_combinez (narrow_mode, target, lo, hi)); + else + { + /* Else create the two half-width registers and combine them. */ + if (!REG_P (lo)) + lo = force_reg (GET_MODE (lo), lo); + if (!REG_P (hi)) + hi = force_reg (GET_MODE (hi), hi); + + if (BYTES_BIG_ENDIAN) + std::swap (lo, hi); + emit_insn (gen_aarch64_simd_combine (narrow_mode, target, lo, hi)); + } + return; + } + /* Count the number of variable elements to initialise. */ for (int i = 0; i < n_elts; ++i) { diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index d0070b1a732..2179e6f21db 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -768,6 +768,7 @@ ;; Half modes of all vector modes, in lower-case. (define_mode_attr Vhalf [(V8QI "v4qi") (V16QI "v8qi") (V4HI "v2hi") (V8HI "v4hi") + (V8HF "v4hf") (V2SI "si") (V4SI "v2si") (V2DI "di") (V2SF "sf") (V4SF "v2sf") (V2DF "df")]) -- 2.30.2