From 43705f3fa343e08b2fb030460fc5e2a969954398 Mon Sep 17 00:00:00 2001 From: Kyrylo Tkachov Date: Thu, 21 Jan 2021 14:06:16 +0000 Subject: [PATCH] aarch64: Use canonical RTL for sqdmlal patterns The aarch64_sqdmll patterns are of the form: [(set (match_operand: 0 "register_operand" "=w") (SBINQOPS: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: (sign_extend: (match_operand:VSD_HSI 2 "register_operand" "w")) (sign_extend: (match_operand:VSD_HSI 3 "register_operand" "w"))) (const_int 1))))] where SBINQOPS is ss_plus and ss_minus. The problem is that for the ss_plus case the RTL is not canonical: the (match_operand 1) should be the second arm of the PLUS. I've seen this manifest in combine missing some legitimate simplifications because it generates the canonical ss_plus form and fails to match the pattern. This patch splits the patterns into the ss_plus and ss_minus forms with the canonical form for each. I've seen this improve my testcase (which I can't include as it's too large and not easy to test reliably). gcc/ChangeLog: * config/aarch64/aarch64-simd.md (aarch64_sqdmll): Split into... (aarch64_sqdmlal): ... This... (aarch64_sqdmlsl): ... And this. (aarch64_sqdmll_lane): Split into... (aarch64_sqdmlal_lane): ... This... (aarch64_sqdmlsl_lane): ... And this. (aarch64_sqdmll_laneq): Split into... (aarch64_sqdmlsl_laneq): ... This... (aarch64_sqdmlal_laneq): ... And this. (aarch64_sqdmll_n): Split into... (aarch64_sqdmlsl_n): ... This... (aarch64_sqdmlal_n): ... And this. (aarch64_sqdmll2_internal): Split into... (aarch64_sqdmlal2_internal): ... This... (aarch64_sqdmlsl2_internal): ... And this. 
--- gcc/config/aarch64/aarch64-simd.md | 193 +++++++++++++++++++++++++---- 1 file changed, 172 insertions(+), 21 deletions(-) diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md index d7acd72bede..be2a5a86517 100644 --- a/gcc/config/aarch64/aarch64-simd.md +++ b/gcc/config/aarch64/aarch64-simd.md @@ -4155,9 +4155,25 @@ ;; vqdml[sa]l -(define_insn "aarch64_sqdmll" +(define_insn "aarch64_sqdmlal" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VSD_HSI 2 "register_operand" "w")) + (sign_extend: + (match_operand:VSD_HSI 3 "register_operand" "w"))) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__long")] +) + +(define_insn "aarch64_sqdmlsl" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -4167,15 +4183,39 @@ (match_operand:VSD_HSI 3 "register_operand" "w"))) (const_int 1))))] "TARGET_SIMD" - "sqdmll\\t%0, %2, %3" + "sqdmlsl\\t%0, %2, %3" [(set_attr "type" "neon_sat_mla__long")] ) ;; vqdml[sa]l_lane -(define_insn "aarch64_sqdmll_lane" +(define_insn "aarch64_sqdmlal_lane" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_lane" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") 
(ss_ashift: (mult: @@ -4192,14 +4232,15 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) -(define_insn "aarch64_sqdmll_laneq" + +(define_insn "aarch64_sqdmlsl_laneq" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -4216,14 +4257,62 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) -(define_insn "aarch64_sqdmll_lane" +(define_insn "aarch64_sqdmlal_laneq" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + )) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + + +(define_insn "aarch64_sqdmlal_lane" + [(set (match_operand: 0 "register_operand" "=w") + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_lane" + [(set (match_operand: 0 "register_operand" "=w") + 
(ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -4239,14 +4328,38 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) -(define_insn "aarch64_sqdmll_laneq" + +(define_insn "aarch64_sqdmlal_laneq" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:SD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_select: + (match_operand: 3 "register_operand" "") + (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) + ) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + { + operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); + return + "sqdmlal\\t%0, %2, %3.[%4]"; + } + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl_laneq" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -4262,16 +4375,16 @@ { operands[4] = aarch64_endian_lane_rtx (mode, INTVAL (operands[4])); return - "sqdmll\\t%0, %2, %3.[%4]"; + "sqdmlsl\\t%0, %2, %3.[%4]"; } [(set_attr "type" "neon_sat_mla__scalar_long")] ) ;; vqdml[sa]l_n -(define_insn "aarch64_sqdmll_n" +(define_insn "aarch64_sqdmlsl_n" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -4282,15 +4395,53 @@ (match_operand: 3 "register_operand" "")))) (const_int 1))))] "TARGET_SIMD" - "sqdmll\\t%0, %2, %3.[0]" + "sqdmlsl\\t%0, %2, %3.[0]" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlal_n" + [(set (match_operand: 0 "register_operand" "=w") + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (match_operand:VD_HSI 2 "register_operand" "w")) + (sign_extend: + (vec_duplicate:VD_HSI + (match_operand: 3 "register_operand" 
"")))) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal\\t%0, %2, %3.[0]" [(set_attr "type" "neon_sat_mla__scalar_long")] ) + ;; sqdml[as]l2 -(define_insn "aarch64_sqdmll2_internal" +(define_insn "aarch64_sqdmlal2_internal" [(set (match_operand: 0 "register_operand" "=w") - (SBINQOPS: + (ss_plus: + (ss_ashift: + (mult: + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 2 "register_operand" "w") + (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" ""))) + (sign_extend: + (vec_select: + (match_operand:VQ_HSI 3 "register_operand" "w") + (match_dup 4)))) + (const_int 1)) + (match_operand: 1 "register_operand" "0")))] + "TARGET_SIMD" + "sqdmlal2\\t%0, %2, %3" + [(set_attr "type" "neon_sat_mla__scalar_long")] +) + +(define_insn "aarch64_sqdmlsl2_internal" + [(set (match_operand: 0 "register_operand" "=w") + (ss_minus: (match_operand: 1 "register_operand" "0") (ss_ashift: (mult: @@ -4304,7 +4455,7 @@ (match_dup 4)))) (const_int 1))))] "TARGET_SIMD" - "sqdmll2\\t%0, %2, %3" + "sqdmlsl2\\t%0, %2, %3" [(set_attr "type" "neon_sat_mla__scalar_long")] ) -- 2.30.2