;; predicate for the first alternative, but using Upa or X isn't likely
;; to gain much and would make the instruction seem less uniform to the
;; register allocator.
-(define_insn "*mul<mode>3"
+(define_insn_and_split "*mul<mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- mul\t%0.<Vetype>, %0.<Vetype>, #%3
+ #
mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;mul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.
+ "&& reload_completed
+ && !register_operand (operands[3], <MODE>mode)"
+ ; Only alternative 0 (the "#" template, multiplication by a constant)
+ ; is split, hence the !register_operand check on operand 3 above.
+ [(set (match_dup 0) (mult:SVE_I (match_dup 2) (match_dup 3)))]
+ ""
[(set_attr "movprfx" "*,*,yes")]
)
+;; Unpredicated multiplications by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+;; MUL (immediate) is destructive, so operand 1 is tied to the
+;; destination via the "0" constraint and %0 appears as the first
+;; source in the output template.
+(define_insn "*post_ra_mul<mode>3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(mult:SVE_I
+	  (match_operand:SVE_I 1 "register_operand" "0")
+	  (match_operand:SVE_I 2 "aarch64_sve_mul_immediate")))]
+  "TARGET_SVE && reload_completed"
+  "mul\t%0.<Vetype>, %0.<Vetype>, #%2"
+)
+
(define_insn "*madd<mode>"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
(plus:SVE_I
;; actually need the predicate for the first alternative, but using Upa
;; or X isn't likely to gain much and would make the instruction seem
;; less uniform to the register allocator.
-(define_insn "*v<optab><mode>3"
+(define_insn_and_split "*v<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
(unspec:SVE_I
[(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
UNSPEC_MERGE_PTRUE))]
"TARGET_SVE"
"@
- <shift>\t%0.<Vetype>, %2.<Vetype>, #%3
+ #
<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+ ; Split the unpredicated form after reload, so that we don't have
+ ; the unnecessary PTRUE.  As for *mul<mode>3, only alternative 0
+ ; (shift by a constant) is split.
+ "&& reload_completed
+ && !register_operand (operands[3], <MODE>mode)"
+ [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
+ ""
[(set_attr "movprfx" "*,*,yes")]
)
+;; Unpredicated shift operations by a constant (post-RA only).
+;; These are generated by splitting a predicated instruction whose
+;; predicate is unused.
+;; Unlike MUL (immediate) above, the immediate shifts take a plain "w"
+;; source, so operand 1 need not be tied to the destination.
+(define_insn "*post_ra_v<optab><mode>3"
+  [(set (match_operand:SVE_I 0 "register_operand" "=w")
+	(ASHIFT:SVE_I
+	  (match_operand:SVE_I 1 "register_operand" "w")
+	  (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
+  "TARGET_SVE && reload_completed"
+  "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
+)
+
;; LSL, LSR and ASR by a scalar, which expands into one of the vector
;; shifts above.
(define_expand "<ASHIFT:optab><mode>3"
--- /dev/null
+/* { dg-options "-O2 -ftree-vectorize" } */
+
+/* Check that vectorized shifts and multiplications by a constant are
+   emitted as unpredicated instructions: the scan-assembler-not at the
+   end verifies that no PTRUE predicate setup is needed.  */
+
+#include <stdint.h>
+
+/* Define NAME_TYPE, which applies OP to each element of B and stores
+   the results in A.  */
+#define TEST_OP(NAME, TYPE, OP) \
+  void \
+  NAME##_##TYPE (TYPE *restrict a, TYPE *restrict b, int n) \
+  { \
+    for (int i = 0; i < n; ++i) \
+      a[i] = b[i] OP; \
+  }
+
+/* Instantiate a left shift, a right shift and a multiplication by a
+   constant for TYPE.  */
+#define TEST_TYPE(TYPE) \
+  TEST_OP (shl, TYPE, << 6) \
+  TEST_OP (shr, TYPE, >> 6) \
+  TEST_OP (mult, TYPE, * 0x2b)
+
+TEST_TYPE (int8_t)
+TEST_TYPE (int16_t)
+TEST_TYPE (int32_t)
+TEST_TYPE (int64_t)
+TEST_TYPE (uint8_t)
+TEST_TYPE (uint16_t)
+TEST_TYPE (uint32_t)
+TEST_TYPE (uint64_t)
+
+/* Left shifts and multiplications cover all 8 types; right shifts are
+   logical (LSR) for the 4 unsigned types and arithmetic (ASR) for the
+   4 signed ones.  */
+/* { dg-final { scan-assembler-times {\tlsl\t} 8 } } */
+/* { dg-final { scan-assembler-times {\tlsr\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tasr\t} 4 } } */
+/* { dg-final { scan-assembler-times {\tmul\t} 8 } } */
+/* { dg-final { scan-assembler-not {\tptrue\t} } } */