1 ;; Machine description for AArch64 SVE.
2 ;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
21 ;; The file is organised into the following sections (search for the full line):
25 ;; ---- Note on the handling of big-endian SVE
26 ;; ---- Description of UNSPEC_PTEST
27 ;; ---- Description of UNSPEC_PRED_Z
28 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
30 ;; ---- Note on FFR handling
33 ;; ---- Moves of single vectors
34 ;; ---- Moves of multiple vectors
35 ;; ---- Moves of predicates
36 ;; ---- Moves relating to the FFR
39 ;; ---- Normal contiguous loads
40 ;; ---- Extending contiguous loads
41 ;; ---- First-faulting contiguous loads
42 ;; ---- First-faulting extending contiguous loads
43 ;; ---- Non-temporal contiguous loads
44 ;; ---- Normal gather loads
45 ;; ---- Extending gather loads
46 ;; ---- First-faulting gather loads
47 ;; ---- First-faulting extending gather loads
50 ;; ---- Contiguous prefetches
51 ;; ---- Gather prefetches
54 ;; ---- Normal contiguous stores
55 ;; ---- Truncating contiguous stores
56 ;; ---- Non-temporal contiguous stores
57 ;; ---- Normal scatter stores
58 ;; ---- Truncating scatter stores
61 ;; ---- [INT,FP] Duplicate element
62 ;; ---- [INT,FP] Initialize from individual elements
63 ;; ---- [INT] Linear series
64 ;; ---- [PRED] Duplicate element
66 ;; == Vector decomposition
67 ;; ---- [INT,FP] Extract index
68 ;; ---- [INT,FP] Extract active element
69 ;; ---- [PRED] Extract index
71 ;; == Unary arithmetic
72 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
73 ;; ---- [INT] General unary arithmetic corresponding to unspecs
74 ;; ---- [INT] Sign and zero extension
75 ;; ---- [INT] Truncation
76 ;; ---- [INT] Logical inverse
77 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
78 ;; ---- [FP] General unary arithmetic corresponding to unspecs
79 ;; ---- [FP] Square root
80 ;; ---- [FP] Reciprocal square root
81 ;; ---- [PRED] Inverse
83 ;; == Binary arithmetic
84 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
85 ;; ---- [INT] Addition
86 ;; ---- [INT] Subtraction
87 ;; ---- [INT] Take address
88 ;; ---- [INT] Absolute difference
89 ;; ---- [INT] Saturating addition and subtraction
90 ;; ---- [INT] Highpart multiplication
91 ;; ---- [INT] Division
92 ;; ---- [INT] Binary logical operations
93 ;; ---- [INT] Binary logical operations (inverted second input)
94 ;; ---- [INT] Shifts (rounding towards -Inf)
95 ;; ---- [INT] Shifts (rounding towards 0)
96 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
97 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
98 ;; ---- [FP] General binary arithmetic corresponding to unspecs
100 ;; ---- [FP] Complex addition
101 ;; ---- [FP] Subtraction
102 ;; ---- [FP] Absolute difference
103 ;; ---- [FP] Multiplication
104 ;; ---- [FP] Division
105 ;; ---- [FP] Binary logical operations
106 ;; ---- [FP] Sign copying
107 ;; ---- [FP] Maximum and minimum
108 ;; ---- [PRED] Binary logical operations
109 ;; ---- [PRED] Binary logical operations (inverted second input)
110 ;; ---- [PRED] Binary logical operations (inverted result)
112 ;; == Ternary arithmetic
113 ;; ---- [INT] MLA and MAD
114 ;; ---- [INT] MLS and MSB
115 ;; ---- [INT] Dot product
116 ;; ---- [INT] Sum of absolute differences
117 ;; ---- [INT] Matrix multiply-accumulate
118 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
119 ;; ---- [FP] Complex multiply-add
120 ;; ---- [FP] Trigonometric multiply-add
121 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
122 ;; ---- [FP] Matrix multiply-accumulate
124 ;; == Comparisons and selects
125 ;; ---- [INT,FP] Select based on predicates
126 ;; ---- [INT,FP] Compare and select
127 ;; ---- [INT] Comparisons
128 ;; ---- [INT] While tests
129 ;; ---- [FP] Direct comparisons
130 ;; ---- [FP] Absolute comparisons
131 ;; ---- [PRED] Select
132 ;; ---- [PRED] Test bits
135 ;; ---- [INT,FP] Conditional reductions
136 ;; ---- [INT] Tree reductions
137 ;; ---- [FP] Tree reductions
138 ;; ---- [FP] Left-to-right reductions
141 ;; ---- [INT,FP] General permutes
142 ;; ---- [INT,FP] Special-purpose unary permutes
143 ;; ---- [INT,FP] Special-purpose binary permutes
144 ;; ---- [PRED] Special-purpose unary permutes
145 ;; ---- [PRED] Special-purpose binary permutes
148 ;; ---- [INT<-INT] Packs
149 ;; ---- [INT<-INT] Unpacks
150 ;; ---- [INT<-FP] Conversions
151 ;; ---- [INT<-FP] Packs
152 ;; ---- [INT<-FP] Unpacks
153 ;; ---- [FP<-INT] Conversions
154 ;; ---- [FP<-INT] Packs
155 ;; ---- [FP<-INT] Unpacks
156 ;; ---- [FP<-FP] Packs
157 ;; ---- [FP<-FP] Packs (bfloat16)
158 ;; ---- [FP<-FP] Unpacks
159 ;; ---- [PRED<-PRED] Packs
160 ;; ---- [PRED<-PRED] Unpacks
162 ;; == Vector partitioning
163 ;; ---- [PRED] Unary partitioning
164 ;; ---- [PRED] Binary partitioning
165 ;; ---- [PRED] Scalarization
167 ;; == Counting elements
168 ;; ---- [INT] Count elements in a pattern (scalar)
169 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
170 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
171 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
172 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
173 ;; ---- [INT] Count elements in a predicate (scalar)
174 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
175 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
176 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
177 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
179 ;; =========================================================================
181 ;; =========================================================================
183 ;; -------------------------------------------------------------------------
184 ;; ---- Note on the handling of big-endian SVE
185 ;; -------------------------------------------------------------------------
187 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
188 ;; same way as movdi or movti would: the first byte of memory goes
189 ;; into the most significant byte of the register and the last byte
190 ;; of memory goes into the least significant byte of the register.
191 ;; This is the most natural ordering for Advanced SIMD and matches
192 ;; the ABI layout for 64-bit and 128-bit vector types.
194 ;; As a result, the order of bytes within the register is what GCC
195 ;; expects for a big-endian target, and subreg offsets therefore work
196 ;; as expected, with the first element in memory having subreg offset 0
197 ;; and the last element in memory having the subreg offset associated
198 ;; with a big-endian lowpart. However, this ordering also means that
199 ;; GCC's lane numbering does not match the architecture's numbering:
200 ;; GCC always treats the element at the lowest address in memory
201 ;; (subreg offset 0) as element 0, while the architecture treats
202 ;; the least significant end of the register as element 0.
204 ;; The situation for SVE is different. We want the layout of the
205 ;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
206 ;; logically, a mov<mode> load must be indistinguishable from a
207 ;; maskload<mode> whose mask is all true. We therefore need the
208 ;; register layout to match LD1 rather than LDR. The ABI layout of
209 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
211 ;; As a result, the architecture lane numbering matches GCC's lane
212 ;; numbering, with element 0 always being the first in memory. However:
215 ;; - Applying a subreg offset to a register does not give the element
216 ;; that GCC expects: the first element in memory has the subreg offset
217 ;; associated with a big-endian lowpart while the last element in memory
218 ;; has subreg offset 0. We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
220 ;; - We cannot use LDR and STR for spill slots that might be accessed
221 ;; via subregs, since although the elements have the order GCC expects,
222 ;; the order of the bytes within the elements is different. We instead
223 ;; access spill slots via LD1 and ST1, using secondary reloads to
224 ;; reserve a predicate register.
226 ;; -------------------------------------------------------------------------
227 ;; ---- Description of UNSPEC_PTEST
228 ;; -------------------------------------------------------------------------
230 ;; SVE provides a PTEST instruction for testing the active lanes of a
231 ;; predicate and setting the flags based on the result. The associated
232 ;; condition code tests are:
234 ;; - any (= ne): at least one active bit is set
235 ;; - none (= eq): all active bits are clear (*)
236 ;; - first (= mi): the first active bit is set
237 ;; - nfrst (= pl): the first active bit is clear (*)
238 ;; - last (= cc): the last active bit is set
239 ;; - nlast (= cs): the last active bit is clear (*)
241 ;; where the conditions marked (*) are also true when there are no active
242 ;; lanes (i.e. when the governing predicate is a PFALSE). The flags results
243 ;; of a PTEST use the condition code mode CC_NZC.
245 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
246 ;; This means that for other predicate modes, we need a governing predicate
247 ;; in which all bits are defined.
249 ;; For example, most predicated .H operations ignore the odd bits of the
250 ;; governing predicate, so that an active lane is represented by the
251 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
252 ;; any value. To test a .H predicate, we instead need "10" and "00"
253 ;; respectively, so that the condition only tests the even bits of the predicate.
256 ;; Several instructions set the flags as a side-effect, in the same way
257 ;; that a separate PTEST would. It's important for code quality that we
258 ;; use these flags results as often as possible, particularly in the case
259 ;; of WHILE* and RDFFR.
261 ;; Also, some of the instructions that set the flags are unpredicated
262 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
263 ;; they were predicated on a PTRUE of that size. For example, a .S
264 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE would.
267 ;; We therefore need to represent PTEST operations in a way that
268 ;; makes it easy to combine them with both predicated and unpredicated
269 ;; operations, while using a VNx16BI governing predicate for all
270 ;; predicate modes. We do this using:
272 ;; (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
276 ;; - GP is the real VNx16BI governing predicate
278 ;; - CAST_GP is GP cast to the mode of OP. All bits dropped by casting
279 ;; GP to CAST_GP are guaranteed to be clear in GP.
281 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
282 ;; SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
283 ;; SVE_MAYBE_NOT_PTRUE otherwise.
285 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
287 ;; -------------------------------------------------------------------------
288 ;; ---- Description of UNSPEC_PRED_Z
289 ;; -------------------------------------------------------------------------
291 ;; SVE integer comparisons are predicated and return zero for inactive
292 ;; lanes. Sometimes we use them with predicates that are all-true and
293 ;; sometimes we use them with general predicates.
295 ;; The integer comparisons also set the flags and so build in the effect
296 ;; of a PTEST. We therefore want to be able to combine integer comparison
297 ;; patterns with PTESTs of the result. One difficulty with doing this is
298 ;; that (as noted above) the PTEST is always a .B operation and so can place
299 ;; stronger requirements on the governing predicate than the comparison does.
301 ;; For example, when applying a separate PTEST to the result of a full-vector
302 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
303 ;; .B PTRUE. In contrast, the comparison might be predicated on either
304 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
305 ;; bits don't matter for .H operations.
307 ;; We therefore can't rely on a full-vector comparison using the same
308 ;; predicate register as a following PTEST. We instead need to remember
309 ;; whether a comparison is known to be a full-vector comparison and use
310 ;; this information in addition to a check for equal predicate registers.
311 ;; At the same time, it's useful to have a common representation for all
312 ;; integer comparisons, so that they can be handled by a single set of patterns.
315 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
317 ;; (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
321 ;; - GP is the governing predicate, of mode <M:VPRED>
323 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
324 ;; SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE otherwise.
327 ;; - CODE is the comparison code
329 ;; - OP0 and OP1 are the values being compared, of mode M
331 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
333 ;; -------------------------------------------------------------------------
334 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
335 ;; -------------------------------------------------------------------------
337 ;; Many SVE integer operations are predicated. We can generate them
338 ;; from four sources:
340 ;; (1) Using normal unpredicated optabs. In this case we need to create
341 ;; an all-true predicate register to act as the governing predicate
342 ;; for the SVE instruction. There are no inactive lanes, and thus
343 ;; the values of inactive lanes don't matter.
345 ;; (2) Using _x ACLE functions. In this case the function provides a
346 ;; specific predicate and some lanes might be inactive. However,
347 ;; as for (1), the values of the inactive lanes don't matter.
348 ;; We can make extra lanes active without changing the behavior
349 ;; (although for code-quality reasons we should avoid doing so needlessly).
352 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
353 ;; These optabs have a predicate operand that specifies which lanes are
354 ;; active and another operand that provides the values of inactive lanes.
356 ;; (4) Using _m and _z ACLE functions. These functions map to the same
357 ;; patterns as (3), with the _z functions setting inactive lanes to zero
358 ;; and the _m functions setting the inactive lanes to one of the function arguments.
361 ;; For (1) and (2) we need a way of attaching the predicate to a normal
362 ;; unpredicated integer operation. We do this using:
364 ;; (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
366 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
367 ;; is a predicate of mode <M:VPRED>. PRED might or might not be a PTRUE;
368 ;; it always is for (1), but might not be for (2).
370 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
371 ;; all-true. It is always semantically valid to replace PRED with a PTRUE,
372 ;; but as noted above, we should only do so if there's a specific benefit.
374 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
376 ;; For (3) and (4) we can simply use the SVE port's normal representation
377 ;; of a predicate-based select:
379 ;; (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
381 ;; where INACTIVE specifies the values of inactive lanes.
383 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
384 ;; than inserting the integer operation directly. This is mostly useful
385 ;; if we want the combine pass to merge an integer operation with an explicit
386 ;; vcond_mask (in other words, with a following SEL instruction). However,
387 ;; it's generally better to merge such operations at the gimple level instead.
390 ;; -------------------------------------------------------------------------
391 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
392 ;; -------------------------------------------------------------------------
394 ;; Most SVE floating-point operations are predicated. We can generate
395 ;; them from four sources:
397 ;; (1) Using normal unpredicated optabs. In this case we need to create
398 ;; an all-true predicate register to act as the governing predicate
399 ;; for the SVE instruction. There are no inactive lanes, and thus
400 ;; the values of inactive lanes don't matter.
402 ;; (2) Using _x ACLE functions. In this case the function provides a
403 ;; specific predicate and some lanes might be inactive. However,
404 ;; as for (1), the values of the inactive lanes don't matter.
406 ;; The instruction must have the same exception behavior as the
407 ;; function call unless things like command-line flags specifically
408 ;; allow otherwise. For example, with -ffast-math, it is OK to
409 ;; raise exceptions for inactive lanes, but normally it isn't.
411 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
412 ;; These optabs have a predicate operand that specifies which lanes are
413 ;; active and another operand that provides the values of inactive lanes.
415 ;; (4) Using _m and _z ACLE functions. These functions map to the same
416 ;; patterns as (3), with the _z functions setting inactive lanes to zero
417 ;; and the _m functions setting the inactive lanes to one of the function arguments.
422 ;; - In (1), the predicate is known to be all true and the pattern can use
423 ;; unpredicated operations where available.
425 ;; - In (2), the predicate might or might not be all true. The pattern can
426 ;; use unpredicated instructions if the predicate is all-true or if things
427 ;; like command-line flags allow exceptions for inactive lanes.
429 ;; - (3) and (4) represent a native SVE predicated operation. Some lanes
430 ;; might be inactive and inactive lanes of the result must have specific
431 ;; values. There is no scope for using unpredicated instructions (and no
432 ;; reason to want to), so the question about command-line flags doesn't arise.
435 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
436 ;; in combination with a separate predicate operand, e.g.
438 ;; (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
439 ;; (sqrt:SVE_FULL_F 2 "register_operand" "w")]
442 ;; because (sqrt ...) can raise an exception for any lane, including
443 ;; inactive ones. We therefore need to use an unspec instead.
445 ;; Also, (2) requires some way of distinguishing the case in which the
446 ;; predicate might have inactive lanes and cannot be changed from the
447 ;; case in which the predicate has no inactive lanes or can be changed.
448 ;; This information is also useful when matching combined FP patterns
449 ;; in which the predicates might not be equal.
451 ;; We therefore model FP operations as an unspec of the form:
453 ;; (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
457 ;; - PRED is the governing predicate.
459 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI. It has the
460 ;; value SVE_STRICT_GP if PRED might have inactive lanes and if those
461 ;; lanes must remain inactive. It has the value SVE_RELAXED_GP otherwise.
463 ;; - OP0 OP1 ... are the normal input operands to the operation.
465 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
467 ;; -------------------------------------------------------------------------
468 ;; ---- Note on FFR handling
469 ;; -------------------------------------------------------------------------
471 ;; Logically we want to divide FFR-related instructions into regions
472 ;; that contain exactly one of:
474 ;; - a single write to the FFR
475 ;; - any number of reads from the FFR (but only one read is likely)
476 ;; - any number of LDFF1 and LDNF1 instructions
478 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
479 ;; normal loads as far as possible. This means that they should be
480 ;; schedulable within a region in the same way that LD1 would be,
481 ;; and they should be deleted as dead if the result is unused. The loads
482 ;; should therefore not write to the FFR, since that would both serialize
483 ;; the loads with respect to each other and keep the loads live for any later read of the FFR.
486 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
487 ;; the dependencies. Writing to the FFRT starts a new "FFRT region",
488 ;; while using the FFRT keeps the instruction within its region.
491 ;; - Writes start a new FFRT region as well as setting the FFR:
493 ;; W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
495 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
496 ;; loads stay within the same FFRT region:
498 ;; L1: load data while using the FFRT
500 ;; In addition, any FFRT region that includes a load also has at least one instance of:
503 ;; L2: FFR = update(FFR, FFRT) [type == no_insn]
505 ;; to make it clear that the region both reads from and writes to the FFR.
507 ;; - Reads do the following:
509 ;; R1: FFRT = FFR [type == no_insn]
510 ;; R2: read from the FFRT
511 ;; R3: FFRT = update(FFRT) [type == no_insn]
513 ;; R1 and R3 both create new FFRT regions, so that previous LDFF1s and
514 ;; LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
515 ;; cannot move backwards across R3.
517 ;; This way, writes are only kept alive by later loads or reads,
518 ;; and write/read pairs fold normally. For two consecutive reads,
519 ;; the first R3 is made dead by the second R1, which in turn becomes
520 ;; redundant with the first R1. We then have:
522 ;; first R1: FFRT = FFR
523 ;; first read from the FFRT
524 ;; second read from the FFRT
525 ;; second R3: FFRT = update(FFRT)
527 ;; i.e. the two FFRT regions collapse into a single one with two
528 ;; independent reads.
530 ;; The model still prevents some valid optimizations though. For example,
531 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
532 ;; the L2 instructions.
534 ;; =========================================================================
536 ;; =========================================================================
538 ;; -------------------------------------------------------------------------
539 ;; ---- Moves of single vectors
540 ;; -------------------------------------------------------------------------
542 ;; - MOV (including aliases)
543 ;; - LD1B (contiguous form)
548 ;; - ST1B (contiguous form)
553 ;; -------------------------------------------------------------------------
;; Expand a move between two SVE_ALL operands.  While new pseudos can
;; still be created, moves that involve memory are handed to
;; aarch64_expand_sve_mem_move; constant sources are handed to
;; aarch64_expand_mov_immediate; and aarch64_maybe_expand_sve_subreg_move
;; is tried for the big-endian subreg case described below.
555 (define_expand "mov<mode>"
556 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
557 (match_operand:SVE_ALL 1 "general_operand"))]
560 /* Use the predicated load and store patterns where possible.
561 This is required for big-endian targets (see the comment at the
562 head of the file) and increases the addressing choices for
564 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
565 && can_create_pseudo_p ())
567 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
571 if (CONSTANT_P (operands[1]))
573 aarch64_expand_mov_immediate (operands[0], operands[1]);
577 /* Optimize subregs on big-endian targets: we can use REV[BHW]
578 instead of going through memory. */
580 && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
;; Misaligned moves of SVE_ALL operands.  No special handling is needed:
;; the expander simply emits an ordinary move of operand 1 into operand 0.
585 (define_expand "movmisalign<mode>"
586 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
587 (match_operand:SVE_ALL 1 "general_operand"))]
590 /* Equivalent to a normal move for our purposes. */
591 emit_move_insn (operands[0], operands[1]);
596 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
597 ;; little-endian ordering is acceptable. Only allow memory operations during
598 ;; and after RA; before RA we want the predicated load and store patterns to be used.
;;
;; The condition below implements this: the mode must be VNx16QImode or
;; the target must be little-endian, and outside of LRA/post-reload the
;; destination must be a register and the source must not be memory.
;; The final alternative moves a "Dn" constant into a register and is
;; printed by aarch64_output_sve_mov_immediate.
600 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
601 [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
602 (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
604 && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
605 && ((lra_in_progress || reload_completed)
606 || (register_operand (operands[0], <MODE>mode)
607 && nonmemory_operand (operands[1], <MODE>mode)))"
612 * return aarch64_output_sve_mov_immediate (operands[1]);"
615 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
616 ;; or vectors for which little-endian ordering isn't acceptable. Memory
617 ;; accesses require secondary reloads.
;;
;; Only register and "Dn" constant sources are accepted here, and the
;; destination is always a register.  VNx16QImode is excluded by the
;; condition, and the maybe_ne test is what picks out modes whose size
;; can differ from a full SVE vector (the partial-vector case).
618 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
619 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
620 (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
622 && <MODE>mode != VNx16QImode
624 || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
627 * return aarch64_output_sve_mov_immediate (operands[1]);"
630 ;; Handle memory reloads for modes that can't use LDR and STR. We use
631 ;; byte PTRUE for all modes to try to encourage reuse. This pattern
632 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
;;
;; Operand 2 is a clobbered VNx16BI predicate register.  The expander
;; sets it to all-true, views it in the predicate mode that
;; aarch64_sve_pred_mode returns for operand 0's mode, and then emits
;; the predicated move from operand 1 to operand 0.
633 (define_expand "aarch64_sve_reload_mem"
635 [(set (match_operand 0)
637 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
640 /* Create a PTRUE. */
641 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
643 /* Refer to the PTRUE in the appropriate mode for this move. */
644 machine_mode mode = GET_MODE (operands[0]);
645 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
647 /* Emit a predicated load or store. */
648 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
653 ;; A predicated move in which the predicate is known to be all-true.
654 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
655 ;; so changes to this pattern will need changes there as well.
;;
;; Operand 1 is the governing predicate and operand 2 is the value being
;; moved.  At least one of operands 0 and 2 must be a register.  The
;; memory alternatives are output as LD1 (load) and ST1 (store); when
;; both operands are registers the insn is split into a plain move of
;; operand 2 into operand 0.
656 (define_insn_and_split "@aarch64_pred_mov<mode>"
657 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
659 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
660 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
663 && (register_operand (operands[0], <MODE>mode)
664 || register_operand (operands[2], <MODE>mode))"
667 ld1<Vesize>\t%0.<Vctype>, %1/z, %2
668 st1<Vesize>\t%2.<Vctype>, %1, %0"
669 "&& register_operand (operands[0], <MODE>mode)
670 && register_operand (operands[2], <MODE>mode)"
671 [(set (match_dup 0) (match_dup 2))]
674 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
675 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
676 ;; for details. We use a special predicate for operand 2 to reduce
677 ;; the number of patterns.
;;
;; Operand 1 is a VNx16BI predicate register and operand 2 is the
;; (mode-less) source register.  The pattern is only enabled for
;; big-endian SVE and is split after reload, with the replacement
;; sequence produced by aarch64_split_sve_subreg_move.
678 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
679 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
681 [(match_operand:VNx16BI 1 "register_operand" "Upl")
682 (match_operand 2 "aarch64_any_register_operand" "w")]
684 "TARGET_SVE && BYTES_BIG_ENDIAN"
686 "&& reload_completed"
689 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
694 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
695 ;; This is equivalent to a subreg on little-endian targets but not for
696 ;; big-endian; see the comment at the head of the file for details.
;;
;; The expander asks targetm.can_change_mode_class whether the change
;; from operand 1's mode to <MODE>mode is allowed in FP_REGS; when it
;; is, a plain move of the lowpart of operand 1 is emitted.
697 (define_expand "@aarch64_sve_reinterpret<mode>"
698 [(set (match_operand:SVE_ALL 0 "register_operand")
700 [(match_operand 1 "aarch64_any_register_operand")]
701 UNSPEC_REINTERPRET))]
704 machine_mode src_mode = GET_MODE (operands[1]);
705 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
707 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
713 ;; A pattern for handling type punning on big-endian targets. We use a
714 ;; special predicate for operand 1 to reduce the number of patterns.
;;
;; After reload the UNSPEC_REINTERPRET wrapper is split into a plain
;; move, with operand 1 first rewritten to <MODE>mode by
;; aarch64_replace_reg_mode.
715 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
716 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
718 [(match_operand 1 "aarch64_any_register_operand" "w")]
719 UNSPEC_REINTERPRET))]
722 "&& reload_completed"
723 [(set (match_dup 0) (match_dup 1))]
725 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
729 ;; -------------------------------------------------------------------------
730 ;; ---- Moves of multiple vectors
731 ;; -------------------------------------------------------------------------
732 ;; All patterns in this section are synthetic and split to real
733 ;; instructions after reload.
734 ;; -------------------------------------------------------------------------
;; Expand a move between two SVE_STRUCT (multi-vector) operands.  Moves
;; that involve memory are handed to aarch64_expand_sve_mem_move, after
;; asserting that new pseudos can still be created; constant sources are
;; handed to aarch64_expand_mov_immediate.
736 (define_expand "mov<mode>"
737 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
738 (match_operand:SVE_STRUCT 1 "general_operand"))]
741 /* Big-endian loads and stores need to be done via LD1 and ST1;
742 see the comment at the head of the file for details. */
743 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
746 gcc_assert (can_create_pseudo_p ());
747 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
751 if (CONSTANT_P (operands[1]))
753 aarch64_expand_mov_immediate (operands[0], operands[1]);
759 ;; Unpredicated structure moves (little-endian).
;; Enabled only when !BYTES_BIG_ENDIAN.  The alternatives cover a "Utr"
;; source, a "Utr" destination, a register-to-register move and a "Dn"
;; constant source.  The "length" attribute comes from <insn_length>.
760 (define_insn "*aarch64_sve_mov<mode>_le"
761 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
762 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
763 "TARGET_SVE && !BYTES_BIG_ENDIAN"
765 [(set_attr "length" "<insn_length>")]
768 ;; Unpredicated structure moves (big-endian). Memory accesses require
769 ;; secondary reloads.
;; Enabled only when BYTES_BIG_ENDIAN.  The destination is always a
;; register and the source is either a register or a "Dn" constant.
;; The "length" attribute comes from <insn_length>.
770 (define_insn "*aarch64_sve_mov<mode>_be"
771 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
772 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
773 "TARGET_SVE && BYTES_BIG_ENDIAN"
775 [(set_attr "length" "<insn_length>")]
778 ;; Split unpredicated structure moves into pieces. This is the same
779 ;; for both big-endian and little-endian code, although it only needs
780 ;; to handle memory operands for little-endian code.
;;
;; The split applies once reload has completed.  Register-to-register
;; moves are emitted by aarch64_simd_emit_reg_reg_move.  The loop moves
;; each of the <vector_count> constituent vectors individually, using
;; <VSINGLE>mode subregs of the source and destination at byte offset
;; i * BYTES_PER_SVE_VECTOR.
782 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
783 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
784 "TARGET_SVE && reload_completed"
787 rtx dest = operands[0];
788 rtx src = operands[1];
789 if (REG_P (dest) && REG_P (src))
790 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
792 for (unsigned int i = 0; i < <vector_count>; ++i)
794 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
795 i * BYTES_PER_SVE_VECTOR);
796 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
797 i * BYTES_PER_SVE_VECTOR);
798 emit_insn (gen_rtx_SET (subdest, subsrc));
804 ;; Predicated structure moves. This works for both endiannesses but in
805 ;; practice is only useful for big-endian.
;;
;; Operand 1 is the governing predicate and operand 2 is the structure
;; being moved; at least one of operands 0 and 2 must be a register.
;; After reload the move is split into <vector_count> single-vector
;; predicated moves, one per <VSINGLE>mode subreg, each emitted by
;; aarch64_emit_sve_pred_move with the same predicate.
806 (define_insn_and_split "@aarch64_pred_mov<mode>"
807 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
809 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
810 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
813 && (register_operand (operands[0], <MODE>mode)
814 || register_operand (operands[2], <MODE>mode))"
816 "&& reload_completed"
819 for (unsigned int i = 0; i < <vector_count>; ++i)
821 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
823 i * BYTES_PER_SVE_VECTOR);
824 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
826 i * BYTES_PER_SVE_VECTOR);
827 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
831 [(set_attr "length" "<insn_length>")]
834 ;; -------------------------------------------------------------------------
835 ;; ---- Moves of predicates
836 ;; -------------------------------------------------------------------------
844 ;; -------------------------------------------------------------------------
;; Move expander for the PRED_ALL predicate modes.  When the destination
;; is a MEM, the source is first forced into a register.  A constant
;; source is expanded by aarch64_expand_mov_immediate.
846 (define_expand "mov<mode>"
847 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
848 (match_operand:PRED_ALL 1 "general_operand"))]
851 if (GET_CODE (operands[0]) == MEM)
852 operands[1] = force_reg (<MODE>mode, operands[1]);
854 if (CONSTANT_P (operands[1]))
856 aarch64_expand_mov_immediate (operands[0], operands[1]);
;; Predicate moves.  The four alternatives are, in order: predicate
;; register to predicate register, predicate register to memory, memory to
;; predicate register, and a Dn-constraint source to a predicate register.
;; The insn condition requires at least one operand to be a register.  The
;; last alternative takes its assembly from aarch64_output_sve_mov_immediate.
862 (define_insn "*aarch64_sve_mov<mode>"
863 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
864 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
866 && (register_operand (operands[0], <MODE>mode)
867 || register_operand (operands[1], <MODE>mode))"
872 * return aarch64_output_sve_mov_immediate (operands[1]);"
875 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
;; Operand 0 receives the predicate result and CC_REGNUM the flags.  The
;; assembly is produced by aarch64_output_sve_ptrues from operand 1.  If
;; operand 2 or operand 3 is not already constant, the rewrite step sets
;; both to CONSTM1_RTX (VNx16BImode).
876 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
877 [(set (reg:CC_NZC CC_REGNUM)
881 (const_int SVE_KNOWN_PTRUE)
882 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
884 [(match_operand:SI 4 "const_int_operand")
885 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
888 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
892 return aarch64_output_sve_ptrues (operands[1]);
894 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
896 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
900 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
;; Operand 0 receives the predicate result and CC_REGNUM the flags.  The
;; assembly is produced by aarch64_output_sve_ptrues from operand 1.  If
;; operand 2 or operand 3 is not already constant, the rewrite step sets
;; operand 2 to CONSTM1_RTX (VNx16BImode) and operand 3 to
;; CONSTM1_RTX (<MODE>mode).
901 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
902 [(set (reg:CC_NZC CC_REGNUM)
906 (const_int SVE_KNOWN_PTRUE)
908 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
910 [(match_operand:SI 4 "const_int_operand")
911 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
914 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
918 return aarch64_output_sve_ptrues (operands[1]);
920 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
922 operands[2] = CONSTM1_RTX (VNx16BImode);
923 operands[3] = CONSTM1_RTX (<MODE>mode);
927 ;; Match PTRUES Pn.B when only the flags result is useful (which is
928 ;; a way of testing VL).
;; Operand 0 is only a scratch predicate register that the insn clobbers;
;; CC_REGNUM is the sole result.  The assembly is produced by
;; aarch64_output_sve_ptrues from operand 1.  If operand 2 or operand 3 is
;; not already constant, the rewrite step sets both to
;; CONSTM1_RTX (VNx16BImode).
929 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
930 [(set (reg:CC_NZC CC_REGNUM)
934 (const_int SVE_KNOWN_PTRUE)
935 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
937 [(match_operand:SI 4 "const_int_operand")
938 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
941 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
944 return aarch64_output_sve_ptrues (operands[1]);
946 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
948 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
952 ;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
953 ;; a way of testing VL).
;; Operand 0 is only a scratch predicate register that the insn clobbers;
;; CC_REGNUM is the sole result.  The assembly is produced by
;; aarch64_output_sve_ptrues from operand 1.  If operand 2 or operand 3 is
;; not already constant, the rewrite step sets operand 2 to
;; CONSTM1_RTX (VNx16BImode) and operand 3 to CONSTM1_RTX (<MODE>mode).
954 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
955 [(set (reg:CC_NZC CC_REGNUM)
959 (const_int SVE_KNOWN_PTRUE)
961 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
963 [(match_operand:SI 4 "const_int_operand")
964 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
967 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
970 return aarch64_output_sve_ptrues (operands[1]);
972 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
974 operands[2] = CONSTM1_RTX (VNx16BImode);
975 operands[3] = CONSTM1_RTX (<MODE>mode);
979 ;; -------------------------------------------------------------------------
980 ;; ---- Moves relating to the FFR
981 ;; -------------------------------------------------------------------------
986 ;; -------------------------------------------------------------------------
988 ;; [W1 in the block comment above about FFR handling]
990 ;; Write to the FFR and start a new FFRT scheduling region.
;; Operand 0 is the value written to FFR_REGNUM; its two alternatives use
;; the Dm and Upa constraints.  FFRT_REGNUM is set in parallel to an
;; UNSPEC_WRFFR of the same operand.
991 (define_insn "aarch64_wrffr"
992 [(set (reg:VNx16BI FFR_REGNUM)
993 (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
994 (set (reg:VNx16BI FFRT_REGNUM)
995 (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
1002 ;; [L2 in the block comment above about FFR handling]
1004 ;; Introduce a read from and write to the FFR in the current FFRT region,
1005 ;; so that the FFR value is live on entry to the region and so that the FFR
1006 ;; value visibly changes within the region. This is used (possibly multiple
1007 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
;; In RTL terms FFR_REGNUM is set to an UNSPEC_UPDATE_FFR whose inputs are
;; FFRT_REGNUM and the old FFR_REGNUM.  The pattern has no match_operands
;; and its type attribute is no_insn.
1008 (define_insn "aarch64_update_ffr_for_load"
1009 [(set (reg:VNx16BI FFR_REGNUM)
1010 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1011 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1014 [(set_attr "type" "no_insn")]
1017 ;; [R1 in the block comment above about FFR handling]
1019 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1020 ;; can be read from there by the RDFFR instructions below. This acts
1021 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1022 ;; creates a natural dependency with earlier writes.
;; The RTL is a plain VNx16BI set of FFRT_REGNUM from FFR_REGNUM; the type
;; attribute is no_insn.
1023 (define_insn "aarch64_copy_ffr_to_ffrt"
1024 [(set (reg:VNx16BI FFRT_REGNUM)
1025 (reg:VNx16BI FFR_REGNUM))]
1028 [(set_attr "type" "no_insn")]
1031 ;; [R2 in the block comment above about FFR handling]
1033 ;; Read the FFR via the FFRT.
;; Operand 0 is the destination predicate register (Upa); its new value is
;; the VNx16BI contents of FFRT_REGNUM.
1034 (define_insn "aarch64_rdffr"
1035 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1036 (reg:VNx16BI FFRT_REGNUM))]
1041 ;; Likewise with zero predication.
;; Operand 0 is the destination predicate register.  The source expression
;; takes FFRT_REGNUM and operand 1, a second predicate register, as its two
;; inputs.
1042 (define_insn "aarch64_rdffr_z"
1043 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1045 (reg:VNx16BI FFRT_REGNUM)
1046 (match_operand:VNx16BI 1 "register_operand" "Upa")))]
1051 ;; Read the FFR to test for a fault, without using the predicate result.
;; Only CC_REGNUM is set.  Operand 1 is the predicate register used with
;; /z in the RDFFRS template, operand 2 is a flag accepted by
;; aarch64_sve_ptrue_flag, and operand 0 is a scratch predicate register
;; that receives the unused RDFFRS result.
1052 (define_insn "*aarch64_rdffr_z_ptest"
1053 [(set (reg:CC_NZC CC_REGNUM)
1055 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1057 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1059 (reg:VNx16BI FFRT_REGNUM)
1062 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1064 "rdffrs\t%0.b, %1/z"
1067 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
;; The test flag here is the constant SVE_KNOWN_PTRUE rather than an
;; operand.  Operand 1 is still used as the /z predicate in the RDFFRS
;; template and operand 0 is a scratch predicate register.
1068 (define_insn "*aarch64_rdffr_ptest"
1069 [(set (reg:CC_NZC CC_REGNUM)
1071 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1073 (const_int SVE_KNOWN_PTRUE)
1074 (reg:VNx16BI FFRT_REGNUM)]
1076 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1078 "rdffrs\t%0.b, %1/z"
1081 ;; Read the FFR with zero predication and test the result.
;; Both results are live: CC_REGNUM gets the flags and operand 0 the
;; predicate value.  Operand 1 is the predicate register used with /z in
;; the RDFFRS template and operand 2 is a flag accepted by
;; aarch64_sve_ptrue_flag.
1082 (define_insn "*aarch64_rdffr_z_cc"
1083 [(set (reg:CC_NZC CC_REGNUM)
1085 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1087 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1089 (reg:VNx16BI FFRT_REGNUM)
1092 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1094 (reg:VNx16BI FFRT_REGNUM)
1097 "rdffrs\t%0.b, %1/z"
1100 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
;; The test flag is the constant SVE_KNOWN_PTRUE.  CC_REGNUM gets the
;; flags and operand 0 is set directly from FFRT_REGNUM; operand 1 is the
;; /z predicate in the RDFFRS template.
1101 (define_insn "*aarch64_rdffr_cc"
1102 [(set (reg:CC_NZC CC_REGNUM)
1104 [(match_operand:VNx16BI 1 "register_operand" "Upa")
1106 (const_int SVE_KNOWN_PTRUE)
1107 (reg:VNx16BI FFRT_REGNUM)]
1109 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1110 (reg:VNx16BI FFRT_REGNUM))]
1112 "rdffrs\t%0.b, %1/z"
1115 ;; [R3 in the block comment above about FFR handling]
1117 ;; Arbitrarily update the FFRT after a read from the FFR. This acts as
1118 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
;; In RTL terms FFRT_REGNUM is set to an UNSPEC_UPDATE_FFRT of its own old
;; value; the type attribute is no_insn.
1119 (define_insn "aarch64_update_ffrt"
1120 [(set (reg:VNx16BI FFRT_REGNUM)
1121 (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1124 [(set_attr "type" "no_insn")]
1127 ;; =========================================================================
1129 ;; =========================================================================
1131 ;; -------------------------------------------------------------------------
1132 ;; ---- Normal contiguous loads
1133 ;; -------------------------------------------------------------------------
1134 ;; Includes contiguous forms of:
1151 ;; -------------------------------------------------------------------------
;; Predicated contiguous load.  Operand 0 is the destination vector
;; register, operand 1 the memory source and operand 2 the <VPRED>
;; predicate, which the LD1 template applies with /z.
1154 (define_insn "maskload<mode><vpred>"
1155 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1157 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1158 (match_operand:SVE_ALL 1 "memory_operand" "m")]
1161 "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1164 ;; Unpredicated LD[234].
;; Operand 0 is the destination structure register and operand 1 the
;; memory source.  The expander supplies operand 2 itself, as the predicate
;; register returned by aarch64_ptrue_reg (<VPRED>mode).
1165 (define_expand "vec_load_lanes<mode><vsingle>"
1166 [(set (match_operand:SVE_STRUCT 0 "register_operand")
1169 (match_operand:SVE_STRUCT 1 "memory_operand")]
1173 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1177 ;; Predicated LD[234].
;; Operand 0 is the destination structure register, operand 1 the memory
;; source and operand 2 the <VPRED> predicate applied with /z.  The
;; mnemonic is built from <vector_count> and <Vesize>.
1178 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1179 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1181 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1182 (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1185 "ld<vector_count><Vesize>\t%0, %2/z, %1"
1188 ;; -------------------------------------------------------------------------
1189 ;; ---- Extending contiguous loads
1190 ;; -------------------------------------------------------------------------
1191 ;; Includes contiguous forms of:
1198 ;; -------------------------------------------------------------------------
1200 ;; Predicated load and extend, with 8 elements per 128-bit block.
;; NOTE(review): the pattern iterates over SVE_HSDI and SVE_PARTIAL_I, so
;; the 8-elements-per-block wording above may be stale -- confirm against
;; the iterator definitions.
;; Operand 2 is the predicate of the inner load (used with /z in the
;; template) and operand 1 the memory source.  Operand 3 is an outer
;; predicate in the wide mode; when it is not already constant the rewrite
;; step replaces it with CONSTM1_RTX (<SVE_HSDI:VPRED>mode).  The insn
;; condition compares the narrower_mask of the wide mode with the self_mask
;; of the partial mode.
1201 (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1202 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1204 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1205 (ANY_EXTEND:SVE_HSDI
1206 (unspec:SVE_PARTIAL_I
1207 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1208 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1211 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1212 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1213 "&& !CONSTANT_P (operands[3])"
1215 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1219 ;; -------------------------------------------------------------------------
1220 ;; ---- First-faulting contiguous loads
1221 ;; -------------------------------------------------------------------------
1222 ;; Includes contiguous forms of:
1231 ;; -------------------------------------------------------------------------
1233 ;; Contiguous non-extending first-faulting or non-faulting loads.
;; Operand 0 is the destination vector register, operand 1 the address
;; (predicate and constraint both depend on <fn>) and operand 2 the
;; <VPRED> predicate applied with /z.  FFRT_REGNUM is listed as a further
;; input of the pattern.
1234 (define_insn "@aarch64_ld<fn>f1<mode>"
1235 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1237 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1238 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1239 (reg:VNx16BI FFRT_REGNUM)]
1242 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1245 ;; -------------------------------------------------------------------------
1246 ;; ---- First-faulting extending contiguous loads
1247 ;; -------------------------------------------------------------------------
1248 ;; Includes contiguous forms of:
1261 ;; -------------------------------------------------------------------------
1263 ;; Predicated first-faulting or non-faulting load and extend.
;; Operand 2 is the predicate of the inner load (used with /z in the
;; template), operand 1 the address and FFRT_REGNUM a further input.
;; Operand 3 is an outer predicate in the wide mode; when it is not
;; already constant the rewrite step replaces it with
;; CONSTM1_RTX (<SVE_HSDI:VPRED>mode).
1264 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1265 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1267 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1268 (ANY_EXTEND:SVE_HSDI
1269 (unspec:SVE_PARTIAL_I
1270 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1271 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1272 (reg:VNx16BI FFRT_REGNUM)]
1275 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1276 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1277 "&& !CONSTANT_P (operands[3])"
1279 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1283 ;; -------------------------------------------------------------------------
1284 ;; ---- Non-temporal contiguous loads
1285 ;; -------------------------------------------------------------------------
1291 ;; -------------------------------------------------------------------------
1293 ;; Predicated contiguous non-temporal load.
;; Operand 0 is the destination vector register, operand 1 the memory
;; source and operand 2 the <VPRED> predicate applied with /z.
1294 (define_insn "@aarch64_ldnt1<mode>"
1295 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1297 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1298 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1301 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1304 ;; -------------------------------------------------------------------------
1305 ;; ---- Normal gather loads
1306 ;; -------------------------------------------------------------------------
1307 ;; Includes gather forms of:
1310 ;; -------------------------------------------------------------------------
1312 ;; Unpredicated gather loads.
;; Operand 0 is the destination, operand 1 a DI address component,
;; operand 2 a <V_INT_CONTAINER> vector register, operand 3 a const_int
;; and operand 4 the scale.  The expander supplies operand 5 itself, as
;; the predicate register returned by aarch64_ptrue_reg (<VPRED>mode).
1313 (define_expand "gather_load<mode><v_int_container>"
1314 [(set (match_operand:SVE_24 0 "register_operand")
1317 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1318 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1319 (match_operand:DI 3 "const_int_operand")
1320 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1321 (mem:BLK (scratch))]
1322 UNSPEC_LD1_GATHER))]
1325 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1329 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1330 ;; unsigned extension and false for signed extension.
;; Operand 5 is the VNx4BI predicate (applied with /z), operand 1 the
;; scalar DI part of the address and operand 2 the VNx4SI vector part.
;; The six alternatives are, in template order: vector base alone; vector
;; base plus immediate operand 1; scalar base plus vector offset with sxtw
;; and then uxtw; and the same two extensions with the %p4 scale suffix.
1331 (define_insn "mask_gather_load<mode><v_int_container>"
1332 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1334 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1335 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1336 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1337 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1338 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1339 (mem:BLK (scratch))]
1340 UNSPEC_LD1_GATHER))]
1343 ld1<Vesize>\t%0.s, %5/z, [%2.s]
1344 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1345 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1346 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1347 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1348 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1351 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1352 ;; doesn't matter in this case.
;; Operand 5 is the VNx2BI predicate (applied with /z), operand 1 the
;; scalar DI part of the address and operand 2 the VNx2DI vector part.
;; The four alternatives are, in template order: vector base alone; vector
;; base plus immediate operand 1; scalar base plus vector offset; and
;; scalar base plus vector offset with the lsl %p4 scale suffix.
1353 (define_insn "mask_gather_load<mode><v_int_container>"
1354 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1356 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1357 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1358 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1359 (match_operand:DI 3 "const_int_operand")
1360 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1361 (mem:BLK (scratch))]
1362 UNSPEC_LD1_GATHER))]
1365 ld1<Vesize>\t%0.d, %5/z, [%2.d]
1366 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1367 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1368 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1371 ;; Likewise, but with the offset being extended from 32 bits.
;; Operand 2 is a VNx2SI register here and operand 1 must be a register.
;; The two alternatives use the <su>xtw address form without and with the
;; %p4 scale suffix.  When operand 6 is not already constant the rewrite
;; step replaces it with CONSTM1_RTX (VNx2BImode).
1372 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1373 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1375 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1376 (match_operand:DI 1 "register_operand" "rk, rk")
1380 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1382 (match_operand:DI 3 "const_int_operand")
1383 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1384 (mem:BLK (scratch))]
1385 UNSPEC_LD1_GATHER))]
1388 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1389 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1390 "&& !CONSTANT_P (operands[6])"
1392 operands[6] = CONSTM1_RTX (VNx2BImode);
1396 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 2 is a VNx2DI register and operand 1 must be a register.  The
;; two alternatives use the sxtw address form without and with the %p4
;; scale suffix.  When operand 6 is not already constant the rewrite step
;; replaces it with CONSTM1_RTX (VNx2BImode).
1398 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1399 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1401 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1402 (match_operand:DI 1 "register_operand" "rk, rk")
1407 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1409 (match_operand:DI 3 "const_int_operand")
1410 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1411 (mem:BLK (scratch))]
1412 UNSPEC_LD1_GATHER))]
1415 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1416 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1417 "&& !CONSTANT_P (operands[6])"
1419 operands[6] = CONSTM1_RTX (VNx2BImode);
1423 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 2 is a VNx2DI register paired with operand 6, which must
;; satisfy aarch64_sve_uxtw_immediate.  The two alternatives use the uxtw
;; address form without and with the %p4 scale suffix.  This is a plain
;; define_insn, so there is no rewrite step.
1425 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1426 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1428 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1429 (match_operand:DI 1 "register_operand" "rk, rk")
1431 (match_operand:VNx2DI 2 "register_operand" "w, w")
1432 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1433 (match_operand:DI 3 "const_int_operand")
1434 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1435 (mem:BLK (scratch))]
1436 UNSPEC_LD1_GATHER))]
1439 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1440 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1443 ;; -------------------------------------------------------------------------
1444 ;; ---- Extending gather loads
1445 ;; -------------------------------------------------------------------------
1446 ;; Includes gather forms of:
1453 ;; -------------------------------------------------------------------------
1455 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1456 ;; true for unsigned extension and false for signed extension.
;; Operand 5 is the VNx4BI predicate of the inner gather (applied with
;; /z), operand 1 the scalar part of the address and operand 2 the VNx4SI
;; vector part.  The six alternatives follow the same order as the
;; templates: vector base alone, vector base plus immediate, then scalar
;; base plus vector offset with sxtw/uxtw, unscaled and with %p4.
;; Operand 6 is an outer VNx4BI predicate; when it is not already constant
;; the rewrite step replaces it with CONSTM1_RTX (VNx4BImode).
1457 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1458 [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
1460 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1461 (ANY_EXTEND:SVE_4HSI
1463 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1464 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
1465 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1466 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1467 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1468 (mem:BLK (scratch))]
1469 UNSPEC_LD1_GATHER))]
1471 "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1473 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1474 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1475 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1476 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1477 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1478 ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1479 "&& !CONSTANT_P (operands[6])"
1481 operands[6] = CONSTM1_RTX (VNx4BImode);
1485 ;; Predicated extending gather loads for 64-bit elements. The value of
1486 ;; operand 3 doesn't matter in this case.
;; Operand 5 is the VNx2BI predicate of the inner gather (applied with
;; /z), operand 1 the scalar part of the address and operand 2 the VNx2DI
;; vector part.  The four alternatives are: vector base alone, vector base
;; plus immediate, scalar base plus vector offset, and the latter with the
;; lsl %p4 scale suffix.  Operand 6 is an outer VNx2BI predicate; when it
;; is not already constant the rewrite step replaces it with
;; CONSTM1_RTX (VNx2BImode).
1487 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1488 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
1490 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1491 (ANY_EXTEND:SVE_2HSDI
1493 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1494 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
1495 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1496 (match_operand:DI 3 "const_int_operand")
1497 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
1498 (mem:BLK (scratch))]
1499 UNSPEC_LD1_GATHER))]
1501 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1503 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1504 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1505 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1506 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1507 "&& !CONSTANT_P (operands[6])"
1509 operands[6] = CONSTM1_RTX (VNx2BImode);
1513 ;; Likewise, but with the offset being extended from 32 bits.
;; Operand 2 is a VNx2SI register here.  The two alternatives use the
;; <ANY_EXTEND2:su>xtw address form without and with the %p4 scale suffix.
;; When operand 6 or operand 7 is not already constant, the rewrite step
;; sets both to CONSTM1_RTX (VNx2BImode).
1514 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1515 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1518 (ANY_EXTEND:SVE_2HSDI
1520 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1521 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1525 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1527 (match_operand:DI 3 "const_int_operand")
1528 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1529 (mem:BLK (scratch))]
1530 UNSPEC_LD1_GATHER))]
1532 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1534 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1535 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1536 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1538 operands[6] = CONSTM1_RTX (VNx2BImode);
1539 operands[7] = CONSTM1_RTX (VNx2BImode);
1543 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 2 is a VNx2DI register.  The two alternatives use the sxtw
;; address form without and with the %p4 scale suffix.  When operand 6 or
;; operand 7 is not already constant, the rewrite step sets both to
;; CONSTM1_RTX (VNx2BImode).
1545 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1546 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1549 (ANY_EXTEND:SVE_2HSDI
1551 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1552 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1557 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1559 (match_operand:DI 3 "const_int_operand")
1560 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1561 (mem:BLK (scratch))]
1562 UNSPEC_LD1_GATHER))]
1564 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1566 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1567 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1568 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1570 operands[6] = CONSTM1_RTX (VNx2BImode);
1571 operands[7] = CONSTM1_RTX (VNx2BImode);
1575 ;; Likewise, but with the offset being truncated to 32 bits and then
;; Operand 2 is a VNx2DI register paired with operand 6, which must
;; satisfy aarch64_sve_uxtw_immediate.  The two alternatives use the uxtw
;; address form without and with the %p4 scale suffix.  When operand 7 is
;; not already constant the rewrite step replaces it with
;; CONSTM1_RTX (VNx2BImode).
1577 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1578 [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1581 (ANY_EXTEND:SVE_2HSDI
1583 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1584 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1586 (match_operand:VNx2DI 2 "register_operand" "w, w")
1587 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1588 (match_operand:DI 3 "const_int_operand")
1589 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1590 (mem:BLK (scratch))]
1591 UNSPEC_LD1_GATHER))]
1593 "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1595 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1596 ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1597 "&& !CONSTANT_P (operands[7])"
1599 operands[7] = CONSTM1_RTX (VNx2BImode);
1603 ;; -------------------------------------------------------------------------
1604 ;; ---- First-faulting gather loads
1605 ;; -------------------------------------------------------------------------
1606 ;; Includes gather forms of:
1609 ;; -------------------------------------------------------------------------
1611 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1612 ;; 3 is true for unsigned extension and false for signed extension.
;; Operand 5 is the VNx4BI predicate (applied with /z), operand 1 the
;; scalar part of the address and operand 2 the VNx4SI vector part;
;; FFRT_REGNUM is a further input.  The six LDFF1W alternatives are, in
;; template order: vector base alone; vector base plus immediate; scalar
;; base plus vector offset with sxtw and then uxtw; and the same two
;; extensions with the %p4 scale suffix.
1613 (define_insn "@aarch64_ldff1_gather<mode>"
1614 [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1616 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1617 (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1618 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1619 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1620 (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1622 (reg:VNx16BI FFRT_REGNUM)]
1623 UNSPEC_LDFF1_GATHER))]
1626 ldff1w\t%0.s, %5/z, [%2.s]
1627 ldff1w\t%0.s, %5/z, [%2.s, #%1]
1628 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1629 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1630 ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1631 ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1634 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1635 ;; of operand 3 doesn't matter in this case.
;; Operand 5 is the VNx2BI predicate (applied with /z), operand 1 the
;; scalar part of the address and operand 2 the VNx2DI vector part;
;; FFRT_REGNUM is a further input.  The four LDFF1D alternatives are:
;; vector base alone, vector base plus immediate, scalar base plus vector
;; offset, and the latter with the lsl %p4 scale suffix.
1636 (define_insn "@aarch64_ldff1_gather<mode>"
1637 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1639 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1640 (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1641 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1642 (match_operand:DI 3 "const_int_operand")
1643 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1645 (reg:VNx16BI FFRT_REGNUM)]
1646 UNSPEC_LDFF1_GATHER))]
1649 ldff1d\t%0.d, %5/z, [%2.d]
1650 ldff1d\t%0.d, %5/z, [%2.d, #%1]
1651 ldff1d\t%0.d, %5/z, [%1, %2.d]
1652 ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1655 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; Operand 2 is a VNx2DI register and operand 1 must be a register.  The
;; two LDFF1D alternatives use the sxtw address form without and with the
;; %p4 scale suffix.  When operand 6 is not already constant the rewrite
;; step replaces it with CONSTM1_RTX (VNx2BImode).
1656 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1657 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1659 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1660 (match_operand:DI 1 "register_operand" "rk, rk")
1665 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1667 (match_operand:DI 3 "const_int_operand")
1668 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1670 (reg:VNx16BI FFRT_REGNUM)]
1671 UNSPEC_LDFF1_GATHER))]
1674 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1675 ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1676 "&& !CONSTANT_P (operands[6])"
1678 operands[6] = CONSTM1_RTX (VNx2BImode);
1682 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Operand 2 is a VNx2DI register paired with operand 6, which must
;; satisfy aarch64_sve_uxtw_immediate.  The two LDFF1D alternatives use
;; the uxtw address form without and with the %p4 scale suffix.  This is
;; a plain define_insn, so there is no rewrite step.
1683 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1684 [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1686 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1687 (match_operand:DI 1 "register_operand" "rk, rk")
1689 (match_operand:VNx2DI 2 "register_operand" "w, w")
1690 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1691 (match_operand:DI 3 "const_int_operand")
1692 (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1694 (reg:VNx16BI FFRT_REGNUM)]
1695 UNSPEC_LDFF1_GATHER))]
1698 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1699 ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1702 ;; -------------------------------------------------------------------------
1703 ;; ---- First-faulting extending gather loads
1704 ;; -------------------------------------------------------------------------
1705 ;; Includes gather forms of:
1712 ;; -------------------------------------------------------------------------
1714 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1715 ;; Operand 3 is true for unsigned extension and false for signed extension.
;; Operand 5 is the VNx4BI predicate of the inner gather (applied with
;; /z), operand 1 the scalar part of the address and operand 2 the
;; VNx4_WIDE vector part; FFRT_REGNUM is a further input.  The six
;; alternatives follow the template order: vector base alone, vector base
;; plus immediate, then scalar base plus vector offset with sxtw/uxtw,
;; unscaled and with %p4.  Operand 6 is an outer VNx4BI predicate; when it
;; is not already constant the rewrite step replaces it with
;; CONSTM1_RTX (VNx4BImode).
1716 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1717 [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1719 [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1720 (ANY_EXTEND:VNx4_WIDE
1722 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1723 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1724 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1725 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1726 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1728 (reg:VNx16BI FFRT_REGNUM)]
1729 UNSPEC_LDFF1_GATHER))]
1733 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1734 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1735 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1736 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1737 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1738 ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1739 "&& !CONSTANT_P (operands[6])"
1741 operands[6] = CONSTM1_RTX (VNx4BImode);
1745 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1746 ;; The value of operand 3 doesn't matter in this case.
;; Operand 5 is the VNx2BI predicate of the inner gather (applied with
;; /z), operand 1 the scalar part of the address and operand 2 the
;; VNx2_WIDE vector part; FFRT_REGNUM is a further input.  The four
;; alternatives are: vector base alone, vector base plus immediate, scalar
;; base plus vector offset, and the latter with the lsl %p4 scale suffix.
;; Operand 6 is an outer VNx2BI predicate; when it is not already constant
;; the rewrite step replaces it with CONSTM1_RTX (VNx2BImode).
1747 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1748 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1750 [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1751 (ANY_EXTEND:VNx2_WIDE
1753 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1754 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1755 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1756 (match_operand:DI 3 "const_int_operand")
1757 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1759 (reg:VNx16BI FFRT_REGNUM)]
1760 UNSPEC_LDFF1_GATHER))]
1764 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1765 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1766 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1767 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1768 "&& !CONSTANT_P (operands[6])"
1770 operands[6] = CONSTM1_RTX (VNx2BImode);
1774 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; Operand 2 is a VNx2DI register.  The two alternatives use the sxtw
;; address form without and with the %p4 scale suffix.  When operand 6 or
;; operand 7 is not already constant, the rewrite step sets both to
;; CONSTM1_RTX (VNx2BImode).
1775 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1776 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1779 (ANY_EXTEND:VNx2_WIDE
1781 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1782 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1787 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1789 (match_operand:DI 3 "const_int_operand")
1790 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1792 (reg:VNx16BI FFRT_REGNUM)]
1793 UNSPEC_LDFF1_GATHER))]
1797 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1798 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1799 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1801 operands[6] = CONSTM1_RTX (VNx2BImode);
1802 operands[7] = CONSTM1_RTX (VNx2BImode);
1806 ;; Likewise, but with the offset being zero-extended from 32 bits.
;; Operand 2 is a VNx2DI register paired with operand 6, which must
;; satisfy aarch64_sve_uxtw_immediate.  The two alternatives use the uxtw
;; address form without and with the %p4 scale suffix.  When operand 7 is
;; not already constant the rewrite step replaces it with
;; CONSTM1_RTX (VNx2BImode).
1807 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1808 [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1811 (ANY_EXTEND:VNx2_WIDE
1813 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1814 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1816 (match_operand:VNx2DI 2 "register_operand" "w, w")
1817 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1818 (match_operand:DI 3 "const_int_operand")
1819 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1821 (reg:VNx16BI FFRT_REGNUM)]
1822 UNSPEC_LDFF1_GATHER))]
1826 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1827 ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1828 "&& !CONSTANT_P (operands[7])"
1830 operands[7] = CONSTM1_RTX (VNx2BImode);
1834 ;; =========================================================================
1836 ;; =========================================================================
1838 ;; -------------------------------------------------------------------------
1839 ;; ---- Contiguous prefetches
1840 ;; -------------------------------------------------------------------------
1841 ;; Includes contiguous forms of:
1846 ;; -------------------------------------------------------------------------
1848 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
1849 ;; operation (as an svprfop), with operands 3 and 4 providing distilled information for the RTL prefetch.
1851 (define_insn "@aarch64_sve_prefetch<mode>"
1852 [(prefetch (unspec:DI
1853 [(match_operand:<VPRED> 0 "register_operand" "Upl")
1854 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1855 (match_operand:DI 2 "const_int_operand")]
1856 UNSPEC_SVE_PREFETCH)
1857 (match_operand:DI 3 "const_int_operand")
1858 (match_operand:DI 4 "const_int_operand"))]
1861 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1862 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1866 ;; -------------------------------------------------------------------------
1867 ;; ---- Gather prefetches
1868 ;; -------------------------------------------------------------------------
1869 ;; Includes gather forms of:
1874 ;; -------------------------------------------------------------------------
1876 ;; Predicated gather prefetches for 32-bit bases and offsets. The operands are:
1878 ;; 0: the governing predicate
1879 ;; 1: the scalar component of the address
1880 ;; 2: the vector component of the address
1881 ;; 3: 1 for zero extension, 0 for sign extension
1882 ;; 4: the scale multiplier
1883 ;; 5: a vector zero that identifies the mode of data being accessed
1884 ;; 6: the prefetch operator (an svprfop)
1885 ;; 7: the normal RTL prefetch rw flag
1886 ;; 8: the normal RTL prefetch locality value
1887 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1888 [(prefetch (unspec:DI
1889 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1890 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1891 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1892 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1893 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1894 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1895 (match_operand:DI 6 "const_int_operand")]
1896 UNSPEC_SVE_PREFETCH_GATHER)
1897 (match_operand:DI 7 "const_int_operand")
1898 (match_operand:DI 8 "const_int_operand"))]
1901 static const char *const insns[][2] = {
1902 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1903 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1904 "prfb", "%0, [%1, %2.s, sxtw]",
1905 "prfb", "%0, [%1, %2.s, uxtw]",
1906 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1907 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1909 const char *const *parts = insns[which_alternative];
1910 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1914 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
1915 ;; doesn't matter in this case.
1916 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
1917 [(prefetch (unspec:DI
1918 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
1919 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
1920 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
1921 (match_operand:DI 3 "const_int_operand")
1922 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
1923 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1924 (match_operand:DI 6 "const_int_operand")]
1925 UNSPEC_SVE_PREFETCH_GATHER)
1926 (match_operand:DI 7 "const_int_operand")
1927 (match_operand:DI 8 "const_int_operand"))]
1930 static const char *const insns[][2] = {
1931 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
1932 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
1933 "prfb", "%0, [%1, %2.d]",
1934 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
1936 const char *const *parts = insns[which_alternative];
1937 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1941 ;; Likewise, but with the offset being sign-extended from 32 bits.
;; The first alternative (scale constraint Ui1) prints an unscaled PRFB
;; form; the second prints the scaled prf<Vesize> form with "sxtw %p4".
;; Operand 6 (the svprfop) is passed to aarch64_output_sve_prefetch along
;; with the chosen mnemonic and address template.
;; The rewrite step replaces operand 9 with a copy of the governing
;; predicate (operand 0) whenever the two are not already equal.
1942 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
1943 [(prefetch (unspec:DI
1944 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1945 (match_operand:DI 1 "register_operand" "rk, rk")
1950 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1952 (match_operand:DI 3 "const_int_operand")
1953 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1954 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1955 (match_operand:DI 6 "const_int_operand")]
1956 UNSPEC_SVE_PREFETCH_GATHER)
1957 (match_operand:DI 7 "const_int_operand")
1958 (match_operand:DI 8 "const_int_operand"))]
1961 static const char *const insns[][2] = {
1962 "prfb", "%0, [%1, %2.d, sxtw]",
1963 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
1965 const char *const *parts = insns[which_alternative];
1966 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1968 "&& !rtx_equal_p (operands[0], operands[9])"
1970 operands[9] = copy_rtx (operands[0]);
1974 ;; Likewise, but with the offset being zero-extended from 32 bits.
1975 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
1976 [(prefetch (unspec:DI
1977 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1978 (match_operand:DI 1 "register_operand" "rk, rk")
1980 (match_operand:VNx2DI 2 "register_operand" "w, w")
1981 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
1982 (match_operand:DI 3 "const_int_operand")
1983 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1984 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1985 (match_operand:DI 6 "const_int_operand")]
1986 UNSPEC_SVE_PREFETCH_GATHER)
1987 (match_operand:DI 7 "const_int_operand")
1988 (match_operand:DI 8 "const_int_operand"))]
1991 static const char *const insns[][2] = {
1992 "prfb", "%0, [%1, %2.d, uxtw]",
1993 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
1995 const char *const *parts = insns[which_alternative];
1996 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2000 ;; =========================================================================
2002 ;; =========================================================================
2004 ;; -------------------------------------------------------------------------
2005 ;; ---- Normal contiguous stores
2006 ;; -------------------------------------------------------------------------
2007 ;; Includes contiguous forms of:
2024 ;; -------------------------------------------------------------------------
;; Predicated ST1: store vector operand 1 to memory operand 0 under the
;; control of predicate operand 2.
;; NOTE(review): the "+m" constraint marks the memory as read-write,
;; presumably because lanes that are not stored keep their old contents
;; -- confirm.
2027 (define_insn "maskstore<mode><vpred>"
2028 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2030 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2031 (match_operand:SVE_ALL 1 "register_operand" "w")
2035 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2038 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2039 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2040 ;; There doesn't seem to be any obvious benefit to treating the all-true
2041 ;; case differently though. In particular, it's very unlikely that we'll
2042 ;; only find out during RTL that a store_lanes is dead.
2043 (define_expand "vec_store_lanes<mode><vsingle>"
2044 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2047 (match_operand:SVE_STRUCT 1 "register_operand")
2052 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2056 ;; Predicated ST[234].
;; Operand 0 is the memory destination, operand 1 the SVE_STRUCT source
;; register and operand 2 the governing predicate.  <vector_count> supplies
;; the digit that follows "st" in the mnemonic.
2057 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2058 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2060 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2061 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2065 "st<vector_count><Vesize>\t%1, %2, %0"
2068 ;; -------------------------------------------------------------------------
2069 ;; ---- Truncating contiguous stores
2070 ;; -------------------------------------------------------------------------
2075 ;; -------------------------------------------------------------------------
2077 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2078 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2079 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2081 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2082 (truncate:VNx8_NARROW
2083 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2087 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2090 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2091 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2092 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2094 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2095 (truncate:VNx4_NARROW
2096 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2100 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2103 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2104 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2105 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2107 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2108 (truncate:VNx2_NARROW
2109 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2113 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2116 ;; -------------------------------------------------------------------------
2117 ;; ---- Non-temporal contiguous stores
2118 ;; -------------------------------------------------------------------------
2124 ;; -------------------------------------------------------------------------
;; Predicated non-temporal contiguous store (STNT1): store vector operand 1
;; to memory operand 0 under the control of predicate operand 2.
2126 (define_insn "@aarch64_stnt1<mode>"
2127 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2129 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2130 (match_operand:SVE_FULL 1 "register_operand" "w")
2134 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2137 ;; -------------------------------------------------------------------------
2138 ;; ---- Normal scatter stores
2139 ;; -------------------------------------------------------------------------
2140 ;; Includes scatter forms of:
2143 ;; -------------------------------------------------------------------------
2145 ;; Unpredicated scatter stores.
2146 (define_expand "scatter_store<mode><v_int_container>"
2147 [(set (mem:BLK (scratch))
2150 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2151 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2152 (match_operand:DI 2 "const_int_operand")
2153 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2154 (match_operand:SVE_24 4 "register_operand")]
2155 UNSPEC_ST1_SCATTER))]
2158 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2162 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2163 ;; unsigned extension and false for signed extension.
2164 (define_insn "mask_scatter_store<mode><v_int_container>"
2165 [(set (mem:BLK (scratch))
2167 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2168 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2169 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2170 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2171 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2172 (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2173 UNSPEC_ST1_SCATTER))]
2176 st1<Vesize>\t%4.s, %5, [%1.s]
2177 st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2178 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2179 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2180 st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2181 st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2184 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2185 ;; doesn't matter in this case.
2186 (define_insn "mask_scatter_store<mode><v_int_container>"
2187 [(set (mem:BLK (scratch))
2189 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2190 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2191 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2192 (match_operand:DI 2 "const_int_operand")
2193 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2194 (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2195 UNSPEC_ST1_SCATTER))]
2198 st1<Vesize>\t%4.d, %5, [%1.d]
2199 st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2200 st1<Vesize>\t%4.d, %5, [%0, %1.d]
2201 st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2204 ;; Likewise, but with the offset being extended from 32 bits.
;; The vector offset (operand 1) is a VNx2SI register and the address is
;; printed with the <su>xtw modifier; the second alternative also prints
;; the scale as "%p3".
;; Once matched, operand 6 is rewritten to an all-ones <VPRED> constant
;; (CONSTM1_RTX) unless it is already a constant.
2205 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2206 [(set (mem:BLK (scratch))
2208 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2209 (match_operand:DI 0 "register_operand" "rk, rk")
2213 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2215 (match_operand:DI 2 "const_int_operand")
2216 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2217 (match_operand:SVE_2 4 "register_operand" "w, w")]
2218 UNSPEC_ST1_SCATTER))]
2221 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2222 st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2223 "&& !CONSTANT_P (operands[6])"
2225 operands[6] = CONSTM1_RTX (<VPRED>mode);
2229 ;; Likewise, but with the offset being truncated to 32 bits and then sign-extended.
2231 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2232 [(set (mem:BLK (scratch))
2234 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2235 (match_operand:DI 0 "register_operand" "rk, rk")
2240 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2242 (match_operand:DI 2 "const_int_operand")
2243 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2244 (match_operand:SVE_2 4 "register_operand" "w, w")]
2245 UNSPEC_ST1_SCATTER))]
2248 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2249 st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2250 "&& !CONSTANT_P (operands[6])"
2252 operands[6] = CONSTM1_RTX (<VPRED>mode);
2256 ;; Likewise, but with the offset being truncated to 32 bits and then zero-extended.
2258 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2259 [(set (mem:BLK (scratch))
2261 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2262 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2264 (match_operand:VNx2DI 1 "register_operand" "w, w")
2265 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2266 (match_operand:DI 2 "const_int_operand")
2267 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2268 (match_operand:SVE_2 4 "register_operand" "w, w")]
2269 UNSPEC_ST1_SCATTER))]
2272 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2273 st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2276 ;; -------------------------------------------------------------------------
2277 ;; ---- Truncating scatter stores
2278 ;; -------------------------------------------------------------------------
2279 ;; Includes scatter forms of:
2283 ;; -------------------------------------------------------------------------
2285 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2286 ;; true for unsigned extension and false for signed extension.
2287 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2288 [(set (mem:BLK (scratch))
2290 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2291 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2292 (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2293 (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2294 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2295 (truncate:VNx4_NARROW
2296 (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2297 UNSPEC_ST1_SCATTER))]
2300 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2301 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2302 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2303 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2304 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2305 st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2308 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2309 ;; operand 2 doesn't matter in this case.
2310 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2311 [(set (mem:BLK (scratch))
2313 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2314 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2315 (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2316 (match_operand:DI 2 "const_int_operand")
2317 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2318 (truncate:VNx2_NARROW
2319 (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2320 UNSPEC_ST1_SCATTER))]
2323 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2324 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2325 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2326 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2329 ;; Likewise, but with the offset being sign-extended from 32 bits.
2330 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2331 [(set (mem:BLK (scratch))
2333 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2334 (match_operand:DI 0 "register_operand" "rk, rk")
2339 (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2341 (match_operand:DI 2 "const_int_operand")
2342 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2343 (truncate:VNx2_NARROW
2344 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2345 UNSPEC_ST1_SCATTER))]
2348 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2349 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2350 "&& !rtx_equal_p (operands[5], operands[6])"
2352 operands[6] = copy_rtx (operands[5]);
2356 ;; Likewise, but with the offset being zero-extended from 32 bits.
2357 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2358 [(set (mem:BLK (scratch))
2360 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2361 (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2363 (match_operand:VNx2DI 1 "register_operand" "w, w")
2364 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2365 (match_operand:DI 2 "const_int_operand")
2366 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2367 (truncate:VNx2_NARROW
2368 (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2369 UNSPEC_ST1_SCATTER))]
2372 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2373 st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2376 ;; =========================================================================
2377 ;; == Vector creation
2378 ;; =========================================================================
2380 ;; -------------------------------------------------------------------------
2381 ;; ---- [INT,FP] Duplicate element
2382 ;; -------------------------------------------------------------------------
2398 ;; -------------------------------------------------------------------------
;; Duplicate scalar operand 1 into every element of SVE vector operand 0.
;; A memory source is expanded directly as an LD1R (sve_ld1r<mode>),
;; governed by the predicate returned by aarch64_ptrue_reg and with a zero
;; vector as the final operand.
2400 (define_expand "vec_duplicate<mode>"
2402 [(set (match_operand:SVE_ALL 0 "register_operand")
2403 (vec_duplicate:SVE_ALL
2404 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2405 (clobber (scratch:VNx16BI))])]
2408 if (MEM_P (operands[1]))
2410 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2411 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2412 CONST0_RTX (<MODE>mode)));
2418 ;; Accept memory operands for the benefit of combine, and also in case
2419 ;; the scalar input gets spilled to memory during RA. We want to split
2420 ;; the load at the first opportunity in order to allow the PTRUE to be
2421 ;; optimized with surrounding code.
2422 (define_insn_and_split "*vec_duplicate<mode>_reg"
2423 [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2424 (vec_duplicate:SVE_ALL
2425 (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2426 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2429 mov\t%0.<Vetype>, %<vwcore>1
2430 mov\t%0.<Vetype>, %<Vetype>1
2432 "&& MEM_P (operands[1])"
2435 if (GET_CODE (operands[2]) == SCRATCH)
2436 operands[2] = gen_reg_rtx (VNx16BImode);
2437 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2438 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2439 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2440 CONST0_RTX (<MODE>mode)));
2443 [(set_attr "length" "4,4,8")]
2446 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
;; Before output, operand 1 is re-expressed as a <MODE>mode register with
;; the same register number; the instruction is a DUP of quadword
;; element 0 of that register.
2447 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2448 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2449 (vec_duplicate:SVE_FULL
2450 (match_operand:<V128> 1 "register_operand" "w")))]
2451 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2453 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2454 return "dup\t%0.q, %1.q[0]";
2458 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2459 ;; The SVE register layout puts memory lane N into (architectural)
2460 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2461 ;; lsb into the register lsb. We therefore have to describe this in rtl
2462 ;; terms as a reverse of the V128 vector followed by a duplicate.
2463 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2464 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2465 (vec_duplicate:SVE_FULL
2467 (match_operand:<V128> 1 "register_operand" "w")
2468 (match_operand 2 "descending_int_parallel"))))]
2471 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2472 GET_MODE_NUNITS (<V128>mode) - 1)"
2474 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2475 return "dup\t%0.q, %1.q[0]";
2479 ;; This is used for vec_duplicate<mode>s from memory, but can also
2480 ;; be used by combine to optimize selects of a vec_duplicate<mode> with zero.
2482 (define_insn "sve_ld1r<mode>"
2483 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2485 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2486 (vec_duplicate:SVE_ALL
2487 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2488 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2491 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2494 ;; Load 128 bits from memory under predicate control and duplicate them to fill a vector.
2496 (define_insn "@aarch64_sve_ld1rq<mode>"
2497 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2499 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2500 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2504 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2505 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
;; Predicated LD1RO: load from the OImode memory operand 1 under predicate
;; operand 2 (zeroing form, "%2/z") into vector operand 0.  Before output,
;; the memory operand is re-expressed as a <VEL>mode MEM at the same address.
2509 (define_insn "@aarch64_sve_ld1ro<mode>"
2510 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2512 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2513 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2518 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2519 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2523 ;; -------------------------------------------------------------------------
2524 ;; ---- [INT,FP] Initialize from individual elements
2525 ;; -------------------------------------------------------------------------
2528 ;; -------------------------------------------------------------------------
;; Initialize SVE vector operand 0 from operand 1.  All of the work is
;; delegated to aarch64_sve_expand_vector_init.
2530 (define_expand "vec_init<mode><Vel>"
2531 [(match_operand:SVE_FULL 0 "register_operand")
2532 (match_operand 1 "")]
2535 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2540 ;; Shift an SVE vector left and insert a scalar into element 0.
2541 (define_insn "vec_shl_insert_<mode>"
2542 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2544 [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2545 (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2549 insr\t%0.<Vetype>, %<vwcore>2
2550 insr\t%0.<Vetype>, %<Vetype>2
2551 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2552 movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2553 [(set_attr "movprfx" "*,*,yes,yes")]
2556 ;; -------------------------------------------------------------------------
2557 ;; ---- [INT] Linear series
2558 ;; -------------------------------------------------------------------------
2561 ;; -------------------------------------------------------------------------
;; Set vector operand 0 to a linear series using INDEX, with operand 1 as
;; the base and operand 2 as the step.  The three alternatives are:
;; immediate base with register step, register base with immediate step,
;; and both in registers.
2563 (define_insn "vec_series<mode>"
2564 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2566 (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2567 (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2570 index\t%0.<Vctype>, #%1, %<vccore>2
2571 index\t%0.<Vctype>, %<vccore>1, #%2
2572 index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2575 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2576 ;; of an INDEX instruction.
2577 (define_insn "*vec_series<mode>_plus"
2578 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2580 (vec_duplicate:SVE_I
2581 (match_operand:<VEL> 1 "register_operand" "r"))
2582 (match_operand:SVE_I 2 "immediate_operand")))]
2583 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2585 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2586 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2590 ;; -------------------------------------------------------------------------
2591 ;; ---- [PRED] Duplicate element
2592 ;; -------------------------------------------------------------------------
2593 ;; The patterns in this section are synthetic.
2594 ;; -------------------------------------------------------------------------
2596 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2597 ;; input into the top bit and using a WHILELO. An alternative would be to
2598 ;; duplicate the input and do a compare with zero.
2599 (define_expand "vec_duplicate<mode>"
2600 [(set (match_operand:PRED_ALL 0 "register_operand")
2601 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2604 rtx tmp = gen_reg_rtx (DImode);
2605 rtx op1 = gen_lowpart (DImode, operands[1]);
2606 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2607 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2612 ;; =========================================================================
2613 ;; == Vector decomposition
2614 ;; =========================================================================
2616 ;; -------------------------------------------------------------------------
2617 ;; ---- [INT,FP] Extract index
2618 ;; -------------------------------------------------------------------------
2620 ;; - DUP (Advanced SIMD)
2623 ;; - ST1 (Advanced SIMD)
2624 ;; - UMOV (Advanced SIMD)
2625 ;; -------------------------------------------------------------------------
;; Extract element operand 2 of SVE vector operand 1 into scalar operand 0.
;; Two cases are expanded specially here:
;; - the last element (index equal to the number of units minus one) is
;;   taken with extract_last under a predicate from aarch64_pfalse_reg;
;; - a non-constant index is turned into a single-element predicate
;;   (a vec_series with step -1 compared for equality with zero), which is
;;   then passed to extract_last.
2627 (define_expand "vec_extract<mode><Vel>"
2628 [(set (match_operand:<VEL> 0 "register_operand")
2630 (match_operand:SVE_FULL 1 "register_operand")
2631 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2635 if (poly_int_rtx_p (operands[2], &val)
2636 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2638 /* The last element can be extracted with a LASTB and a false
2640 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2641 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2644 if (!CONST_INT_P (operands[2]))
2646 /* Create an index with operand[2] as the base and -1 as the step.
2647 It will then be zero for the element we care about. */
2648 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2649 index = force_reg (<VEL_INT>mode, index);
2650 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2651 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2653 /* Get a predicate that is true for only that element. */
2654 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2655 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2656 rtx sel = gen_reg_rtx (<VPRED>mode);
2657 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2659 /* Select the element using LASTB. */
2660 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2666 ;; Extract element zero. This is a special case because we want to force
2667 ;; the registers to be the same for the second alternative, and then
2668 ;; split the instruction into nothing after RA.
2669 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2670 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2672 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2673 (parallel [(const_int 0)])))]
2676 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2677 switch (which_alternative)
2680 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2684 return "st1\\t{%1.<Vetype>}[0], %0";
2689 "&& reload_completed
2690 && REG_P (operands[0])
2691 && REGNO (operands[0]) == REGNO (operands[1])"
2694 emit_note (NOTE_INSN_DELETED);
2697 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2700 ;; Extract an element from the Advanced SIMD portion of the register.
2701 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2702 ;; want any change in lane number on big-endian targets.
2703 (define_insn "*vec_extract<mode><Vel>_v128"
2704 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2706 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2707 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2709 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2711 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2712 switch (which_alternative)
2715 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2717 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2719 return "st1\\t{%1.<Vetype>}[%2], %0";
2724 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2727 ;; Extract an element in the range of DUP. This pattern allows the
2728 ;; source and destination to be different.
2729 (define_insn "*vec_extract<mode><Vel>_dup"
2730 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2732 (match_operand:SVE_FULL 1 "register_operand" "w")
2733 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2735 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2737 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2738 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2742 ;; Extract an element outside the range of DUP. EXT itself requires the source
2743 ;; and destination to be the same; the second alternative uses MOVPRFX when they differ.
2744 (define_insn "*vec_extract<mode><Vel>_ext"
2745 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2747 (match_operand:SVE_FULL 1 "register_operand" "0, w")
2748 (parallel [(match_operand:SI 2 "const_int_operand")])))]
2749 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2751 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2752 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2753 return (which_alternative == 0
2754 ? "ext\t%0.b, %0.b, %0.b, #%2"
2755 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2757 [(set_attr "movprfx" "*,yes")]
2760 ;; -------------------------------------------------------------------------
2761 ;; ---- [INT,FP] Extract active element
2762 ;; -------------------------------------------------------------------------
2766 ;; -------------------------------------------------------------------------
2768 ;; Extract the last active element of vector operand 2 into operand 0, using predicate operand 1.
2769 ;; If no elements are active, extract the last inactive element instead.
2770 (define_insn "@extract_<last_op>_<mode>"
2771 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2773 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2774 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2778 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2779 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2782 ;; -------------------------------------------------------------------------
2783 ;; ---- [PRED] Extract index
2784 ;; -------------------------------------------------------------------------
2785 ;; The patterns in this section are synthetic.
2786 ;; -------------------------------------------------------------------------
2788 ;; Handle extractions from a predicate by converting to an integer vector
2789 ;; and extracting from there.
2790 (define_expand "vec_extract<vpred><Vel>"
2791 [(match_operand:<VEL> 0 "register_operand")
2792 (match_operand:<VPRED> 1 "register_operand")
2793 (match_operand:SI 2 "nonmemory_operand")
2794 ;; Dummy operand to which we can attach the iterator.
2795 (reg:SVE_FULL_I V0_REGNUM)]
2798 rtx tmp = gen_reg_rtx (<MODE>mode);
2799 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2800 CONST1_RTX (<MODE>mode),
2801 CONST0_RTX (<MODE>mode)));
2802 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2807 ;; =========================================================================
2808 ;; == Unary arithmetic
2809 ;; =========================================================================
2811 ;; -------------------------------------------------------------------------
2812 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2813 ;; -------------------------------------------------------------------------
2818 ;; - CNT (= popcount)
2821 ;; -------------------------------------------------------------------------
2823 ;; Unpredicated integer unary arithmetic.
2824 (define_expand "<optab><mode>2"
2825 [(set (match_operand:SVE_I 0 "register_operand")
2828 (SVE_INT_UNARY:SVE_I
2829 (match_operand:SVE_I 1 "register_operand"))]
2833 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2837 ;; Integer unary arithmetic predicated with a PTRUE.
2838 (define_insn "@aarch64_pred_<optab><mode>"
2839 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2841 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2842 (SVE_INT_UNARY:SVE_I
2843 (match_operand:SVE_I 2 "register_operand" "w"))]
2846 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2849 ;; Predicated integer unary arithmetic with merging.
2850 (define_expand "@cond_<optab><mode>"
2851 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2853 [(match_operand:<VPRED> 1 "register_operand")
2854 (SVE_INT_UNARY:SVE_FULL_I
2855 (match_operand:SVE_FULL_I 2 "register_operand"))
2856 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
2861 ;; Predicated integer unary arithmetic, merging with the first input.
2862 (define_insn "*cond_<optab><mode>_2"
2863 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2865 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2866 (SVE_INT_UNARY:SVE_FULL_I
2867 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
2872 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2873 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2874 [(set_attr "movprfx" "*,yes")]
2877 ;; Predicated integer unary arithmetic, merging with an independent value.
2879 ;; The earlyclobber isn't needed for the first alternative, but omitting
2880 ;; it would only help the case in which operands 2 and 3 are the same,
2881 ;; which is handled above rather than here. Marking all the alternatives
2882 ;; as earlyclobber helps to make the instruction more regular to the
2883 ;; register allocator.
2884 (define_insn "*cond_<optab><mode>_any"
2885 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
2887 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2888 (SVE_INT_UNARY:SVE_FULL_I
2889 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
2890 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2892 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2894 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2895 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2896 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2897 [(set_attr "movprfx" "*,yes,yes")]
2900 ;; -------------------------------------------------------------------------
2901 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2902 ;; -------------------------------------------------------------------------
2908 ;; -------------------------------------------------------------------------
2910 ;; Predicated integer unary operations.
2911 (define_insn "@aarch64_pred_<optab><mode>"
2912 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2914 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2916 [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
2919 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2920 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2923 ;; Predicated integer unary operations with merging.
2924 (define_insn "@cond_<optab><mode>"
2925 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
2927 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2929 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
2931 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2933 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2935 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2936 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2937 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2938 [(set_attr "movprfx" "*,yes,yes")]
2941 ;; -------------------------------------------------------------------------
2942 ;; ---- [INT] Sign and zero extension
2943 ;; -------------------------------------------------------------------------
2951 ;; -------------------------------------------------------------------------
2953 ;; Unpredicated sign and zero extension from a narrower mode.
2954 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2955 [(set (match_operand:SVE_HSDI 0 "register_operand")
2958 (ANY_EXTEND:SVE_HSDI
2959 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
2961 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2963 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
2967 ;; Predicated sign and zero extension from a narrower mode.
2968 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2969 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2971 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
2972 (ANY_EXTEND:SVE_HSDI
2973 (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
2975 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2976 "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
2979 ;; Predicated truncate-and-sign-extend operations.
2980 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2981 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2982 (unspec:SVE_FULL_HSDI
2983 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
2984 (sign_extend:SVE_FULL_HSDI
2985 (truncate:SVE_PARTIAL_I
2986 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
2989 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2990 "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2993 ;; Predicated truncate-and-sign-extend operations with merging.
2994 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2995 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
2996 (unspec:SVE_FULL_HSDI
2997 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
2998 (sign_extend:SVE_FULL_HSDI
2999 (truncate:SVE_PARTIAL_I
3000 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
3001 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3004 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3006 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3007 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3008 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3009 [(set_attr "movprfx" "*,yes,yes")]
3012 ;; Predicated truncate-and-zero-extend operations, merging with the
;; first input.
3015 ;; The canonical form of this operation is an AND of a constant rather
3016 ;; than (zero_extend (truncate ...)).
3017 (define_insn "*cond_uxt<mode>_2"
3018 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3020 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3022 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3023 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3028 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3029 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3030 [(set_attr "movprfx" "*,yes")]
3033 ;; Predicated truncate-and-zero-extend operations, merging with an
3034 ;; independent value.
3036 ;; The earlyclobber isn't needed for the first alternative, but omitting
3037 ;; it would only help the case in which operands 2 and 4 are the same,
3038 ;; which is handled above rather than here. Marking all the alternatives
3039 ;; as earlyclobber helps to make the instruction more regular to the
3040 ;; register allocator.
3041 (define_insn "*cond_uxt<mode>_any"
3042 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3044 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3046 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3047 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3048 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3050 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3052 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3053 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3054 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3055 [(set_attr "movprfx" "*,yes,yes")]
3058 ;; -------------------------------------------------------------------------
3059 ;; ---- [INT] Truncation
3060 ;; -------------------------------------------------------------------------
3061 ;; The patterns in this section are synthetic.
3062 ;; -------------------------------------------------------------------------
3064 ;; Truncate to a partial SVE vector from either a full vector or a
3065 ;; wider partial vector. This is a no-op, because we can just ignore
3066 ;; the unused upper bits of the source.
3067 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3068 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3069 (truncate:SVE_PARTIAL_I
3070 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3071 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3073 "&& reload_completed"
3074 [(set (match_dup 0) (match_dup 1))]
3076 operands[1] = aarch64_replace_reg_mode (operands[1],
3077 <SVE_PARTIAL_I:MODE>mode);
3081 ;; -------------------------------------------------------------------------
3082 ;; ---- [INT] Logical inverse
3083 ;; -------------------------------------------------------------------------
3086 ;; -------------------------------------------------------------------------
3088 ;; Predicated logical inverse.
3089 (define_expand "@aarch64_pred_cnot<mode>"
3090 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3093 [(match_operand:<VPRED> 1 "register_operand")
3094 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3096 (match_operand:SVE_FULL_I 3 "register_operand")
3104 operands[4] = CONST0_RTX (<MODE>mode);
3105 operands[5] = CONST1_RTX (<MODE>mode);
3109 (define_insn "*cnot<mode>"
3110 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3113 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3114 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3116 (match_operand:SVE_FULL_I 2 "register_operand" "w")
3117 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3119 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3123 "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3126 ;; Predicated logical inverse with merging.
3127 (define_expand "@cond_cnot<mode>"
3128 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3130 [(match_operand:<VPRED> 1 "register_operand")
3134 (const_int SVE_KNOWN_PTRUE)
3136 (match_operand:SVE_FULL_I 2 "register_operand")
3142 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3146 operands[4] = CONSTM1_RTX (<VPRED>mode);
3147 operands[5] = CONST0_RTX (<MODE>mode);
3148 operands[6] = CONST1_RTX (<MODE>mode);
3152 ;; Predicated logical inverse, merging with the first input.
3153 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3154 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3156 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3157 ;; Logical inverse of operand 2 (as above).
3161 (const_int SVE_KNOWN_PTRUE)
3163 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3164 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3166 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3173 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3174 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3175 "&& !CONSTANT_P (operands[5])"
3177 operands[5] = CONSTM1_RTX (<VPRED>mode);
3179 [(set_attr "movprfx" "*,yes")]
3182 ;; Predicated logical inverse, merging with an independent value.
3184 ;; The earlyclobber isn't needed for the first alternative, but omitting
3185 ;; it would only help the case in which operands 2 and 6 are the same,
3186 ;; which is handled above rather than here. Marking all the alternatives
3187 ;; as earlyclobber helps to make the instruction more regular to the
3188 ;; register allocator.
3189 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3190 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3192 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3193 ;; Logical inverse of operand 2 (as above).
3197 (const_int SVE_KNOWN_PTRUE)
3199 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3200 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3202 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3205 (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3207 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3209 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3210 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3211 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3212 "&& !CONSTANT_P (operands[5])"
3214 operands[5] = CONSTM1_RTX (<VPRED>mode);
3216 [(set_attr "movprfx" "*,yes,yes")]
3219 ;; -------------------------------------------------------------------------
3220 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3221 ;; -------------------------------------------------------------------------
3224 ;; -------------------------------------------------------------------------
3226 ;; Unpredicated unary operations that take an integer and return a float.
3227 (define_insn "@aarch64_sve_<optab><mode>"
3228 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3230 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3233 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3236 ;; -------------------------------------------------------------------------
3237 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3238 ;; -------------------------------------------------------------------------
3253 ;; -------------------------------------------------------------------------
3255 ;; Unpredicated floating-point unary operations.
3256 (define_insn "@aarch64_sve_<optab><mode>"
3257 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3259 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3262 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
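3265 ;; Unpredicated floating-point unary optabs. These are expanded as the
;; SVE_COND_FP_UNARY_OPTAB operation with SVE_RELAXED_GP, using the
;; predicate register returned by aarch64_ptrue_reg as operand 2.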
3266 (define_expand "<optab><mode>2"
3267 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3270 (const_int SVE_RELAXED_GP)
3271 (match_operand:SVE_FULL_F 1 "register_operand")]
3272 SVE_COND_FP_UNARY_OPTAB))]
3275 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3279 ;; Predicated floating-point unary operations.
3280 (define_insn "@aarch64_pred_<optab><mode>"
3281 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3283 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3284 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3285 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3286 SVE_COND_FP_UNARY))]
3288 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3291 ;; Predicated floating-point unary arithmetic with merging.
3292 (define_expand "@cond_<optab><mode>"
3293 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3295 [(match_operand:<VPRED> 1 "register_operand")
3298 (const_int SVE_STRICT_GP)
3299 (match_operand:SVE_FULL_F 2 "register_operand")]
3301 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3306 ;; Predicated floating-point unary arithmetic, merging with the first input.
3307 (define_insn_and_rewrite "*cond_<optab><mode>_2"
3308 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3310 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3313 (match_operand:SI 4 "aarch64_sve_gp_strictness")
3314 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3318 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
3320 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3321 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3322 "&& !rtx_equal_p (operands[1], operands[3])"
3324 operands[3] = copy_rtx (operands[1]);
3326 [(set_attr "movprfx" "*,yes")]
3329 ;; Predicated floating-point unary arithmetic, merging with an independent
;; value.
3332 ;; The earlyclobber isn't needed for the first alternative, but omitting
3333 ;; it would only help the case in which operands 2 and 3 are the same,
3334 ;; which is handled above rather than here. Marking all the alternatives
3335 ;; as earlyclobber helps to make the instruction more regular to the
3336 ;; register allocator.
3337 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3338 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3340 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3343 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3344 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3346 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3349 && !rtx_equal_p (operands[2], operands[3])
3350 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
3352 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3353 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3354 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3355 "&& !rtx_equal_p (operands[1], operands[4])"
3357 operands[4] = copy_rtx (operands[1]);
3359 [(set_attr "movprfx" "*,yes,yes")]
3362 ;; -------------------------------------------------------------------------
3363 ;; ---- [FP] Square root
3364 ;; -------------------------------------------------------------------------
;; Unpredicated square root. The expander first calls
;; aarch64_emit_approx_sqrt with its last argument false. The pattern
;; itself is UNSPEC_COND_FSQRT with SVE_RELAXED_GP, and operand 2 is set
;; to the predicate register returned by aarch64_ptrue_reg.
3366 (define_expand "sqrt<mode>2"
3367 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3370 (const_int SVE_RELAXED_GP)
3371 (match_operand:SVE_FULL_F 1 "register_operand")]
3372 UNSPEC_COND_FSQRT))]
3375 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3377 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3380 ;; -------------------------------------------------------------------------
3381 ;; ---- [FP] Reciprocal square root
3382 ;; -------------------------------------------------------------------------
;; Reciprocal square root. The expansion is delegated to
;; aarch64_emit_approx_sqrt, called with its last argument true.
3384 (define_expand "rsqrt<mode>2"
3385 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3386 (unspec:SVE_FULL_SDF
3387 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3391 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3395 (define_expand "@aarch64_rsqrte<mode>"
3396 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3397 (unspec:SVE_FULL_SDF
3398 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3403 (define_expand "@aarch64_rsqrts<mode>"
3404 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3405 (unspec:SVE_FULL_SDF
3406 [(match_operand:SVE_FULL_SDF 1 "register_operand")
3407 (match_operand:SVE_FULL_SDF 2 "register_operand")]
3412 ;; -------------------------------------------------------------------------
3413 ;; ---- [PRED] Inverse
3414 ;; -------------------------------------------------------------------------
3417 ;; -------------------------------------------------------------------------
3419 ;; Unpredicated predicate inverse.
3420 (define_expand "one_cmpl<mode>2"
3421 [(set (match_operand:PRED_ALL 0 "register_operand")
3423 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3427 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3431 ;; Predicated predicate inverse.
3432 (define_insn "*one_cmpl<mode>3"
3433 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3435 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3436 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3438 "not\t%0.b, %1/z, %2.b"
3441 ;; =========================================================================
3442 ;; == Binary arithmetic
3443 ;; =========================================================================
3445 ;; -------------------------------------------------------------------------
3446 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3447 ;; -------------------------------------------------------------------------
3449 ;; - ADD (merging form only)
3450 ;; - AND (merging form only)
3451 ;; - ASR (merging form only)
3452 ;; - EOR (merging form only)
3453 ;; - LSL (merging form only)
3454 ;; - LSR (merging form only)
3456 ;; - ORR (merging form only)
3459 ;; - SQADD (SVE2 merging form only)
3460 ;; - SQSUB (SVE2 merging form only)
3461 ;; - SUB (merging form only)
3464 ;; - UQADD (SVE2 merging form only)
3465 ;; - UQSUB (SVE2 merging form only)
3466 ;; -------------------------------------------------------------------------
3468 ;; Unpredicated integer binary operations that have an immediate form.
3469 (define_expand "<optab><mode>3"
3470 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3473 (SVE_INT_BINARY_IMM:SVE_FULL_I
3474 (match_operand:SVE_FULL_I 1 "register_operand")
3475 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3479 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3483 ;; Integer binary operations that have an immediate form, predicated
3484 ;; with a PTRUE. We don't actually need the predicate for the first
3485 ;; and third alternatives, but using Upa or X isn't likely to gain much
3486 ;; and would make the instruction seem less uniform to the register
;; allocator.
3488 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3489 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
3491 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3492 (SVE_INT_BINARY_IMM:SVE_FULL_I
3493 (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
3494 (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3499 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3501 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3502 ; Split the unpredicated form after reload, so that we don't have
3503 ; the unnecessary PTRUE.
3504 "&& reload_completed
3505 && !register_operand (operands[3], <MODE>mode)"
3507 (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
3509 [(set_attr "movprfx" "*,*,yes,yes")]
3512 ;; Unpredicated binary operations with a constant (post-RA only).
3513 ;; These are generated by splitting a predicated instruction whose
3514 ;; predicate is unused.
3515 (define_insn "*post_ra_<optab><mode>3"
3516 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3517 (SVE_INT_BINARY_IMM:SVE_FULL_I
3518 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
3519 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3520 "TARGET_SVE && reload_completed"
3522 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3523 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3524 [(set_attr "movprfx" "*,yes")]
3527 ;; Predicated integer operations with merging.
3528 (define_expand "@cond_<optab><mode>"
3529 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3531 [(match_operand:<VPRED> 1 "register_operand")
3532 (SVE_INT_BINARY:SVE_FULL_I
3533 (match_operand:SVE_FULL_I 2 "register_operand")
3534 (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
3535 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3540 ;; Predicated integer operations, merging with the first input.
3541 (define_insn "*cond_<optab><mode>_2"
3542 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3544 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3545 (SVE_INT_BINARY:SVE_FULL_I
3546 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3547 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3552 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3553 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3554 [(set_attr "movprfx" "*,yes")]
3557 ;; Predicated integer operations, merging with the second input.
3558 (define_insn "*cond_<optab><mode>_3"
3559 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3561 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3562 (SVE_INT_BINARY:SVE_FULL_I
3563 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
3564 (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
3569 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3570 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3571 [(set_attr "movprfx" "*,yes")]
3574 ;; Predicated integer operations, merging with an independent value.
;; Operand 4 is the fallback value. In the first three alternatives it is
;; zero (Dz) and the sequence starts with a zeroing MOVPRFX (%1/z); the
;; second alternative has operand 3 tied to the destination and therefore
;; uses the reversed instruction <sve_int_op_rev>. In the fourth alternative
;; the fallback value is tied to the destination and a merging MOVPRFX (%1/m)
;; is used. In the last alternative operand 4 is an untied register; after
;; reload, if it is a register other than operand 0, the rewrite emits a
;; vcond_mask into operand 0 and then redirects operands 2 and 4 to
;; operand 0.
3575 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3576 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3578 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3579 (SVE_INT_BINARY:SVE_FULL_I
3580 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3581 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
3582 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3585 && !rtx_equal_p (operands[2], operands[4])
3586 && !rtx_equal_p (operands[3], operands[4])"
3588 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3589 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3590 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3591 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3593 "&& reload_completed
3594 && register_operand (operands[4], <MODE>mode)
3595 && !rtx_equal_p (operands[0], operands[4])"
3597 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3598 operands[4], operands[1]));
3599 operands[4] = operands[2] = operands[0];
3601 [(set_attr "movprfx" "yes")]
3604 ;; -------------------------------------------------------------------------
3605 ;; ---- [INT] Addition
3606 ;; -------------------------------------------------------------------------
3618 ;; -------------------------------------------------------------------------
;; Vector addition. The alternatives are, in order:
;; - ADD of an immediate (vsa) to an input tied to the destination
;; - SUB of an immediate (vsn, printed with %N2) from a tied input
;; - an immediate (vsi) handled by aarch64_output_sve_vector_inc_dec
;; - the ADD and SUB immediate forms again for an untied input, preceded
;;   by a MOVPRFX that copies operand 1 into the destination
;; - ADD of two registers
3620 (define_insn "add<mode>3"
3621 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3623 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3624 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3627 add\t%0.<Vetype>, %0.<Vetype>, #%D2
3628 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3629 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3630 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3631 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3632 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3633 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3636 ;; Merging forms are handled through SVE_INT_BINARY.
3638 ;; -------------------------------------------------------------------------
3639 ;; ---- [INT] Subtraction
3640 ;; -------------------------------------------------------------------------
3644 ;; -------------------------------------------------------------------------
;; Vector subtraction. The first alternative is SUB of two registers.
;; In the other two alternatives operand 1 is an immediate (vsa) and SUBR
;; is used instead: operand 2 is either tied to the destination or, in the
;; third alternative, copied into it first with MOVPRFX.
3646 (define_insn "sub<mode>3"
3647 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3649 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3650 (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
3653 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3654 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3655 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3656 [(set_attr "movprfx" "*,*,yes")]
3659 ;; Merging forms are handled through SVE_INT_BINARY.
3661 ;; -------------------------------------------------------------------------
3662 ;; ---- [INT] Take address
3663 ;; -------------------------------------------------------------------------
3666 ;; -------------------------------------------------------------------------
3668 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
3669 ;; but the svadrb intrinsics should preserve the user's choice.
3670 (define_insn "@aarch64_adr<mode>"
3671 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3672 (unspec:SVE_FULL_SDI
3673 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3674 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3677 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3680 ;; Same, but with the offset being sign-extended from the low 32 bits.
3681 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3682 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3684 [(match_operand:VNx2DI 1 "register_operand" "w")
3689 (match_operand:VNx2DI 2 "register_operand" "w")))]
3693 "adr\t%0.d, [%1.d, %2.d, sxtw]"
3694 "&& !CONSTANT_P (operands[3])"
3696 operands[3] = CONSTM1_RTX (VNx2BImode);
3700 ;; Same, but with the offset being zero-extended from the low 32 bits.
3701 (define_insn "*aarch64_adr_uxtw_unspec"
3702 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3704 [(match_operand:VNx2DI 1 "register_operand" "w")
3706 (match_operand:VNx2DI 2 "register_operand" "w")
3707 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3710 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3713 ;; Same, matching as a PLUS rather than unspec.
3714 (define_insn "*aarch64_adr_uxtw_and"
3715 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3718 (match_operand:VNx2DI 2 "register_operand" "w")
3719 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3720 (match_operand:VNx2DI 1 "register_operand" "w")))]
3722 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3725 ;; ADR with a nonzero shift.
3726 (define_expand "@aarch64_adr<mode>_shift"
3727 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3729 (unspec:SVE_FULL_SDI
3731 (ashift:SVE_FULL_SDI
3732 (match_operand:SVE_FULL_SDI 2 "register_operand")
3733 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3735 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3738 operands[4] = CONSTM1_RTX (<VPRED>mode);
3742 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3743 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3745 (unspec:SVE_FULL_SDI
3747 (ashift:SVE_FULL_SDI
3748 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3749 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3751 (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
3753 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
3754 "&& !CONSTANT_P (operands[4])"
3756 operands[4] = CONSTM1_RTX (<VPRED>mode);
3760 ;; Same, but with the index being sign-extended from the low 32 bits.
3761 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3762 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3771 (match_operand:VNx2DI 2 "register_operand" "w")))]
3773 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3775 (match_operand:VNx2DI 1 "register_operand" "w")))]
3777 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3778 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3780 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3784 ;; Same, but with the index being zero-extended from the low 32 bits.
3785 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3786 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3792 (match_operand:VNx2DI 2 "register_operand" "w")
3793 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3794 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3796 (match_operand:VNx2DI 1 "register_operand" "w")))]
3798 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3799 "&& !CONSTANT_P (operands[5])"
3801 operands[5] = CONSTM1_RTX (VNx2BImode);
3805 ;; -------------------------------------------------------------------------
3806 ;; ---- [INT] Absolute difference
3807 ;; -------------------------------------------------------------------------
3811 ;; -------------------------------------------------------------------------
3813 ;; Unpredicated integer absolute difference.
3814 (define_expand "<su>abd<mode>_3"
3815 [(use (match_operand:SVE_FULL_I 0 "register_operand"))
3817 (match_operand:SVE_FULL_I 1 "register_operand")
3818 (match_operand:SVE_FULL_I 2 "register_operand"))]
3821 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3822 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3828 ;; Predicated integer absolute difference.
3829 (define_insn "@aarch64_pred_<su>abd<mode>"
3830 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3832 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3835 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3836 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3837 (<max_opp>:SVE_FULL_I
3843 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3844 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3845 [(set_attr "movprfx" "*,yes")]
3848 (define_expand "@aarch64_cond_<su>abd<mode>"
3849 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3851 [(match_operand:<VPRED> 1 "register_operand")
3856 (match_operand:SVE_FULL_I 2 "register_operand")
3857 (match_operand:SVE_FULL_I 3 "register_operand"))]
3861 (<max_opp>:SVE_FULL_I
3865 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3869 if (rtx_equal_p (operands[3], operands[4]))
3870 std::swap (operands[2], operands[3]);
3873 ;; Predicated integer absolute difference, merging with the first input.
3874 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
3875 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3877 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3882 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3883 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
3887 (<max_opp>:SVE_FULL_I
3895 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3896 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3897 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3899 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
3901 [(set_attr "movprfx" "*,yes")]
3904 ;; Predicated integer absolute difference, merging with an independent value.
3905 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
3906 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3908 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3913 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3914 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
3918 (<max_opp>:SVE_FULL_I
3922 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3925 && !rtx_equal_p (operands[2], operands[4])
3926 && !rtx_equal_p (operands[3], operands[4])"
3928 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3929 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3930 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3931 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3935 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
3936 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
3937 else if (reload_completed
3938 && register_operand (operands[4], <MODE>mode)
3939 && !rtx_equal_p (operands[0], operands[4]))
3941 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3942 operands[4], operands[1]));
3943 operands[4] = operands[2] = operands[0];
3948 [(set_attr "movprfx" "yes")]
3951 ;; -------------------------------------------------------------------------
3952 ;; ---- [INT] Saturating addition and subtraction
3953 ;; -------------------------------------------------------------------------
3958 ;; -------------------------------------------------------------------------
3960 ;; Unpredicated saturating signed addition and subtraction.
3961 (define_insn "@aarch64_sve_<optab><mode>"
3962 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
3963 (SBINQOPS:SVE_FULL_I
3964 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
3965 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
3968 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3969 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3970 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3971 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3972 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3973 [(set_attr "movprfx" "*,*,yes,yes,*")]
3976 ;; Unpredicated saturating unsigned addition and subtraction.
3977 (define_insn "@aarch64_sve_<optab><mode>"
3978 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
3979 (UBINQOPS:SVE_FULL_I
3980 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
3981 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
3984 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3985 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3986 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3987 [(set_attr "movprfx" "*,yes,*")]
3990 ;; -------------------------------------------------------------------------
3991 ;; ---- [INT] Highpart multiplication
3992 ;; -------------------------------------------------------------------------
3996 ;; -------------------------------------------------------------------------
3998 ;; Unpredicated highpart multiplication.
3999 (define_expand "<su>mul<mode>3_highpart"
4000 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4004 [(match_operand:SVE_FULL_I 1 "register_operand")
4005 (match_operand:SVE_FULL_I 2 "register_operand")]
4010 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4014 ;; Predicated highpart multiplication.
4015 (define_insn "@aarch64_pred_<optab><mode>"
4016 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4018 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4020 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4021 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4026 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4027 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4028 [(set_attr "movprfx" "*,yes")]
4031 ;; Predicated highpart multiplications with merging.
4032 (define_expand "@cond_<optab><mode>"
4033 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4035 [(match_operand:<VPRED> 1 "register_operand")
4037 [(match_operand:SVE_FULL_I 2 "register_operand")
4038 (match_operand:SVE_FULL_I 3 "register_operand")]
4040 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4044 /* Only target code is aware of these operations, so we don't need
4045 to handle the fully-general case. */
4046 gcc_assert (rtx_equal_p (operands[2], operands[4])
4047 || CONSTANT_P (operands[4]));
4050 ;; Predicated highpart multiplications, merging with the first input.
4051 (define_insn "*cond_<optab><mode>_2"
4052 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4054 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4056 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4057 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4063 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4064 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4065 [(set_attr "movprfx" "*,yes")])
4067 ;; Predicated highpart multiplications, merging with zero.
4068 (define_insn "*cond_<optab><mode>_z"
4069 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4071 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4073 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4074 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4076 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4080 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4081 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4082 [(set_attr "movprfx" "yes")])
4084 ;; -------------------------------------------------------------------------
4085 ;; ---- [INT] Division
4086 ;; -------------------------------------------------------------------------
4092 ;; -------------------------------------------------------------------------
4094 ;; Unpredicated integer division.
4095 (define_expand "<optab><mode>3"
4096 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4097 (unspec:SVE_FULL_SDI
4099 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4100 (match_operand:SVE_FULL_SDI 1 "register_operand")
4101 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4105 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4109 ;; Integer division predicated with a PTRUE.
4110 (define_insn "@aarch64_pred_<optab><mode>"
4111 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4112 (unspec:SVE_FULL_SDI
4113 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4114 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4115 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4116 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4120 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4121 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4122 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4123 [(set_attr "movprfx" "*,*,yes")]
4126 ;; Predicated integer division with merging.
4127 (define_expand "@cond_<optab><mode>"
4128 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4129 (unspec:SVE_FULL_SDI
4130 [(match_operand:<VPRED> 1 "register_operand")
4131 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4132 (match_operand:SVE_FULL_SDI 2 "register_operand")
4133 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4134 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4139 ;; Predicated integer division, merging with the first input.
4140 (define_insn "*cond_<optab><mode>_2"
4141 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4142 (unspec:SVE_FULL_SDI
4143 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4144 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4145 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4146 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4151 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4152 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4153 [(set_attr "movprfx" "*,yes")]
4156 ;; Predicated integer division, merging with the second input.
4157 (define_insn "*cond_<optab><mode>_3"
4158 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4159 (unspec:SVE_FULL_SDI
4160 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4161 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4162 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4163 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4168 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4169 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4170 [(set_attr "movprfx" "*,yes")]
4173 ;; Predicated integer division, merging with an independent value.
4174 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4175 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4176 (unspec:SVE_FULL_SDI
4177 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4178 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4179 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4180 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4181 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4184 && !rtx_equal_p (operands[2], operands[4])
4185 && !rtx_equal_p (operands[3], operands[4])"
4187 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4188 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4189 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4190 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4192 "&& reload_completed
4193 && register_operand (operands[4], <MODE>mode)
4194 && !rtx_equal_p (operands[0], operands[4])"
4196 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4197 operands[4], operands[1]));
4198 operands[4] = operands[2] = operands[0];
4200 [(set_attr "movprfx" "yes")]
4203 ;; -------------------------------------------------------------------------
4204 ;; ---- [INT] Binary logical operations
4205 ;; -------------------------------------------------------------------------
4210 ;; -------------------------------------------------------------------------
4212 ;; Unpredicated integer binary logical operations.
4213 (define_insn "<optab><mode>3"
4214 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
4216 (match_operand:SVE_I 1 "register_operand" "%0, w, w")
4217 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4220 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4221 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4222 <logical>\t%0.d, %1.d, %2.d"
4223 [(set_attr "movprfx" "*,yes,*")]
4226 ;; Merging forms are handled through SVE_INT_BINARY.
4228 ;; -------------------------------------------------------------------------
4229 ;; ---- [INT] Binary logical operations (inverted second input)
4230 ;; -------------------------------------------------------------------------
4233 ;; -------------------------------------------------------------------------
4235 ;; Unpredicated BIC.
4236 (define_expand "@aarch64_bic<mode>"
4237 [(set (match_operand:SVE_I 0 "register_operand")
4241 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4243 (match_operand:SVE_I 1 "register_operand")))]
4246 operands[3] = CONSTM1_RTX (<VPRED>mode);
4251 (define_insn_and_rewrite "*bic<mode>3"
4252 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4257 (match_operand:SVE_I 2 "register_operand" "w"))]
4259 (match_operand:SVE_I 1 "register_operand" "w")))]
4261 "bic\t%0.d, %1.d, %2.d"
4262 "&& !CONSTANT_P (operands[3])"
4264 operands[3] = CONSTM1_RTX (<VPRED>mode);
4268 ;; Predicated BIC with merging.
4269 (define_expand "@cond_bic<mode>"
4270 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4272 [(match_operand:<VPRED> 1 "register_operand")
4274 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4275 (match_operand:SVE_FULL_I 2 "register_operand"))
4276 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4281 ;; Predicated integer BIC, merging with the first input.
4282 (define_insn "*cond_bic<mode>_2"
4283 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4285 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4288 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
4289 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
4294 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4295 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4296 [(set_attr "movprfx" "*,yes")]
4299 ;; Predicated integer BIC, merging with an independent value.
4300 (define_insn_and_rewrite "*cond_bic<mode>_any"
4301 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
4303 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4306 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
4307 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
4308 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4310 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4312 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4313 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4314 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4316 "&& reload_completed
4317 && register_operand (operands[4], <MODE>mode)
4318 && !rtx_equal_p (operands[0], operands[4])"
4320 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4321 operands[4], operands[1]));
4322 operands[4] = operands[2] = operands[0];
4324 [(set_attr "movprfx" "yes")]
4327 ;; -------------------------------------------------------------------------
4328 ;; ---- [INT] Shifts (rounding towards -Inf)
4329 ;; -------------------------------------------------------------------------
4337 ;; -------------------------------------------------------------------------
4339 ;; Unpredicated shift by a scalar, which expands into the vector shift v<optab><mode>3 below.
4341 (define_expand "<ASHIFT:optab><mode>3"
4342 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4344 (match_operand:SVE_FULL_I 1 "register_operand")
4345 (match_operand:<VEL> 2 "general_operand")))]
4349 if (CONST_INT_P (operands[2]))
4351 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4352 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4353 amount = force_reg (<MODE>mode, amount);
4357 amount = gen_reg_rtx (<MODE>mode);
4358 emit_insn (gen_vec_duplicate<mode> (amount,
4359 convert_to_mode (<VEL>mode,
4362 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4367 ;; Unpredicated shift by a vector.
4368 (define_expand "v<optab><mode>3"
4369 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4373 (match_operand:SVE_FULL_I 1 "register_operand")
4374 (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
4378 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4382 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4383 ;; the predicate for the first alternative, but using Upa or X isn't
4384 ;; likely to gain much and would make the instruction seem less uniform
4385 ;; to the register allocator.
4386 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4387 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
4389 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4391 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
4392 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4397 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4398 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4399 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4400 "&& reload_completed
4401 && !register_operand (operands[3], <MODE>mode)"
4402 [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
4404 [(set_attr "movprfx" "*,*,*,yes")]
4407 ;; Unpredicated shift operations by a constant (post-RA only).
4408 ;; These are generated by splitting a predicated instruction whose
4409 ;; predicate is unused.
4410 (define_insn "*post_ra_v<optab><mode>3"
4411 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4413 (match_operand:SVE_FULL_I 1 "register_operand" "w")
4414 (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
4415 "TARGET_SVE && reload_completed"
4416 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4419 ;; Predicated integer shift, merging with the first input.
4420 (define_insn "*cond_<optab><mode>_2_const"
4421 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4423 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4425 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4426 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4431 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4432 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4433 [(set_attr "movprfx" "*,yes")]
4436 ;; Predicated integer shift, merging with an independent value.
4437 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4438 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
4440 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4442 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
4443 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4444 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4446 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4448 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4449 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4451 "&& reload_completed
4452 && register_operand (operands[4], <MODE>mode)
4453 && !rtx_equal_p (operands[0], operands[4])"
4455 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4456 operands[4], operands[1]));
4457 operands[4] = operands[2] = operands[0];
4459 [(set_attr "movprfx" "yes")]
4462 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4463 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4464 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4465 (unspec:SVE_FULL_BHSI
4466 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4467 (match_operand:VNx2DI 2 "register_operand" "w")]
4470 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4473 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4474 (define_expand "@cond_<sve_int_op><mode>"
4475 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4476 (unspec:SVE_FULL_BHSI
4477 [(match_operand:<VPRED> 1 "register_operand")
4478 (unspec:SVE_FULL_BHSI
4479 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4480 (match_operand:VNx2DI 3 "register_operand")]
4482 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4487 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with the first input.
4489 (define_insn "*cond_<sve_int_op><mode>_m"
4490 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4491 (unspec:SVE_FULL_BHSI
4492 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4493 (unspec:SVE_FULL_BHSI
4494 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4495 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4501 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4502 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4503 [(set_attr "movprfx" "*, yes")])
4505 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4506 (define_insn "*cond_<sve_int_op><mode>_z"
4507 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4508 (unspec:SVE_FULL_BHSI
4509 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4510 (unspec:SVE_FULL_BHSI
4511 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4512 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4514 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4518 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4519 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4520 [(set_attr "movprfx" "yes")])
4522 ;; -------------------------------------------------------------------------
4523 ;; ---- [INT] Shifts (rounding towards 0)
4524 ;; -------------------------------------------------------------------------
4530 ;; -------------------------------------------------------------------------
4532 ;; Unpredicated signed division by a power of 2, given as a right-shift immediate.
4533 (define_expand "sdiv_pow2<mode>3"
4534 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4538 [(match_operand:SVE_FULL_I 1 "register_operand")
4539 (match_operand 2 "aarch64_simd_rshift_imm")]
4545 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4549 ;; Predicated right shift with merging.
4550 (define_expand "@cond_<sve_int_op><mode>"
4551 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4553 [(match_operand:<VPRED> 1 "register_operand")
4555 [(match_operand:SVE_FULL_I 2 "register_operand")
4556 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4558 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4563 ;; Predicated right shift, merging with the first input.
4564 (define_insn "*cond_<sve_int_op><mode>_2"
4565 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4567 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4569 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4570 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4576 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4577 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4578 [(set_attr "movprfx" "*,yes")])
4580 ;; Predicated right shift, merging with zero.
4581 (define_insn "*cond_<sve_int_op><mode>_z"
4582 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4584 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4586 [(match_operand:SVE_FULL_I 2 "register_operand" "w")
4587 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4589 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4592 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4593 [(set_attr "movprfx" "yes")])
4595 ;; -------------------------------------------------------------------------
4596 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4597 ;; -------------------------------------------------------------------------
4602 ;; -------------------------------------------------------------------------
4604 ;; Unpredicated floating-point binary operations that take an integer as
4605 ;; their second operand.
4606 (define_insn "@aarch64_sve_<optab><mode>"
4607 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4609 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4610 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4611 SVE_FP_BINARY_INT))]
4613 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4616 ;; Predicated floating-point binary operations that take an integer
4617 ;; as their second operand.
4618 (define_insn "@aarch64_pred_<optab><mode>"
4619 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4621 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4622 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4623 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4624 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4625 SVE_COND_FP_BINARY_INT))]
4628 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4629 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4630 [(set_attr "movprfx" "*,yes")]
4633 ;; Predicated floating-point binary operations with merging, taking an
4634 ;; integer as their second operand.
4635 (define_expand "@cond_<optab><mode>"
4636 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4638 [(match_operand:<VPRED> 1 "register_operand")
4641 (const_int SVE_STRICT_GP)
4642 (match_operand:SVE_FULL_F 2 "register_operand")
4643 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4644 SVE_COND_FP_BINARY_INT)
4645 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4650 ;; Predicated floating-point binary operations that take an integer as their
4651 ;; second operand, with inactive lanes coming from the first operand.
4652 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4653 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4655 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4658 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4659 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4660 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4661 SVE_COND_FP_BINARY_INT)
4664 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4666 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4667 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4668 "&& !rtx_equal_p (operands[1], operands[4])"
4670 operands[4] = copy_rtx (operands[1]);
4672 [(set_attr "movprfx" "*,yes")]
4675 ;; Predicated floating-point binary operations that take an integer as
4676 ;; their second operand, with the values of inactive lanes being distinct
4677 ;; from the other inputs.
4678 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4679 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4681 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4684 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4685 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4686 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4687 SVE_COND_FP_BINARY_INT)
4688 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4691 && !rtx_equal_p (operands[2], operands[4])
4692 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4694 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4695 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4696 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4700 if (reload_completed
4701 && register_operand (operands[4], <MODE>mode)
4702 && !rtx_equal_p (operands[0], operands[4]))
4704 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4705 operands[4], operands[1]));
4706 operands[4] = operands[2] = operands[0];
4708 else if (!rtx_equal_p (operands[1], operands[5]))
4709 operands[5] = copy_rtx (operands[1]);
4713 [(set_attr "movprfx" "yes")]
4716 ;; -------------------------------------------------------------------------
4717 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
4718 ;; -------------------------------------------------------------------------
4719 ;; Includes post-RA forms of:
4723 ;; -------------------------------------------------------------------------
4725 ;; Unpredicated floating-point binary operations (post-RA only).
4726 ;; These are generated by splitting a predicated instruction whose
4727 ;; predicate is unused; the condition below requires reload_completed.
4728 (define_insn "*post_ra_<sve_fp_op><mode>3"
4729 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4730 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
4731 (match_operand:SVE_FULL_F 1 "register_operand" "w")
4732 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
4733 "TARGET_SVE && reload_completed"
4734 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
4736 ;; -------------------------------------------------------------------------
4737 ;; ---- [FP] General binary arithmetic corresponding to unspecs
4738 ;; -------------------------------------------------------------------------
4739 ;; Includes merging forms of:
4740 ;; - FADD (constant forms handled in the "Addition" section)
4744 ;; - FMAXNM (including #0.0 and #1.0)
4746 ;; - FMINNM (including #0.0 and #1.0)
4747 ;; - FMUL (including #0.5 and #2.0)
4751 ;; - FSUB (constant forms handled in the "Addition" section)
4752 ;; - FSUBR (constant forms handled in the "Subtraction" section)
4753 ;; -------------------------------------------------------------------------
4755 ;; Unpredicated floating-point binary operations.
;; Operands 1 and 2 are vector register inputs and operand 0 the result.
;; The output template takes no governing predicate operand.
4756 (define_insn "@aarch64_sve_<optab><mode>"
4757 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4759 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4760 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
4763 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4766 ;; Unpredicated floating-point binary operations that need to be predicated for SVE.
;; The caller supplies operands 0-2 only.  The preparation code sets
;; operand 3 to the register returned by aarch64_ptrue_reg (<VPRED>mode),
;; and the generated operation is marked SVE_RELAXED_GP.
4768 (define_expand "<optab><mode>3"
4769 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4772 (const_int SVE_RELAXED_GP)
4773 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
4774 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
4775 SVE_COND_FP_BINARY_OPTAB))]
4778 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4782 ;; Predicated floating-point binary operations that have no immediate forms.
;; Operand 1 is the governing predicate and operand 4 the GP strictness.
;; Alternative 0 ties operand 2 to the destination.  Alternative 1 ties
;; operand 3 instead and uses the reversed form of the instruction
;; (<sve_fp_op_rev>) with operand 2 as the other source.  Alternative 2
;; ties neither input and first copies operand 2 into the destination
;; with MOVPRFX, which is why only that alternative sets "movprfx" to yes.
4783 (define_insn "@aarch64_pred_<optab><mode>"
4784 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
4786 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4787 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4788 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
4789 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
4790 SVE_COND_FP_BINARY_REG))]
4793 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4794 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4795 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4796 [(set_attr "movprfx" "*,*,yes")]
4799 ;; Predicated floating-point operations with merging.
;; Operand 1 is the predicate, operands 2 and 3 are the inputs to the
;; operation (which is marked SVE_STRICT_GP), and operand 4 accepts either
;; a register or zero (aarch64_simd_reg_or_zero).
4800 (define_expand "@cond_<optab><mode>"
4801 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4803 [(match_operand:<VPRED> 1 "register_operand")
4806 (const_int SVE_STRICT_GP)
4807 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
4808 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
4810 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4815 ;; Predicated floating-point operations, merging with the first input.
;; Alternative 0 ties operand 2 to the destination; alternative 1 uses
;; MOVPRFX to copy operand 2 into the destination first.  The insn
;; condition requires aarch64_sve_pred_dominates_p (&operands[4],
;; operands[1]).  The rewrite fires when operand 4 is not rtx_equal_p to
;; operand 1 and replaces operand 4 with a copy of operand 1.
4816 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4817 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4819 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4822 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4823 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4824 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
4828 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4830 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4831 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4832 "&& !rtx_equal_p (operands[1], operands[4])"
4834 operands[4] = copy_rtx (operands[1]);
4836 [(set_attr "movprfx" "*,yes")]
4839 ;; Same for operations that take a 1-bit constant.
;; Operand 3 is an immediate (printed as #%3) rather than a register.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.  As in the register
;; form, the rewrite replaces operand 4 with a copy of operand 1 whenever
;; the two are not already rtx_equal_p.
4840 (define_insn_and_rewrite "*cond_<optab><mode>_2_const"
4841 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
4843 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4846 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4847 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4848 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4849 SVE_COND_FP_BINARY_I1)
4852 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4854 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4855 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4856 "&& !rtx_equal_p (operands[1], operands[4])"
4858 operands[4] = copy_rtx (operands[1]);
4860 [(set_attr "movprfx" "*,yes")]
4863 ;; Predicated floating-point operations, merging with the second input.
;; Mirror image of the "_2" pattern: operand 3 is the input tied to the
;; destination (alternative 0) or copied into it with MOVPRFX
;; (alternative 1), so both templates use the reversed instruction
;; (<sve_fp_op_rev>) with operand 2 as the other source.  The rewrite
;; replaces operand 4 with a copy of operand 1 when they differ.
4864 (define_insn_and_rewrite "*cond_<optab><mode>_3"
4865 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4867 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4870 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4871 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
4872 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
4876 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4878 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4879 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4880 "&& !rtx_equal_p (operands[1], operands[4])"
4882 operands[4] = copy_rtx (operands[1]);
4884 [(set_attr "movprfx" "*,yes")]
4887 ;; Predicated floating-point operations, merging with an independent value.
;; Operand 4 is the independent value.  The insn condition rejects the
;; cases where it is rtx_equal_p to operand 2 or operand 3 and requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]).
;;
;; Alternatives by operand 4 constraint:
;;   Dz (alternatives 0-2): zeroing MOVPRFX (%1/z) from whichever input is
;;     tied to the destination, or from operand 2 when neither is tied.
;;   0  (alternative 3): merging MOVPRFX (%1/m) from operand 2.
;;   w  (alternative 4): an unrelated register.
;;
;; Rewrite code: once reload has completed, if operand 4 is a register
;; other than operand 0, it emits gen_vcond_mask_<mode><vpred>
;; (operands[0], operands[2], operands[4], operands[1]) and then makes
;; operands 2 and 4 both refer to operand 0.  Otherwise, if operand 5 is
;; not rtx_equal_p to operand 1, operand 5 becomes a copy of operand 1.
4888 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4889 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4894 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4895 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
4896 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
4898 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4901 && !rtx_equal_p (operands[2], operands[4])
4902 && !rtx_equal_p (operands[3], operands[4])
4903 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4905 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4906 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4907 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4908 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4912 if (reload_completed
4913 && register_operand (operands[4], <MODE>mode)
4914 && !rtx_equal_p (operands[0], operands[4]))
4916 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4917 operands[4], operands[1]));
4918 operands[4] = operands[2] = operands[0];
4920 else if (!rtx_equal_p (operands[1], operands[5]))
4921 operands[5] = copy_rtx (operands[1]);
4925 [(set_attr "movprfx" "yes")]
4928 ;; Same for operations that take a 1-bit constant.
;; Operand 3 is an immediate (printed as #%3) and operand 4 is the
;; independent value, which the insn condition requires to differ from
;; operand 2.  With operand 4 zero (Dz) the template uses a zeroing
;; MOVPRFX (%1/z) from operand 2; with operand 4 tied to the destination
;; it uses a merging MOVPRFX (%1/m) from operand 2.
;;
;; Rewrite code: once reload has completed, if operand 4 is a register
;; other than operand 0, it emits gen_vcond_mask_<mode><vpred>
;; (operands[0], operands[2], operands[4], operands[1]) and then makes
;; operands 2 and 4 both refer to operand 0.  Otherwise, if operand 5 is
;; not rtx_equal_p to operand 1, operand 5 becomes a copy of operand 1.
4929 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4930 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
4932 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4935 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4936 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
4937 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4938 SVE_COND_FP_BINARY_I1)
4939 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4942 && !rtx_equal_p (operands[2], operands[4])
4943 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4945 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4946 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4950 if (reload_completed
4951 && register_operand (operands[4], <MODE>mode)
4952 && !rtx_equal_p (operands[0], operands[4]))
4954 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4955 operands[4], operands[1]));
4956 operands[4] = operands[2] = operands[0];
4958 else if (!rtx_equal_p (operands[1], operands[5]))
4959 operands[5] = copy_rtx (operands[1]);
4963 [(set_attr "movprfx" "yes")]
4966 ;; -------------------------------------------------------------------------
4967 ;; ---- [FP] Addition
4968 ;; -------------------------------------------------------------------------
4972 ;; -------------------------------------------------------------------------
4974 ;; Predicated floating-point addition.
;; Operand 1 is the governing predicate, operand 4 the GP strictness and
;; operands 2 and 3 the addends ("%" marks them as commutative).
;; When operand 3 matches the vsA constraint the templates emit FADD with
;; #%3; when it matches vsN they emit FSUB with #%N3 instead.  The
;; alternatives whose destination is "?&w" do not tie operand 2 and
;; prefix the instruction with a MOVPRFX from operand 2.
;;
;; The split condition applies after reload when operand 3 is a register
;; and operand 4 is SVE_RELAXED_GP; the insn is then replaced by an
;; unpredicated (plus ...) of operands 2 and 3.
4975 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4976 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
4978 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
4979 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
4980 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
4981 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
4985 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4986 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4988 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4989 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4990 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4991 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4992 ; Split the unpredicated form after reload, so that we don't have
4993 ; the unnecessary PTRUE.
4994 "&& reload_completed
4995 && register_operand (operands[3], <MODE>mode)
4996 && INTVAL (operands[4]) == SVE_RELAXED_GP"
4997 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
4999 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
5002 ;; Predicated floating-point addition of a constant, merging with the first input.
;; Operand 3 is an immediate: vsA alternatives emit FADD with #%3 and vsN
;; alternatives emit FSUB with #%N3.  Alternatives 0 and 1 tie operand 2
;; to the destination; alternatives 2 and 3 copy operand 2 into it with
;; MOVPRFX first.  The rewrite replaces operand 4 with a copy of operand 1
;; whenever the two are not already rtx_equal_p.
5004 (define_insn_and_rewrite "*cond_add<mode>_2_const"
5005 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5007 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5010 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5011 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5012 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5016 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5018 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5019 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5020 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5021 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5022 "&& !rtx_equal_p (operands[1], operands[4])"
5024 operands[4] = copy_rtx (operands[1]);
5026 [(set_attr "movprfx" "*,*,yes,yes")]
5029 ;; Predicated floating-point addition of a constant, merging with an
5030 ;; independent value.
;; Operand 3 is an immediate (vsA -> FADD #%3, vsN -> FSUB #%N3) and
;; operand 4 is the independent value, which the insn condition requires
;; to differ from operand 2.  With operand 4 zero (Dz) the templates use a
;; zeroing MOVPRFX (%1/z) from operand 2; with operand 4 tied to the
;; destination they use a merging MOVPRFX (%1/m) from operand 2.
;;
;; Rewrite code: once reload has completed, if operand 4 is a register
;; other than operand 0, it emits gen_vcond_mask_<mode><vpred>
;; (operands[0], operands[2], operands[4], operands[1]) and then makes
;; operands 2 and 4 both refer to operand 0.  Otherwise, if operand 5 is
;; not rtx_equal_p to operand 1, operand 5 becomes a copy of operand 1.
5031 (define_insn_and_rewrite "*cond_add<mode>_any_const"
5032 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5034 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5037 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5038 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5039 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5041 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5044 && !rtx_equal_p (operands[2], operands[4])
5045 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5047 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5048 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5049 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5050 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5055 if (reload_completed
5056 && register_operand (operands[4], <MODE>mode)
5057 && !rtx_equal_p (operands[0], operands[4]))
5059 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5060 operands[4], operands[1]));
5061 operands[4] = operands[2] = operands[0];
5063 else if (!rtx_equal_p (operands[1], operands[5]))
5064 operands[5] = copy_rtx (operands[1]);
5068 [(set_attr "movprfx" "yes")]
5071 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5073 ;; -------------------------------------------------------------------------
5074 ;; ---- [FP] Complex addition
5075 ;; -------------------------------------------------------------------------
5078 ;; -------------------------------------------------------------------------
5080 ;; Predicated FCADD.
;; Operand 1 is the governing predicate and operand 4 the GP strictness.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.  The rotation is
;; not an operand: it is printed from the <rot> attribute.
5081 (define_insn "@aarch64_pred_<optab><mode>"
5082 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5084 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5085 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5086 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5087 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5091 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5092 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5093 [(set_attr "movprfx" "*,yes")]
5096 ;; Predicated FCADD with merging.
;; Operand 1 is the predicate, operands 2 and 3 are register inputs to the
;; operation (which is marked SVE_STRICT_GP), and operand 4 accepts either
;; a register or zero (aarch64_simd_reg_or_zero).
5097 (define_expand "@cond_<optab><mode>"
5098 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5100 [(match_operand:<VPRED> 1 "register_operand")
5103 (const_int SVE_STRICT_GP)
5104 (match_operand:SVE_FULL_F 2 "register_operand")
5105 (match_operand:SVE_FULL_F 3 "register_operand")]
5107 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5112 ;; Predicated FCADD, merging with the first input.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into the destination with MOVPRFX first.  The insn condition
;; requires aarch64_sve_pred_dominates_p (&operands[4], operands[1]), and
;; the rewrite replaces operand 4 with a copy of operand 1 whenever the
;; two are not already rtx_equal_p.
5113 (define_insn_and_rewrite "*cond_<optab><mode>_2"
5114 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5116 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5119 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5120 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5121 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5125 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5127 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5128 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5129 "&& !rtx_equal_p (operands[1], operands[4])"
5131 operands[4] = copy_rtx (operands[1]);
5133 [(set_attr "movprfx" "*,yes")]
5136 ;; Predicated FCADD, merging with an independent value.
;; Operand 4 is the independent value, which the insn condition requires
;; to differ from operand 2.  With operand 4 zero (Dz) the templates use a
;; zeroing MOVPRFX (%1/z), either from operand 2 or, when operand 2 is
;; tied to the destination, from the destination itself.  With operand 4
;; tied to the destination they use a merging MOVPRFX (%1/m) from
;; operand 2.
;;
;; Rewrite code: once reload has completed, if operand 4 is a register
;; other than operand 0, it emits gen_vcond_mask_<mode><vpred>
;; (operands[0], operands[2], operands[4], operands[1]) and then makes
;; operands 2 and 4 both refer to operand 0.  Otherwise, if operand 5 is
;; not rtx_equal_p to operand 1, operand 5 becomes a copy of operand 1.
5137 (define_insn_and_rewrite "*cond_<optab><mode>_any"
5138 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5140 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5143 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5144 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5145 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5147 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5150 && !rtx_equal_p (operands[2], operands[4])
5151 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5153 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5154 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5155 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5159 if (reload_completed
5160 && register_operand (operands[4], <MODE>mode)
5161 && !rtx_equal_p (operands[0], operands[4]))
5163 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5164 operands[4], operands[1]));
5165 operands[4] = operands[2] = operands[0];
5167 else if (!rtx_equal_p (operands[1], operands[5]))
5168 operands[5] = copy_rtx (operands[1]);
5172 [(set_attr "movprfx" "yes")]
5175 ;; -------------------------------------------------------------------------
5176 ;; ---- [FP] Subtraction
5177 ;; -------------------------------------------------------------------------
5181 ;; -------------------------------------------------------------------------
5183 ;; Predicated floating-point subtraction.
;; Operand 2 is the minuend and may be an immediate (vsA); operand 3 is
;; the subtrahend and is always a register.  When operand 2 is an
;; immediate, or when operand 3 is the input tied to the destination, the
;; templates use FSUBR with operand 2 as the final source; when operand 2
;; is tied they use FSUB with operand 3.  The "?&w" alternatives prefix
;; the instruction with a MOVPRFX from whichever input becomes the
;; destination.
;;
;; The split condition applies after reload when operand 2 is a register
;; and operand 4 is SVE_RELAXED_GP; the insn is then replaced by an
;; unpredicated (minus ...) of operands 2 and 3.
5184 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5185 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5187 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5188 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5189 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5190 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5194 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5196 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5197 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5198 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5199 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5200 ; Split the unpredicated form after reload, so that we don't have
5201 ; the unnecessary PTRUE.
5202 "&& reload_completed
5203 && register_operand (operands[2], <MODE>mode)
5204 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5205 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5207 [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5210 ;; Predicated floating-point subtraction from a constant, merging with the second input.
;; Operand 2 is the immediate minuend (printed as #%2) and operand 3 the
;; register subtrahend, so both templates use FSUBR.  Alternative 0 ties
;; operand 3 to the destination; alternative 1 copies operand 3 into it
;; with MOVPRFX first.  The rewrite replaces operand 4 with a copy of
;; operand 1 whenever the two are not already rtx_equal_p.
5212 (define_insn_and_rewrite "*cond_sub<mode>_3_const"
5213 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5215 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5218 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5219 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5220 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5224 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5226 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5227 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5228 "&& !rtx_equal_p (operands[1], operands[4])"
5230 operands[4] = copy_rtx (operands[1]);
5232 [(set_attr "movprfx" "*,yes")]
5235 ;; Predicated floating-point subtraction from a constant, merging with an
5236 ;; independent value.
;; Operand 2 is the immediate minuend (printed as #%2), operand 3 the
;; register subtrahend and operand 4 the independent value, which the insn
;; condition requires to differ from operand 3.  With operand 4 zero (Dz)
;; the template uses a zeroing MOVPRFX (%1/z) from operand 3; with
;; operand 4 tied to the destination it uses a merging MOVPRFX (%1/m)
;; from operand 3.
;;
;; Rewrite code: once reload has completed, if operand 4 is a register
;; other than operand 0, it emits gen_vcond_mask_<mode><vpred>
;; (operands[0], operands[3], operands[4], operands[1]) and then makes
;; operands 3 and 4 both refer to operand 0.  Note that this uses
;; operand 3, not operand 2, because operand 2 is the immediate here.
;; Otherwise, if operand 5 is not rtx_equal_p to operand 1, operand 5
;; becomes a copy of operand 1.
5237 (define_insn_and_rewrite "*cond_sub<mode>_any_const"
5238 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5240 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5243 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5244 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5245 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5247 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5250 && !rtx_equal_p (operands[3], operands[4])
5251 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5253 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5254 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5258 if (reload_completed
5259 && register_operand (operands[4], <MODE>mode)
5260 && !rtx_equal_p (operands[0], operands[4]))
5262 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5263 operands[4], operands[1]));
5264 operands[4] = operands[3] = operands[0];
5266 else if (!rtx_equal_p (operands[1], operands[5]))
5267 operands[5] = copy_rtx (operands[1]);
5271 [(set_attr "movprfx" "yes")]
5274 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5276 ;; -------------------------------------------------------------------------
5277 ;; ---- [FP] Absolute difference
5278 ;; -------------------------------------------------------------------------
5281 ;; -------------------------------------------------------------------------
5283 ;; Predicated floating-point absolute difference.
;; Expander form: operand 1 is the predicate, operand 4 the GP strictness
;; and operands 2 and 3 the two register inputs.
5284 (define_expand "@aarch64_pred_abd<mode>"
5285 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5287 [(match_operand:<VPRED> 1 "register_operand")
5288 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5292 (match_operand:SVE_FULL_F 2 "register_operand")
5293 (match_operand:SVE_FULL_F 3 "register_operand")]
5299 ;; Predicated floating-point absolute difference.
;; The pattern carries two GP strictness operands (4 and 6).  Operands 2
;; and 3 are commutative ("%"): alternative 0 ties operand 2 to the
;; destination and alternative 1 copies operand 2 into it with MOVPRFX
;; first.  The insn condition requires aarch64_sve_pred_dominates_p
;; (&operands[5], operands[1]), and the rewrite replaces operand 5 with a
;; copy of operand 1 whenever the two are not already rtx_equal_p.
5300 (define_insn_and_rewrite "*aarch64_pred_abd<mode>"
5301 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5303 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5304 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5307 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5308 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5309 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5312 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5314 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5315 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5316 "&& !rtx_equal_p (operands[1], operands[5])"
5318 operands[5] = copy_rtx (operands[1]);
5320 [(set_attr "movprfx" "*,yes")]
;; Expander for the merging form of the absolute difference.  Both nested
;; operations are marked SVE_STRICT_GP.  The preparation code swaps
;; operands 2 and 3 when operand 3 is rtx_equal_p to operand 4, so that an
;; input equal to operand 4 ends up as operand 2.
5323 (define_expand "@aarch64_cond_abd<mode>"
5324 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5326 [(match_operand:<VPRED> 1 "register_operand")
5329 (const_int SVE_STRICT_GP)
5332 (const_int SVE_STRICT_GP)
5333 (match_operand:SVE_FULL_F 2 "register_operand")
5334 (match_operand:SVE_FULL_F 3 "register_operand")]
5337 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5341 if (rtx_equal_p (operands[3], operands[4]))
5342 std::swap (operands[2], operands[3]);
5345 ;; Predicated floating-point absolute difference, merging with the first input.
;; Alternative 0 ties operand 2 to the destination; alternative 1 copies
;; operand 2 into it with MOVPRFX first.  The insn condition requires
;; aarch64_sve_pred_dominates_p for both operand 4 and operand 6 against
;; operand 1.  The rewrite fires when either of them is not rtx_equal_p to
;; operand 1 and replaces both with copies of operand 1.
5347 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
5348 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5350 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5353 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5356 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5357 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5358 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5364 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5365 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5367 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5368 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5369 "&& (!rtx_equal_p (operands[1], operands[4])
5370 || !rtx_equal_p (operands[1], operands[6]))"
5372 operands[4] = copy_rtx (operands[1]);
5373 operands[6] = copy_rtx (operands[1]);
5375 [(set_attr "movprfx" "*,yes")]
5378 ;; Predicated floating-point absolute difference, merging with the second input.
;; Mirror image of the "_2" pattern: operand 3 is tied to the destination
;; (alternative 0) or copied into it with MOVPRFX (alternative 1), and
;; operand 2 is the other FABD source.  The insn condition requires
;; aarch64_sve_pred_dominates_p for both operand 4 and operand 6 against
;; operand 1.  The rewrite fires when either of them is not rtx_equal_p to
;; operand 1 and replaces both with copies of operand 1.
5380 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
5381 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5383 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5386 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5389 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5390 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5391 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5397 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5398 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5400 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5401 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5402 "&& (!rtx_equal_p (operands[1], operands[4])
5403 || !rtx_equal_p (operands[1], operands[6]))"
5405 operands[4] = copy_rtx (operands[1]);
5406 operands[6] = copy_rtx (operands[1]);
5408 [(set_attr "movprfx" "*,yes")]
5411 ;; Predicated floating-point absolute difference, merging with an
5412 ;; independent value.
;; Operand 4 is the independent value.  The insn condition rejects the
;; cases where it is rtx_equal_p to operand 2 or operand 3 and requires
;; aarch64_sve_pred_dominates_p for both operand 5 and operand 7 against
;; operand 1.
;;
;; Alternatives by operand 4 constraint:
;;   Dz (alternatives 0-2): zeroing MOVPRFX (%1/z) from whichever input is
;;     tied to the destination, or from operand 2 when neither is tied.
;;   0  (alternative 3): merging MOVPRFX (%1/m) from operand 2.
;;   w  (alternative 4): an unrelated register.
;;
;; Rewrite code: once reload has completed, if operand 4 is a register
;; other than operand 0, it emits gen_vcond_mask_<mode><vpred>
;; (operands[0], operands[3], operands[4], operands[1]) and then makes
;; operands 3 and 4 both refer to operand 0.  Otherwise, if operand 5 or
;; operand 7 is not rtx_equal_p to operand 1, both become copies of
;; operand 1.
5413 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
5414 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5416 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5419 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5422 (match_operand:SI 8 "aarch64_sve_gp_strictness")
5423 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5424 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5427 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5430 && !rtx_equal_p (operands[2], operands[4])
5431 && !rtx_equal_p (operands[3], operands[4])
5432 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
5433 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
5435 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5436 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5437 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5438 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5442 if (reload_completed
5443 && register_operand (operands[4], <MODE>mode)
5444 && !rtx_equal_p (operands[0], operands[4]))
5446 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5447 operands[4], operands[1]));
5448 operands[4] = operands[3] = operands[0];
5450 else if (!rtx_equal_p (operands[1], operands[5])
5451 || !rtx_equal_p (operands[1], operands[7]))
5453 operands[5] = copy_rtx (operands[1]);
5454 operands[7] = copy_rtx (operands[1]);
5459 [(set_attr "movprfx" "yes")]
5462 ;; -------------------------------------------------------------------------
5463 ;; ---- [FP] Multiplication
5464 ;; -------------------------------------------------------------------------
5467 ;; -------------------------------------------------------------------------
5469 ;; Predicated floating-point multiplication.
;; Operand 1 is the governing predicate, operand 4 the GP strictness and
;; operands 2 and 3 the multiplicands ("%" marks them as commutative).
;; Operand 3 may be a vsM immediate, printed as #%3, or a register.  The
;; "?&w" alternatives do not tie operand 2 and prefix FMUL with a MOVPRFX
;; from operand 2.
;;
;; The split condition applies after reload when operand 3 is a register
;; and operand 4 is SVE_RELAXED_GP; the insn is then replaced by an
;; unpredicated (mult ...) of operands 2 and 3.
5470 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5471 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
5473 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5474 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
5475 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
5476 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
5480 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5482 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5483 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5484 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5485 ; Split the unpredicated form after reload, so that we don't have
5486 ; the unnecessary PTRUE.
5487 "&& reload_completed
5488 && register_operand (operands[3], <MODE>mode)
5489 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5490 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5492 [(set_attr "movprfx" "*,*,*,yes,yes")]
5495 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5496 ;; SVE_COND_FP_BINARY_I1.
5498 ;; Unpredicated multiplication by selected lanes.
;; Operand 2 is the vector from which a lane is selected and operand 3 is
;; the constant lane index (UNSPEC_SVE_LANE_SELECT); the template prints
;; them as %2.<Vetype>[%3].  Operand 1 is the other multiplicand.  The
;; register class allowed for operand 2 comes from <sve_lane_con>.
5499 (define_insn "@aarch64_mul_lane_<mode>"
5500 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5503 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
5504 (match_operand:SI 3 "const_int_operand")]
5505 UNSPEC_SVE_LANE_SELECT)
5506 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
5508 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
5511 ;; -------------------------------------------------------------------------
5512 ;; ---- [FP] Division
5513 ;; -------------------------------------------------------------------------
5514 ;; The patterns in this section are synthetic.
5515 ;; -------------------------------------------------------------------------
;; Division expander.  The preparation code calls aarch64_emit_approx_div
;; on operands 0-2 first.  For the predicated form it forces operand 1 (a
;; nonmemory_operand, so possibly a constant) into a register and sets
;; operand 3 to aarch64_ptrue_reg (<VPRED>mode).  The operation is marked
;; SVE_RELAXED_GP.
5517 (define_expand "div<mode>3"
5518 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5521 (const_int SVE_RELAXED_GP)
5522 (match_operand:SVE_FULL_F 1 "nonmemory_operand")
5523 (match_operand:SVE_FULL_F 2 "register_operand")]
5527 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
5530 operands[1] = force_reg (<MODE>mode, operands[1]);
5531 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
;; Expander with one vector register input (operand 1) and a vector
;; register result (operand 0).
5535 (define_expand "@aarch64_frecpe<mode>"
5536 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5538 [(match_operand:SVE_FULL_F 1 "register_operand")]
;; Expander with two vector register inputs (operands 1 and 2) and a
;; vector register result (operand 0).
5543 (define_expand "@aarch64_frecps<mode>"
5544 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5546 [(match_operand:SVE_FULL_F 1 "register_operand")
5547 (match_operand:SVE_FULL_F 2 "register_operand")]
5552 ;; -------------------------------------------------------------------------
5553 ;; ---- [FP] Binary logical operations
5554 ;; -------------------------------------------------------------------------
5559 ;; -------------------------------------------------------------------------
5561 ;; Binary logical operations on floating-point modes. We avoid subregs
5562 ;; by providing this, but we need to use UNSPECs since rtx logical ops
5563 ;; aren't defined for floating-point modes.
;; The template always prints the registers with the .d suffix, whatever
;; the element size of the floating-point mode.
5564 (define_insn "*<optab><mode>3"
5565 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5567 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5568 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5571 "<logicalf_op>\t%0.d, %1.d, %2.d"
5574 ;; -------------------------------------------------------------------------
5575 ;; ---- [FP] Sign copying
5576 ;; -------------------------------------------------------------------------
5577 ;; The patterns in this section are synthetic.
5578 ;; -------------------------------------------------------------------------
;; Implemented with integer operations on the equivalent integer vector
;; mode (<V_INT_EQUIV>):
;;   - "bits" is the element size in bits minus one;
;;   - arg1 and arg2 are integer-mode lowpart subregs of operands 1 and 2;
;;   - two AND instructions with duplicated constant vectors produce the
;;     "sign" and "mant" temporaries;
;;   - an IOR combines sign and mant into int_res, which is moved into
;;     operand 0 through gen_lowpart.
5580 (define_expand "copysign<mode>3"
5581 [(match_operand:SVE_FULL_F 0 "register_operand")
5582 (match_operand:SVE_FULL_F 1 "register_operand")
5583 (match_operand:SVE_FULL_F 2 "register_operand")]
5586 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5587 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
5588 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5589 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5591 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5592 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5594 emit_insn (gen_and<v_int_equiv>3
5596 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5599 emit_insn (gen_and<v_int_equiv>3
5601 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5604 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
5605 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
;; Implemented with integer operations on the equivalent integer vector
;; mode (<V_INT_EQUIV>):
;;   - "bits" is the element size in bits minus one;
;;   - arg1 and arg2 are integer-mode lowpart subregs of operands 1 and 2;
;;   - one AND instruction with a duplicated constant vector produces the
;;     "sign" temporary;
;;   - an XOR of arg1 with sign gives int_res, which is moved into
;;     operand 0 through gen_lowpart.
5610 (define_expand "xorsign<mode>3"
5611 [(match_operand:SVE_FULL_F 0 "register_operand")
5612 (match_operand:SVE_FULL_F 1 "register_operand")
5613 (match_operand:SVE_FULL_F 2 "register_operand")]
5616 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5617 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5618 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5620 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5621 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5623 emit_insn (gen_and<v_int_equiv>3
5625 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5628 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
5629 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5634 ;; -------------------------------------------------------------------------
5635 ;; ---- [FP] Maximum and minimum
5636 ;; -------------------------------------------------------------------------
5642 ;; -------------------------------------------------------------------------
5644 ;; Unpredicated fmax/fmin (the libm functions). The optabs for the
5645 ;; smin/smax rtx codes are handled in the generic section above.
;; The caller supplies operands 0-2; operand 2 may be anything accepted by
;; aarch64_sve_float_maxmin_operand.  The preparation code sets operand 3
;; to aarch64_ptrue_reg (<VPRED>mode), and the operation is marked
;; SVE_RELAXED_GP.
5646 (define_expand "<maxmin_uns><mode>3"
5647 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5650 (const_int SVE_RELAXED_GP)
5651 (match_operand:SVE_FULL_F 1 "register_operand")
5652 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
5653 SVE_COND_FP_MAXMIN_PUBLIC))]
5656 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5660 ;; Predicated floating-point maximum/minimum.
;; Operand 1 is the governing predicate and operand 4 the GP strictness.
;; Operand 3 is either a vsB immediate (printed as #%3) or a register.
;; Alternatives 0 and 1 tie operand 2 to the destination; alternatives 2
;; and 3 copy operand 2 into the destination with MOVPRFX first.
5661 (define_insn "@aarch64_pred_<optab><mode>"
5662 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
5664 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5665 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5666 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
5667 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
5668 SVE_COND_FP_MAXMIN))]
5671 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5672 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5673 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5674 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5675 [(set_attr "movprfx" "*,*,yes,yes")]
5678 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5679 ;; SVE_COND_FP_BINARY_I1.
5681 ;; -------------------------------------------------------------------------
5682 ;; ---- [PRED] Binary logical operations
5683 ;; -------------------------------------------------------------------------
5691 ;; -------------------------------------------------------------------------
5693 ;; Predicate AND. We can reuse one of the inputs as the GP.
5694 ;; Doubling the second operand is the preferred implementation
5695 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
;; In the template, operand 1 serves as the governing predicate (%1/z) and
;; operand 2 is passed as both source operands.
5696 (define_insn "and<mode>3"
5697 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5698 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
5699 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
5701 "and\t%0.b, %1/z, %2.b, %2.b"
5704 ;; Unpredicated predicate EOR and ORR.
;; The caller supplies operands 0-2 only.  The preparation code sets
;; operand 3 to aarch64_ptrue_reg (<MODE>mode).
5705 (define_expand "<optab><mode>3"
5706 [(set (match_operand:PRED_ALL 0 "register_operand")
5708 (LOGICAL_OR:PRED_ALL
5709 (match_operand:PRED_ALL 1 "register_operand")
5710 (match_operand:PRED_ALL 2 "register_operand"))
5714 operands[3] = aarch64_ptrue_reg (<MODE>mode);
5718 ;; Predicated predicate AND, EOR and ORR.
;; Operand 1 is the zeroing governing predicate (%1/z); operands 2 and 3
;; are the two inputs of the logical operation.
5719 (define_insn "@aarch64_pred_<optab><mode>_z"
5720 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5723 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5724 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5725 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5727 "<logical>\t%0.b, %1/z, %2.b, %3.b"
5730 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
5731 ;; the GP. Store the result in operand 0 and set the flags in the same
5732 ;; way as for PTEST.
;; The pattern is a parallel of two sets: one of CC_REGNUM (in CC_NZC mode)
;; and one of operand 0.  Operand 1 is matched in VNx16BI mode regardless
;; of the PRED_ALL mode, and operand 5 must satisfy aarch64_sve_ptrue_flag.
;; The flag-setting instruction is the "s"-suffixed form of <logical>.
5733 (define_insn "*<optab><mode>3_cc"
5734 [(set (reg:CC_NZC CC_REGNUM)
5736 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5738 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5741 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5742 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5745 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5746 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
5749 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5752 ;; Same with just the flags result.
;; Only CC_REGNUM is set here.  Operand 0 is a VNx16BI scratch register
;; that the pattern clobbers, because the "s"-suffixed instruction still
;; has to write its predicate result somewhere.
5753 (define_insn "*<optab><mode>3_ptest"
5754 [(set (reg:CC_NZC CC_REGNUM)
5756 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5758 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5761 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5762 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5765 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5767 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5770 ;; -------------------------------------------------------------------------
5771 ;; ---- [PRED] Binary logical operations (inverted second input)
5772 ;; -------------------------------------------------------------------------
5776 ;; -------------------------------------------------------------------------
5778 ;; Predicated predicate BIC and ORN.
;; Operand 3 is the input that appears inverted (inside not:) in the RTL
;; and operand 2 is the plain input; the assembly still lists them in the
;; order %2, %3.  Operand 1 is the zeroing governing predicate (%1/z).
5779 (define_insn "aarch64_pred_<nlogical><mode>_z"
5780 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5783 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5784 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5785 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5787 "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
5790 ;; Same, but set the flags as a side-effect.
;; A parallel of a CC_REGNUM set (CC_NZC mode) and a set of operand 0.
;; Operand 1 is matched in VNx16BI mode and operand 5 must satisfy
;; aarch64_sve_ptrue_flag.  The second set repeats the operation on
;; match_dups of the inputs, with operand 3 again inside not:.
5791 (define_insn "*<nlogical><mode>3_cc"
5792 [(set (reg:CC_NZC CC_REGNUM)
5794 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5796 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5800 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5801 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5804 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5805 (and:PRED_ALL (NLOGICAL:PRED_ALL
5806 (not:PRED_ALL (match_dup 3))
5810 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5813 ;; Same with just the flags result.
;; Only CC_REGNUM is set; operand 0 is a clobbered VNx16BI scratch that
;; receives the predicate result of the "s"-suffixed instruction.
5814 (define_insn "*<nlogical><mode>3_ptest"
5815 [(set (reg:CC_NZC CC_REGNUM)
5817 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5819 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5823 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5824 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5827 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5829 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5832 ;; -------------------------------------------------------------------------
5833 ;; ---- [PRED] Binary logical operations (inverted result)
5834 ;; -------------------------------------------------------------------------
5838 ;; -------------------------------------------------------------------------
5840 ;; Predicated predicate NAND and NOR.
;; Both inputs (operands 2 and 3) appear inside not: in the RTL.  Operand 1
;; is the zeroing governing predicate (%1/z).
5841 (define_insn "aarch64_pred_<logical_nn><mode>_z"
5842 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5845 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5846 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5847 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5849 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
5852 ;; Same, but set the flags as a side-effect.
;; A parallel of a CC_REGNUM set (CC_NZC mode) and a set of operand 0.
;; Operand 1 is matched in VNx16BI mode and operand 5 must satisfy
;; aarch64_sve_ptrue_flag.  The second set repeats the operation with both
;; match_dups inside not:, using the NLOGICAL iterator.
5853 (define_insn "*<logical_nn><mode>3_cc"
5854 [(set (reg:CC_NZC CC_REGNUM)
5856 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5858 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5862 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5864 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5867 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5868 (and:PRED_ALL (NLOGICAL:PRED_ALL
5869 (not:PRED_ALL (match_dup 2))
5870 (not:PRED_ALL (match_dup 3)))
5873 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5876 ;; Same with just the flags result.
;; Only CC_REGNUM is set; operand 0 is a clobbered VNx16BI scratch that
;; receives the predicate result of the "s"-suffixed instruction.
5877 (define_insn "*<logical_nn><mode>3_ptest"
5878 [(set (reg:CC_NZC CC_REGNUM)
5880 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5882 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5886 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5888 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5891 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5893 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5896 ;; =========================================================================
5897 ;; == Ternary arithmetic
5898 ;; =========================================================================
5900 ;; -------------------------------------------------------------------------
5901 ;; ---- [INT] MLA and MAD
5902 ;; -------------------------------------------------------------------------
5906 ;; -------------------------------------------------------------------------
5908 ;; Unpredicated integer addition of product.
;; Operands 1 and 2 are the multiplication inputs (operand 2 may be any
;; nonmemory_operand) and operand 3 is the addend.  The preparation code
;; tests aarch64_prepare_sve_int_fma (operands, PLUS), and operands[4] is
;; set to the register returned by aarch64_ptrue_reg (<VPRED>mode).
5909 (define_expand "fma<mode>4"
5910 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5915 (match_operand:SVE_FULL_I 1 "register_operand")
5916 (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
5918 (match_operand:SVE_FULL_I 3 "register_operand")))]
5921 if (aarch64_prepare_sve_int_fma (operands, PLUS))
5923 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5927 ;; Predicated integer addition of product.
;; Operands 2 and 3 are the multiplication inputs and operand 4 is the
;; addend.  Alternative 1 ties operand 2 to the destination and uses MAD;
;; alternative 2 ties the addend (operand 4) and uses MLA; alternative 3
;; copies operand 4 into an earlyclobber destination with MOVPRFX and then
;; uses MLA.
5928 (define_insn "@aarch64_pred_fma<mode>"
5929 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5932 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5934 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5935 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5937 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
5940 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5941 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5942 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5943 [(set_attr "movprfx" "*,*,yes")]
5946 ;; Predicated integer addition of product with merging.
;; Operand 1 is the predicate, operands 2 and 3 the multiplication inputs,
;; operand 4 the addend and operand 5 the fallback value (a register or
;; zero).  The preparation code tests
;; aarch64_prepare_sve_cond_int_fma (operands, PLUS), and swaps operands 2
;; and 3 when operand 3 equals the fallback.
5947 (define_expand "cond_fma<mode>"
5948 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5950 [(match_operand:<VPRED> 1 "register_operand")
5953 (match_operand:SVE_FULL_I 2 "register_operand")
5954 (match_operand:SVE_FULL_I 3 "general_operand"))
5955 (match_operand:SVE_FULL_I 4 "register_operand"))
5956 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5960 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
5962 /* Swap the multiplication operands if the fallback value is the
5963 second of the two. */
5964 if (rtx_equal_p (operands[3], operands[5]))
5965 std::swap (operands[2], operands[3]);
5969 ;; Predicated integer addition of product, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and uses MAD directly;
;; alternative 2 first copies operand 2 into an earlyclobber destination
;; with MOVPRFX.
5970 (define_insn "*cond_fma<mode>_2"
5971 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5976 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
5977 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5978 (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
5983 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5984 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
5985 [(set_attr "movprfx" "*,yes")]
5988 ;; Predicated integer addition of product, merging with the third input.
;; Alternative 1 ties the addend (operand 4) to the destination and uses
;; MLA directly; alternative 2 first copies operand 4 into an earlyclobber
;; destination with MOVPRFX.
5989 (define_insn "*cond_fma<mode>_4"
5990 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5992 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5995 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
5996 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5997 (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
6002 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6003 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6004 [(set_attr "movprfx" "*,yes")]
6007 ;; Predicated integer addition of product, merging with an independent value.
;; The insn condition requires operand 5 to differ from operands 2, 3
;; and 4.  In alternatives 1-4 operand 5 is constrained to Dz and a zeroing
;; MOVPRFX (%1/z) is used: from operand 4 in alternative 1, and from
;; whichever of operands 4, 2 or 3 is tied to the destination in
;; alternatives 2-4 (MLA when the addend is tied, MAD when a multiplication
;; input is tied).  Alternative 5 ties operand 5 to the destination and
;; uses a merging MOVPRFX (%1/m) from operand 4.
;; The rewrite applies after reload when operand 5 is a register other
;; than operand 0: it emits vcond_mask to select between operands 4 and 5
;; under operand 1 into operand 0, then replaces operands 4 and 5 with
;; operand 0.
6008 (define_insn_and_rewrite "*cond_fma<mode>_any"
6009 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6011 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6014 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6015 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
6016 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
6017 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6020 && !rtx_equal_p (operands[2], operands[5])
6021 && !rtx_equal_p (operands[3], operands[5])
6022 && !rtx_equal_p (operands[4], operands[5])"
6024 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6025 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6026 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6027 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6028 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6030 "&& reload_completed
6031 && register_operand (operands[5], <MODE>mode)
6032 && !rtx_equal_p (operands[0], operands[5])"
6034 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6035 operands[5], operands[1]));
6036 operands[5] = operands[4] = operands[0];
6038 [(set_attr "movprfx" "yes")]
6041 ;; -------------------------------------------------------------------------
6042 ;; ---- [INT] MLS and MSB
6043 ;; -------------------------------------------------------------------------
6047 ;; -------------------------------------------------------------------------
6049 ;; Unpredicated integer subtraction of product.
;; Operand 3 is the value the product is subtracted from; operands 1 and 2
;; are the multiplication inputs (operand 2 may be any general_operand).
;; The preparation code tests aarch64_prepare_sve_int_fma (operands, MINUS),
;; and operands[4] is set to the register returned by
;; aarch64_ptrue_reg (<VPRED>mode).
6050 (define_expand "fnma<mode>4"
6051 [(set (match_operand:SVE_FULL_I 0 "register_operand")
6053 (match_operand:SVE_FULL_I 3 "register_operand")
6057 (match_operand:SVE_FULL_I 1 "register_operand")
6058 (match_operand:SVE_FULL_I 2 "general_operand"))]
6062 if (aarch64_prepare_sve_int_fma (operands, MINUS))
6064 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6068 ;; Predicated integer subtraction of product.
;; Operand 4 is the value the product is subtracted from; operands 2 and 3
;; are the multiplication inputs.  Alternative 1 ties operand 2 to the
;; destination and uses MSB; alternative 2 ties operand 4 and uses MLS;
;; alternative 3 copies operand 4 into an earlyclobber destination with
;; MOVPRFX and then uses MLS.
6069 (define_insn "@aarch64_pred_fnma<mode>"
6070 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
6072 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
6074 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6076 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
6077 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
6081 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6082 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6083 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6084 [(set_attr "movprfx" "*,*,yes")]
6087 ;; Predicated integer subtraction of product with merging.
;; Operand 1 is the predicate, operand 4 the value the product is
;; subtracted from, operands 2 and 3 the multiplication inputs and
;; operand 5 the fallback value (a register or zero).  The preparation code
;; tests aarch64_prepare_sve_cond_int_fma (operands, MINUS), and swaps
;; operands 2 and 3 when operand 3 equals the fallback.
6088 (define_expand "cond_fnma<mode>"
6089 [(set (match_operand:SVE_FULL_I 0 "register_operand")
6091 [(match_operand:<VPRED> 1 "register_operand")
6093 (match_operand:SVE_FULL_I 4 "register_operand")
6095 (match_operand:SVE_FULL_I 2 "register_operand")
6096 (match_operand:SVE_FULL_I 3 "general_operand")))
6097 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
6101 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6103 /* Swap the multiplication operands if the fallback value is the
6104 second of the two. */
6105 if (rtx_equal_p (operands[3], operands[5]))
6106 std::swap (operands[2], operands[3]);
6110 ;; Predicated integer subtraction of product, merging with the first input.
;; Alternative 1 ties operand 2 to the destination and uses MSB directly;
;; alternative 2 first copies operand 2 into an earlyclobber destination
;; with MOVPRFX.
6111 (define_insn "*cond_fnma<mode>_2"
6112 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6114 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6116 (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
6118 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
6119 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6124 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6125 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6126 [(set_attr "movprfx" "*,yes")]
6129 ;; Predicated integer subtraction of product, merging with the third input.
;; Alternative 1 ties operand 4 to the destination and uses MLS directly;
;; alternative 2 first copies operand 4 into an earlyclobber destination
;; with MOVPRFX.
6130 (define_insn "*cond_fnma<mode>_4"
6131 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6133 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6135 (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
6137 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6138 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6143 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6144 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6145 [(set_attr "movprfx" "*,yes")]
6148 ;; Predicated integer subtraction of product, merging with an
6149 ;; independent value.
;; The insn condition requires operand 5 to differ from operands 2, 3
;; and 4.  In alternatives 1-4 operand 5 is constrained to Dz and a zeroing
;; MOVPRFX (%1/z) is used: from operand 4 in alternative 1, and from
;; whichever of operands 4, 2 or 3 is tied to the destination in
;; alternatives 2-4 (MLS when operand 4 is tied, MSB when a multiplication
;; input is tied).  Alternative 5 ties operand 5 to the destination and
;; uses a merging MOVPRFX (%1/m) from operand 4.
;; The rewrite applies after reload when operand 5 is a register other
;; than operand 0: it emits vcond_mask to select between operands 4 and 5
;; under operand 1 into operand 0, then replaces operands 4 and 5 with
;; operand 0.
6150 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6151 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6153 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6155 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
6157 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6158 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
6159 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6162 && !rtx_equal_p (operands[2], operands[5])
6163 && !rtx_equal_p (operands[3], operands[5])
6164 && !rtx_equal_p (operands[4], operands[5])"
6166 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6167 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6168 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6169 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6170 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6172 "&& reload_completed
6173 && register_operand (operands[5], <MODE>mode)
6174 && !rtx_equal_p (operands[0], operands[5])"
6176 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6177 operands[5], operands[1]));
6178 operands[5] = operands[4] = operands[0];
6180 [(set_attr "movprfx" "yes")]
6183 ;; -------------------------------------------------------------------------
6184 ;; ---- [INT] Dot product
6185 ;; -------------------------------------------------------------------------
6191 ;; -------------------------------------------------------------------------
6193 ;; Four-element integer dot-product with accumulation.
;; Operands 1 and 2 are the <VSI2QI> inputs (printed with <Vetype_fourth>
;; element size) and operand 3 is the accumulator.  Alternative 1 ties the
;; accumulator to the destination; alternative 2 copies it into an
;; earlyclobber destination with MOVPRFX first.
6194 (define_insn "<sur>dot_prod<vsi2qi>"
6195 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6197 (unspec:SVE_FULL_SDI
6198 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6199 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6201 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6204 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6205 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6206 [(set_attr "movprfx" "*,yes")]
6209 ;; Four-element integer dot-product by selected lanes with accumulation.
;; Operand 2 is wrapped together with the constant lane index (operand 3)
;; in UNSPEC_SVE_LANE_SELECT and is restricted to the <sve_lane_con>
;; register constraint; the index is printed as [%3].  Operand 4 is the
;; accumulator, either tied to the destination or copied with MOVPRFX.
6210 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6211 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6213 (unspec:SVE_FULL_SDI
6214 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6216 [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6217 (match_operand:SI 3 "const_int_operand")]
6218 UNSPEC_SVE_LANE_SELECT)]
6220 (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6223 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6224 movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6225 [(set_attr "movprfx" "*,yes")]
;; <sur>DOT with accumulation, restricted to VNx4SI_ONLY destinations:
;; the .b inputs (operands 1 and 2) accumulate into the .s elements of
;; operand 3.  Alternative 1 ties the accumulator to the destination;
;; alternative 2 copies it with MOVPRFX first.
;; NOTE(review): the unspec iterator that selects <sur> is not visible in
;; this excerpt -- confirm which sign combinations this pattern covers.
6228 (define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
6229 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6232 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6233 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6235 (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
6238 <sur>dot\\t%0.s, %1.b, %2.b
6239 movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
6240 [(set_attr "movprfx" "*,yes")]
;; Lane-indexed form of <sur>DOT for VNx4SI_ONLY destinations.  Operand 2
;; is wrapped together with the constant lane index (operand 3) in
;; UNSPEC_SVE_LANE_SELECT and is restricted to the "y" register constraint;
;; the index is printed as [%3].  Operand 4 is the accumulator, either tied
;; to the destination or copied with MOVPRFX.
;; NOTE(review): the unspec iterator that selects <sur> is not visible in
;; this excerpt -- confirm which sign combinations this pattern covers.
6243 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6244 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6247 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6249 [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
6250 (match_operand:SI 3 "const_int_operand")]
6251 UNSPEC_SVE_LANE_SELECT)]
6253 (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
6256 <sur>dot\\t%0.s, %1.b, %2.b[%3]
6257 movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
6258 [(set_attr "movprfx" "*,yes")]
6261 ;; -------------------------------------------------------------------------
6262 ;; ---- [INT] Sum of absolute differences
6263 ;; -------------------------------------------------------------------------
6264 ;; The patterns in this section are synthetic.
6265 ;; -------------------------------------------------------------------------
6267 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6268 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
6269 ;; the difference into a vector and accumulate that into operand 3 before
6270 ;; copying that into the result operand 0.
6271 ;; Perform that with a sequence of:
6273 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
6274 ;; MOVPRFX op0, op3 // If necessary
6275 ;; UDOT op0.s, diff.b, ones.b
;; In the preparation code, "ones" is CONST1_RTX (<VSI2QI>mode) forced into
;; a register and "diff" is a fresh <VSI2QI> pseudo.  <sur> only selects
;; which absolute-difference generator is called; the accumulation always
;; goes through gen_udot_prod<vsi2qi>.
6276 (define_expand "<sur>sad<vsi2qi>"
6277 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6278 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6279 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6280 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6283 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6284 rtx diff = gen_reg_rtx (<VSI2QI>mode);
6285 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6286 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6291 ;; -------------------------------------------------------------------------
6292 ;; ---- [INT] Matrix multiply-accumulate
6293 ;; -------------------------------------------------------------------------
6298 ;; -------------------------------------------------------------------------
;; <sur>MMLA with accumulation.  Operands 2 and 3 are the <VSI2QI> inputs
;; (printed as .b) and operand 1 is the VNx4SI_ONLY accumulator (printed
;; as .s).  Alternative 1 ties the accumulator to the destination;
;; alternative 2 copies it into an earlyclobber destination with MOVPRFX.
6300 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
6301 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6304 [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
6305 (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
6307 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
6310 <sur>mmla\\t%0.s, %2.b, %3.b
6311 movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
6312 [(set_attr "movprfx" "*,yes")]
6315 ;; -------------------------------------------------------------------------
6316 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6317 ;; -------------------------------------------------------------------------
6318 ;; Includes merging patterns for:
6327 ;; -------------------------------------------------------------------------
6329 ;; Unpredicated floating-point ternary operations.
;; Iterates over SVE_COND_FP_TERNARY.  The generated unspec carries
;; SVE_RELAXED_GP as its strictness operand, and operands[4] is set by the
;; preparation code to the register returned by
;; aarch64_ptrue_reg (<VPRED>mode).
6330 (define_expand "<optab><mode>4"
6331 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6334 (const_int SVE_RELAXED_GP)
6335 (match_operand:SVE_FULL_F 1 "register_operand")
6336 (match_operand:SVE_FULL_F 2 "register_operand")
6337 (match_operand:SVE_FULL_F 3 "register_operand")]
6338 SVE_COND_FP_TERNARY))]
6341 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6345 ;; Predicated floating-point ternary operations.
;; Operand 1 is the governing predicate and operand 5 the GP strictness
;; flag.  Alternative 1 ties operand 4 to the destination and uses
;; <sve_fmla_op>; alternative 2 ties operand 2 and uses <sve_fmad_op>;
;; alternative 3 copies operand 4 into an earlyclobber destination with
;; MOVPRFX and then uses <sve_fmla_op>.
6346 (define_insn "@aarch64_pred_<optab><mode>"
6347 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
6349 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6350 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6351 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
6352 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
6353 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
6354 SVE_COND_FP_TERNARY))]
6357 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6358 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6359 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6360 [(set_attr "movprfx" "*,*,yes")]
6363 ;; Predicated floating-point ternary operations with merging.
;; Operand 1 is the predicate, operands 2-4 are the inputs of the ternary
;; operation and operand 5 is the fallback value (a register or zero).
;; The inner unspec carries SVE_STRICT_GP.  The preparation code swaps
;; operands 2 and 3 when operand 3 equals the fallback.
6364 (define_expand "@cond_<optab><mode>"
6365 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6367 [(match_operand:<VPRED> 1 "register_operand")
6370 (const_int SVE_STRICT_GP)
6371 (match_operand:SVE_FULL_F 2 "register_operand")
6372 (match_operand:SVE_FULL_F 3 "register_operand")
6373 (match_operand:SVE_FULL_F 4 "register_operand")]
6374 SVE_COND_FP_TERNARY)
6375 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6379 /* Swap the multiplication operands if the fallback value is the
6380 second of the two. */
6381 if (rtx_equal_p (operands[3], operands[5]))
6382 std::swap (operands[2], operands[3]);
6385 ;; Predicated floating-point ternary operations, merging with the
;; first input (operand 2).
;; Alternative 1 ties operand 2 to the destination and uses <sve_fmad_op>
;; directly; alternative 2 first copies operand 2 into an earlyclobber
;; destination with MOVPRFX.  The insn condition requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]); the rewrite
;; replaces operand 5 with a copy of operand 1 whenever the two differ.
6387 (define_insn_and_rewrite "*cond_<optab><mode>_2"
6388 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6390 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6393 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6394 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6395 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6396 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6397 SVE_COND_FP_TERNARY)
6400 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6402 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6403 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6404 "&& !rtx_equal_p (operands[1], operands[5])"
6406 operands[5] = copy_rtx (operands[1]);
6408 [(set_attr "movprfx" "*,yes")]
6411 ;; Predicated floating-point ternary operations, merging with the
;; third input (operand 4).
;; Alternative 1 ties operand 4 to the destination and uses <sve_fmla_op>
;; directly; alternative 2 first copies operand 4 into an earlyclobber
;; destination with MOVPRFX.  The insn condition requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]); the rewrite
;; replaces operand 5 with a copy of operand 1 whenever the two differ.
6413 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6414 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6416 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6419 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6420 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6421 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6422 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6423 SVE_COND_FP_TERNARY)
6426 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6428 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6429 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6430 "&& !rtx_equal_p (operands[1], operands[5])"
6432 operands[5] = copy_rtx (operands[1]);
6434 [(set_attr "movprfx" "*,yes")]
6437 ;; Predicated floating-point ternary operations, merging with an
6438 ;; independent value.
;; The insn condition requires operand 5 to differ from operands 2, 3
;; and 4, and aarch64_sve_pred_dominates_p (&operands[6], operands[1]) to
;; hold.  In alternatives 1-4 operand 5 is constrained to Dz and a zeroing
;; MOVPRFX (%1/z) is used: from operand 4 in alternative 1, and from
;; whichever of operands 4, 2 or 3 is tied to the destination in
;; alternatives 2-4.  Alternative 5 ties operand 5 to the destination and
;; uses a merging MOVPRFX (%1/m) from operand 4.
;; The rewrite code has two cases.  After reload, when operand 5 is a
;; register other than operand 0, it emits vcond_mask to select between
;; operands 4 and 5 under operand 1 into operand 0 and then replaces
;; operands 4 and 5 with operand 0.  Otherwise, when operands 1 and 6
;; differ, it replaces operand 6 with a copy of operand 1.
6439 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6440 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6442 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6445 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6446 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
6447 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
6448 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
6449 SVE_COND_FP_TERNARY)
6450 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6453 && !rtx_equal_p (operands[2], operands[5])
6454 && !rtx_equal_p (operands[3], operands[5])
6455 && !rtx_equal_p (operands[4], operands[5])
6456 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6458 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6459 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6460 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6461 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6462 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6466 if (reload_completed
6467 && register_operand (operands[5], <MODE>mode)
6468 && !rtx_equal_p (operands[0], operands[5]))
6470 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6471 operands[5], operands[1]));
6472 operands[5] = operands[4] = operands[0];
6474 else if (!rtx_equal_p (operands[1], operands[6]))
6475 operands[6] = copy_rtx (operands[1]);
6479 [(set_attr "movprfx" "yes")]
6482 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
6483 ;; (fma ...) since target-independent code won't understand the indexing.
;; Operand 2 is wrapped together with the constant lane index (operand 3)
;; in UNSPEC_SVE_LANE_SELECT and is restricted to the <sve_lane_con>
;; register constraint; the index is printed as [%3].  Operand 4 is either
;; tied to the destination or copied into an earlyclobber destination with
;; MOVPRFX.
6484 (define_insn "@aarch64_<optab>_lane_<mode>"
6485 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6487 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
6489 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6490 (match_operand:SI 3 "const_int_operand")]
6491 UNSPEC_SVE_LANE_SELECT)
6492 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6493 SVE_FP_TERNARY_LANE))]
6496 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
6497 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6498 [(set_attr "movprfx" "*,yes")]
6501 ;; -------------------------------------------------------------------------
6502 ;; ---- [FP] Complex multiply-add
6503 ;; -------------------------------------------------------------------------
6504 ;; Includes merging patterns for:
6506 ;; -------------------------------------------------------------------------
6508 ;; Predicated FCMLA.
;; Operand 1 is the governing predicate (%1/m) and operand 5 the GP
;; strictness flag.  Operand 4 is either tied to the destination or copied
;; into an earlyclobber destination with MOVPRFX.  The rotation immediate
;; comes from the <rot> iterator attribute rather than from an operand.
6509 (define_insn "@aarch64_pred_<optab><mode>"
6510 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6512 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6513 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6514 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6515 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6516 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6520 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6521 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6522 [(set_attr "movprfx" "*,yes")]
6525 ;; Predicated FCMLA with merging.
;; Operand 1 is the predicate, operands 2-4 are the FCMLA inputs and
;; operand 5 is the fallback value (a register or zero).  The inner unspec
;; carries SVE_STRICT_GP.
6526 (define_expand "@cond_<optab><mode>"
6527 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6529 [(match_operand:<VPRED> 1 "register_operand")
6532 (const_int SVE_STRICT_GP)
6533 (match_operand:SVE_FULL_F 2 "register_operand")
6534 (match_operand:SVE_FULL_F 3 "register_operand")
6535 (match_operand:SVE_FULL_F 4 "register_operand")]
6537 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6542 ;; Predicated FCMLA, merging with the third input.
;; Alternative 1 ties operand 4 to the destination; alternative 2 first
;; copies operand 4 into an earlyclobber destination with MOVPRFX.  The
;; insn condition requires
;; aarch64_sve_pred_dominates_p (&operands[5], operands[1]); the rewrite
;; replaces operand 5 with a copy of operand 1 whenever the two differ.
6543 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6544 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6546 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6549 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6550 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6551 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6552 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6556 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6558 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6559 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6560 "&& !rtx_equal_p (operands[1], operands[5])"
6562 operands[5] = copy_rtx (operands[1]);
6564 [(set_attr "movprfx" "*,yes")]
6567 ;; Predicated FCMLA, merging with an independent value.
;; The insn condition requires operand 5 to differ from operand 4 and
;; aarch64_sve_pred_dominates_p (&operands[6], operands[1]) to hold.
;; Alternatives 1 and 2 constrain operand 5 to Dz and use a zeroing MOVPRFX
;; (%1/z), from operand 4 or from the destination when operand 4 is tied
;; to it.  Alternative 3 ties operand 5 to the destination and uses a
;; merging MOVPRFX (%1/m) from operand 4.
;; The rewrite code has two cases.  After reload, when operand 5 is a
;; register other than operand 0, it emits vcond_mask to select between
;; operands 4 and 5 under operand 1 into operand 0 and then replaces
;; operands 4 and 5 with operand 0.  Otherwise, when operands 1 and 6
;; differ, it replaces operand 6 with a copy of operand 1.
6568 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6569 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
6571 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6574 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6575 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
6576 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
6577 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
6579 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
6582 && !rtx_equal_p (operands[4], operands[5])
6583 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6585 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6586 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6587 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6591 if (reload_completed
6592 && register_operand (operands[5], <MODE>mode)
6593 && !rtx_equal_p (operands[0], operands[5]))
6595 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6596 operands[5], operands[1]));
6597 operands[5] = operands[4] = operands[0];
6599 else if (!rtx_equal_p (operands[1], operands[6]))
6600 operands[6] = copy_rtx (operands[1]);
6604 [(set_attr "movprfx" "yes")]
6607 ;; Unpredicated FCMLA with indexing.
;; Defined only for SVE_FULL_HSF modes.  Operand 2 is wrapped together with
;; the constant index (operand 3) in UNSPEC_SVE_LANE_SELECT and is
;; restricted to the <sve_lane_pair_con> register constraint.  Operand 4 is
;; either tied to the destination or copied into an earlyclobber
;; destination with MOVPRFX.  The rotation comes from <rot>.
6608 (define_insn "@aarch64_<optab>_lane_<mode>"
6609 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
6610 (unspec:SVE_FULL_HSF
6611 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
6612 (unspec:SVE_FULL_HSF
6613 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
6614 (match_operand:SI 3 "const_int_operand")]
6615 UNSPEC_SVE_LANE_SELECT)
6616 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
6620 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
6621 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
6622 [(set_attr "movprfx" "*,yes")]
6625 ;; -------------------------------------------------------------------------
6626 ;; ---- [FP] Trigonometric multiply-add
6627 ;; -------------------------------------------------------------------------
6630 ;; -------------------------------------------------------------------------
;; FTMAD.  Operand 1 is either tied to the destination or copied into an
;; earlyclobber destination with MOVPRFX, so the instruction always reads
;; its first source from %0.  Operand 2 is the second vector input and
;; operand 3 is a DImode constant printed as the immediate #%3.
6632 (define_insn "@aarch64_sve_tmad<mode>"
6633 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6635 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
6636 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6637 (match_operand:DI 3 "const_int_operand")]
6641 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
6642 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
6643 [(set_attr "movprfx" "*,yes")]
6646 ;; -------------------------------------------------------------------------
6647 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
6648 ;; -------------------------------------------------------------------------
6654 ;; -------------------------------------------------------------------------
;; SVE_BFLOAT_TERNARY_LONG operations.  Operands 2 and 3 are VNx8BF inputs
;; (printed as .h) and operand 1 is the VNx4SF accumulator (printed as .s),
;; either tied to the destination or copied into an earlyclobber
;; destination with MOVPRFX.
6656 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
6657 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
6659 [(match_operand:VNx4SF 1 "register_operand" "0, w")
6660 (match_operand:VNx8BF 2 "register_operand" "w, w")
6661 (match_operand:VNx8BF 3 "register_operand" "w, w")]
6662 SVE_BFLOAT_TERNARY_LONG))]
6665 <sve_fp_op>\t%0.s, %2.h, %3.h
6666 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
6667 [(set_attr "movprfx" "*,yes")]
6670 ;; The immediate range is enforced before generating the instruction.
;; Lane-indexed form of the SVE_BFLOAT_TERNARY_LONG_LANE operations.
;; Operand 3 is restricted to the "y" register constraint and is indexed by
;; the constant operand 4 (printed as [%4]).  Operand 1 is the VNx4SF
;; accumulator, either tied to the destination or copied with MOVPRFX.
6671 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
6672 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
6674 [(match_operand:VNx4SF 1 "register_operand" "0, w")
6675 (match_operand:VNx8BF 2 "register_operand" "w, w")
6676 (match_operand:VNx8BF 3 "register_operand" "y, y")
6677 (match_operand:SI 4 "const_int_operand")]
6678 SVE_BFLOAT_TERNARY_LONG_LANE))]
6681 <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
6682 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
6683 [(set_attr "movprfx" "*,yes")]
6686 ;; -------------------------------------------------------------------------
6687 ;; ---- [FP] Matrix multiply-accumulate
6688 ;; -------------------------------------------------------------------------
6690 ;; - FMMLA (F32MM,F64MM)
6691 ;; -------------------------------------------------------------------------
6693 ;; The mode iterator enforces the target requirements.
6694 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
6695 [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
6697 [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
6698 (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
6699 (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
6703 <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
6704 movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
6705 [(set_attr "movprfx" "*,yes")]
6708 ;; =========================================================================
6709 ;; == Comparisons and selects
6710 ;; =========================================================================
6712 ;; -------------------------------------------------------------------------
6713 ;; ---- [INT,FP] Select based on predicates
6714 ;; -------------------------------------------------------------------------
6715 ;; Includes merging patterns for:
6719 ;; -------------------------------------------------------------------------
6721 ;; vcond_mask operand order: true, false, mask
6722 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
6723 ;; SEL operand order: mask, true, false
6724 (define_expand "@vcond_mask_<mode><vpred>"
6725 [(set (match_operand:SVE_FULL 0 "register_operand")
6727 [(match_operand:<VPRED> 3 "register_operand")
6728 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
6729 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
6733 if (register_operand (operands[1], <MODE>mode))
6734 operands[2] = force_reg (<MODE>mode, operands[2]);
6740 ;; - a duplicated immediate and a register
6741 ;; - a duplicated immediate and zero
6742 (define_insn "*vcond_mask_<mode><vpred>"
6743 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
6745 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
6746 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
6747 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
6750 && (!register_operand (operands[1], <MODE>mode)
6751 || register_operand (operands[2], <MODE>mode))"
6753 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
6754 mov\t%0.<Vetype>, %3/m, #%I1
6755 mov\t%0.<Vetype>, %3/z, #%I1
6756 fmov\t%0.<Vetype>, %3/m, #%1
6757 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
6758 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
6759 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
6760 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
6763 ;; Optimize selects between a duplicated scalar variable and another vector,
6764 ;; the latter of which can be a zero constant or a variable. Treat duplicates
6765 ;; of GPRs as being more expensive than duplicates of FPRs, since they
6766 ;; involve a cross-file move.
6767 (define_insn "@aarch64_sel_dup<mode>"
6768 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
6770 [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6771 (vec_duplicate:SVE_FULL
6772 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
6773 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
6777 mov\t%0.<Vetype>, %3/m, %<vwcore>1
6778 mov\t%0.<Vetype>, %3/m, %<Vetype>1
6779 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6780 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
6781 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6782 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
6783 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
6786 ;; -------------------------------------------------------------------------
6787 ;; ---- [INT,FP] Compare and select
6788 ;; -------------------------------------------------------------------------
6789 ;; The patterns in this section are synthetic.
6790 ;; -------------------------------------------------------------------------
6792 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
6793 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
6794 (define_expand "vcond<mode><v_int_equiv>"
6795 [(set (match_operand:SVE_FULL 0 "register_operand")
6796 (if_then_else:SVE_FULL
6797 (match_operator 3 "comparison_operator"
6798 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6799 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6800 (match_operand:SVE_FULL 1 "nonmemory_operand")
6801 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6804 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6809 ;; Integer vcondu. Don't enforce an immediate range here, since it
6810 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
6811 (define_expand "vcondu<mode><v_int_equiv>"
6812 [(set (match_operand:SVE_FULL 0 "register_operand")
6813 (if_then_else:SVE_FULL
6814 (match_operator 3 "comparison_operator"
6815 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6816 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6817 (match_operand:SVE_FULL 1 "nonmemory_operand")
6818 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6821 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6826 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
6827 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
6828 (define_expand "vcond<mode><v_fp_equiv>"
6829 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
6830 (if_then_else:SVE_FULL_HSD
6831 (match_operator 3 "comparison_operator"
6832 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
6833 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
6834 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
6835 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
6838 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
6843 ;; -------------------------------------------------------------------------
6844 ;; ---- [INT] Comparisons
6845 ;; -------------------------------------------------------------------------
6857 ;; -------------------------------------------------------------------------
6859 ;; Signed integer comparisons. Don't enforce an immediate range here, since
6860 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
6862 (define_expand "vec_cmp<mode><vpred>"
6864 [(set (match_operand:<VPRED> 0 "register_operand")
6865 (match_operator:<VPRED> 1 "comparison_operator"
6866 [(match_operand:SVE_FULL_I 2 "register_operand")
6867 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6868 (clobber (reg:CC_NZC CC_REGNUM))])]
6871 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6872 operands[2], operands[3]);
6877 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
6878 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int instead.
6880 (define_expand "vec_cmpu<mode><vpred>"
6882 [(set (match_operand:<VPRED> 0 "register_operand")
6883 (match_operator:<VPRED> 1 "comparison_operator"
6884 [(match_operand:SVE_FULL_I 2 "register_operand")
6885 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6886 (clobber (reg:CC_NZC CC_REGNUM))])]
6889 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6890 operands[2], operands[3]);
6895 ;; Predicated integer comparisons.
6896 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
6897 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6899 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6900 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6901 (SVE_INT_CMP:<VPRED>
6902 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
6903 (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6905 (clobber (reg:CC_NZC CC_REGNUM))]
6908 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
6909 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6912 ;; Predicated integer comparisons in which both the flag and predicate
6913 ;; results are interesting.
6914 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
6915 [(set (reg:CC_NZC CC_REGNUM)
6917 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6919 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6922 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6923 (SVE_INT_CMP:<VPRED>
6924 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6925 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6928 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6932 (SVE_INT_CMP:<VPRED>
6937 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6939 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6940 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6941 "&& !rtx_equal_p (operands[4], operands[6])"
6943 operands[6] = copy_rtx (operands[4]);
6944 operands[7] = operands[5];
6948 ;; Predicated integer comparisons in which only the flags result is interesting.
6950 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
6951 [(set (reg:CC_NZC CC_REGNUM)
6953 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6955 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6958 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6959 (SVE_INT_CMP:<VPRED>
6960 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6961 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6964 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
6966 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6968 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6969 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6970 "&& !rtx_equal_p (operands[4], operands[6])"
6972 operands[6] = copy_rtx (operands[4]);
6973 operands[7] = operands[5];
6977 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
6978 ;; comparison with an AND. Split the instruction into its preferred form
6979 ;; at the earliest opportunity, in order to get rid of the redundant PTRUE operand.
6981 (define_insn_and_split "*cmp<cmp_op><mode>_and"
6982 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6986 (const_int SVE_KNOWN_PTRUE)
6987 (SVE_INT_CMP:<VPRED>
6988 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6989 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6991 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
6992 (clobber (reg:CC_NZC CC_REGNUM))]
7000 (const_int SVE_MAYBE_NOT_PTRUE)
7001 (SVE_INT_CMP:<VPRED>
7005 (clobber (reg:CC_NZC CC_REGNUM))])]
7008 ;; Predicated integer wide comparisons.
7009 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
7010 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7012 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7013 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7015 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
7016 (match_operand:VNx2DI 4 "register_operand" "w")]
7017 SVE_COND_INT_CMP_WIDE)]
7019 (clobber (reg:CC_NZC CC_REGNUM))]
7021 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
7024 ;; Predicated integer wide comparisons in which both the flag and
7025 ;; predicate results are interesting.
7026 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
7027 [(set (reg:CC_NZC CC_REGNUM)
7029 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7031 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7033 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7034 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7036 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7037 (match_operand:VNx2DI 3 "register_operand" "w")]
7038 SVE_COND_INT_CMP_WIDE)]
7041 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7048 SVE_COND_INT_CMP_WIDE)]
7051 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7052 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7055 ;; Predicated integer wide comparisons in which only the flags result is interesting.
7057 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
7058 [(set (reg:CC_NZC CC_REGNUM)
7060 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7062 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7064 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7065 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7067 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7068 (match_operand:VNx2DI 3 "register_operand" "w")]
7069 SVE_COND_INT_CMP_WIDE)]
7072 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
7074 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7075 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7078 ;; -------------------------------------------------------------------------
7079 ;; ---- [INT] While tests
7080 ;; -------------------------------------------------------------------------
7092 ;; -------------------------------------------------------------------------
7094 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
7095 ;; true for all J in [0, I].
7096 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
7097 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7098 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7099 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7101 (clobber (reg:CC_NZC CC_REGNUM))]
7103 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7106 ;; The WHILE instructions set the flags in the same way as a PTEST with
7107 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
7108 ;; operands to the PTEST aren't needed, so we allow them to be anything.
7109 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
7110 [(set (reg:CC_NZC CC_REGNUM)
7114 (const_int SVE_KNOWN_PTRUE)
7116 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7117 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7120 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7121 (unspec:PRED_ALL [(match_dup 1)
7125 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7126 ;; Force the compiler to drop the unused predicate operand, so that we
7127 ;; don't have an unnecessary PTRUE.
7128 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
7130 operands[3] = CONSTM1_RTX (VNx16BImode);
7131 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
7135 ;; Same, but handle the case in which only the flags result is useful.
7136 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
7137 [(set (reg:CC_NZC CC_REGNUM)
7141 (const_int SVE_KNOWN_PTRUE)
7143 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7144 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7147 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
7149 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7150 ;; Force the compiler to drop the unused predicate operand, so that we
7151 ;; don't have an unnecessary PTRUE.
7152 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
7154 operands[3] = CONSTM1_RTX (VNx16BImode);
7155 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
7159 ;; -------------------------------------------------------------------------
7160 ;; ---- [FP] Direct comparisons
7161 ;; -------------------------------------------------------------------------
7170 ;; -------------------------------------------------------------------------
7172 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
7173 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO with zero.
7175 (define_expand "vec_cmp<mode><vpred>"
7176 [(set (match_operand:<VPRED> 0 "register_operand")
7177 (match_operator:<VPRED> 1 "comparison_operator"
7178 [(match_operand:SVE_FULL_F 2 "register_operand")
7179 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
7182 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
7183 operands[2], operands[3], false);
7188 ;; Predicated floating-point comparisons.
7189 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
7190 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7192 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7193 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7194 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7195 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
7196 SVE_COND_FP_CMP_I0))]
7199 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
7200 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7203 ;; Same for unordered comparisons.
7204 (define_insn "@aarch64_pred_fcmuo<mode>"
7205 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7207 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7208 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7209 (match_operand:SVE_FULL_F 3 "register_operand" "w")
7210 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
7211 UNSPEC_COND_FCMUO))]
7213 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7216 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
7217 ;; with another predicate P. This does not have the same trapping behavior
7218 ;; as predicating the comparison itself on P, but it's a legitimate fold,
7219 ;; since we can drop any potentially-trapping operations whose results are not needed.
7222 ;; Split the instruction into its preferred form (below) at the earliest
7223 ;; opportunity, in order to get rid of the redundant operand 1.
7224 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
7225 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7228 [(match_operand:<VPRED> 1)
7229 (const_int SVE_KNOWN_PTRUE)
7230 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7231 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
7233 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
7240 (const_int SVE_MAYBE_NOT_PTRUE)
7243 SVE_COND_FP_CMP_I0))]
7246 ;; Same for unordered comparisons.
7247 (define_insn_and_split "*fcmuo<mode>_and_combine"
7248 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7251 [(match_operand:<VPRED> 1)
7252 (const_int SVE_KNOWN_PTRUE)
7253 (match_operand:SVE_FULL_F 2 "register_operand" "w")
7254 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7256 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
7263 (const_int SVE_MAYBE_NOT_PTRUE)
7266 UNSPEC_COND_FCMUO))]
7269 ;; -------------------------------------------------------------------------
7270 ;; ---- [FP] Absolute comparisons
7271 ;; -------------------------------------------------------------------------
7277 ;; -------------------------------------------------------------------------
7279 ;; Predicated floating-point absolute comparisons.
7280 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
7281 [(set (match_operand:<VPRED> 0 "register_operand")
7283 [(match_operand:<VPRED> 1 "register_operand")
7284 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7288 (match_operand:SVE_FULL_F 3 "register_operand")]
7293 (match_operand:SVE_FULL_F 4 "register_operand")]
7295 SVE_COND_FP_ABS_CMP))]
7299 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
7300 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7302 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7303 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
7306 (match_operand:SI 6 "aarch64_sve_gp_strictness")
7307 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7311 (match_operand:SI 8 "aarch64_sve_gp_strictness")
7312 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7314 SVE_COND_FP_ABS_CMP))]
7316 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
7317 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
7318 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7319 "&& (!rtx_equal_p (operands[1], operands[5])
7320 || !rtx_equal_p (operands[1], operands[7]))"
7322 operands[5] = copy_rtx (operands[1]);
7323 operands[7] = copy_rtx (operands[1]);
7327 ;; -------------------------------------------------------------------------
7328 ;; ---- [PRED] Select
7329 ;; -------------------------------------------------------------------------
7332 ;; -------------------------------------------------------------------------
7334 (define_insn "@vcond_mask_<mode><mode>"
7335 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7338 (match_operand:PRED_ALL 3 "register_operand" "Upa")
7339 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
7342 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
7344 "sel\t%0.b, %3, %1.b, %2.b"
7347 ;; -------------------------------------------------------------------------
7348 ;; ---- [PRED] Test bits
7349 ;; -------------------------------------------------------------------------
7352 ;; -------------------------------------------------------------------------
7354 ;; Branch based on predicate equality or inequality.
7355 (define_expand "cbranch<mode>4"
7358 (match_operator 0 "aarch64_equality_operator"
7359 [(match_operand:PRED_ALL 1 "register_operand")
7360 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
7361 (label_ref (match_operand 3 ""))
7365 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
7366 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
7367 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
7369 if (operands[2] == CONST0_RTX (<MODE>mode))
7373 pred = gen_reg_rtx (<MODE>mode);
7374 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
7377 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
7378 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
7379 operands[2] = const0_rtx;
7383 ;; See "Description of UNSPEC_PTEST" above for details.
7384 (define_insn "aarch64_ptest<mode>"
7385 [(set (reg:CC_NZC CC_REGNUM)
7386 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
7388 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7389 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
7395 ;; =========================================================================
7397 ;; =========================================================================
7399 ;; -------------------------------------------------------------------------
7400 ;; ---- [INT,FP] Conditional reductions
7401 ;; -------------------------------------------------------------------------
7405 ;; -------------------------------------------------------------------------
7407 ;; Set operand 0 to the last active element in operand 3, or to tied
7408 ;; operand 1 if no elements are active.
7409 (define_insn "@fold_extract_<last_op>_<mode>"
7410 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
7412 [(match_operand:<VEL> 1 "register_operand" "0, 0")
7413 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7414 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7418 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
7419 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
7422 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
7423 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7425 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7426 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7427 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7431 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
7432 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
7435 ;; -------------------------------------------------------------------------
7436 ;; ---- [INT] Tree reductions
7437 ;; -------------------------------------------------------------------------
7448 ;; -------------------------------------------------------------------------
7450 ;; Unpredicated integer add reduction.
7451 (define_expand "reduc_plus_scal_<mode>"
7452 [(match_operand:<VEL> 0 "register_operand")
7453 (match_operand:SVE_FULL_I 1 "register_operand")]
7456 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
7457 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
7458 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
7459 if (tmp != operands[0])
7460 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
7465 ;; Predicated integer add reduction. The result is always 64 bits.
7466 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7467 [(set (match_operand:DI 0 "register_operand" "=w")
7468 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
7469 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7471 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
7472 "<su>addv\t%d0, %1, %2.<Vetype>"
7475 ;; Unpredicated integer reductions.
7476 (define_expand "reduc_<optab>_scal_<mode>"
7477 [(set (match_operand:<VEL> 0 "register_operand")
7478 (unspec:<VEL> [(match_dup 2)
7479 (match_operand:SVE_FULL_I 1 "register_operand")]
7480 SVE_INT_REDUCTION))]
7483 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7487 ;; Predicated integer reductions.
7488 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7489 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7490 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7491 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7492 SVE_INT_REDUCTION))]
7494 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
7497 ;; -------------------------------------------------------------------------
7498 ;; ---- [FP] Tree reductions
7499 ;; -------------------------------------------------------------------------
7506 ;; -------------------------------------------------------------------------
7508 ;; Unpredicated floating-point tree reductions.
7509 (define_expand "reduc_<optab>_scal_<mode>"
7510 [(set (match_operand:<VEL> 0 "register_operand")
7511 (unspec:<VEL> [(match_dup 2)
7512 (match_operand:SVE_FULL_F 1 "register_operand")]
7516 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7520 ;; Predicated floating-point tree reductions.
7521 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7522 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7523 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7524 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7527 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
7530 ;; -------------------------------------------------------------------------
7531 ;; ---- [FP] Left-to-right reductions
7532 ;; -------------------------------------------------------------------------
7535 ;; -------------------------------------------------------------------------
7537 ;; Unpredicated in-order FP reductions.
7538 (define_expand "fold_left_plus_<mode>"
7539 [(set (match_operand:<VEL> 0 "register_operand")
7540 (unspec:<VEL> [(match_dup 3)
7541 (match_operand:<VEL> 1 "register_operand")
7542 (match_operand:SVE_FULL_F 2 "register_operand")]
7546 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
7550 ;; Predicated in-order FP reductions.
7551 (define_insn "mask_fold_left_plus_<mode>"
7552 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7553 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
7554 (match_operand:<VEL> 1 "register_operand" "0")
7555 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7558 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
7561 ;; =========================================================================
7563 ;; =========================================================================
7565 ;; -------------------------------------------------------------------------
7566 ;; ---- [INT,FP] General permutes
7567 ;; -------------------------------------------------------------------------
7570 ;; -------------------------------------------------------------------------
7572 (define_expand "vec_perm<mode>"
7573 [(match_operand:SVE_FULL 0 "register_operand")
7574 (match_operand:SVE_FULL 1 "register_operand")
7575 (match_operand:SVE_FULL 2 "register_operand")
7576 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
7577 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
7579 aarch64_expand_sve_vec_perm (operands[0], operands[1],
7580 operands[2], operands[3]);
7585 (define_insn "@aarch64_sve_tbl<mode>"
7586 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7588 [(match_operand:SVE_FULL 1 "register_operand" "w")
7589 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
7592 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7595 ;; -------------------------------------------------------------------------
7596 ;; ---- [INT,FP] Special-purpose unary permutes
7597 ;; -------------------------------------------------------------------------
7602 ;; -------------------------------------------------------------------------
7604 ;; Compact active elements and pad with zeros.
7605 (define_insn "@aarch64_sve_compact<mode>"
7606 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
7608 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7609 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
7610 UNSPEC_SVE_COMPACT))]
7612 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
7615 ;; Duplicate one element of a vector.
7616 (define_insn "@aarch64_sve_dup_lane<mode>"
7617 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7618 (vec_duplicate:SVE_FULL
7620 (match_operand:SVE_FULL 1 "register_operand" "w")
7621 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
7623 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7624 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
7627 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
7629 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
7630 ;; number op2 + N of op1. (We don't need to distinguish between memory
7631 ;; and architectural register lane numbering for op1 or op0, since the
7632 ;; two numbering schemes are the same for SVE.)
7634 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
7635 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
7636 ;; of op0. We therefore get the correct result for both endiannesses.
7638 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
7639 ;; is in the opposite order to architectural register lane numbering.
7640 ;; Thus if we were to do this operation via a V128 temporary register,
7641 ;; the vec_select and vec_duplicate would both involve a reverse operation
7642 ;; for big-endian targets. In this fused pattern the two reverses cancel each other out.
7644 (define_insn "@aarch64_sve_dupq_lane<mode>"
7645 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7646 (vec_duplicate:SVE_FULL
7648 (match_operand:SVE_FULL 1 "register_operand" "w")
7649 (match_operand 2 "ascending_int_parallel"))))]
7651 && (INTVAL (XVECEXP (operands[2], 0, 0))
7652 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
7653 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
7654 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7656 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
7657 * GET_MODE_SIZE (<VEL>mode));
7658 operands[2] = gen_int_mode (byte / 16, DImode);
7659 return "dup\t%0.q, %1.q[%2]";
7663 ;; Reverse the order of elements within a full vector.
7664 (define_insn "@aarch64_sve_rev<mode>"
7665 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7667 [(match_operand:SVE_FULL 1 "register_operand" "w")]
7670 "rev\t%0.<Vetype>, %1.<Vetype>")
7672 ;; -------------------------------------------------------------------------
7673 ;; ---- [INT,FP] Special-purpose binary permutes
7674 ;; -------------------------------------------------------------------------
7683 ;; -------------------------------------------------------------------------
7685 ;; Like EXT, but start at the first active element.
;; Operand 1 is the governing predicate.  The SPLICE template uses the
;; destination as its first vector input, so the first alternative ties
;; operand 2 to operand 0, while the second copies operand 2 into an
;; earlyclobber destination with MOVPRFX before the SPLICE.
7686 (define_insn "@aarch64_sve_splice<mode>"
7687 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7689 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7690 (match_operand:SVE_FULL 2 "register_operand" "0, w")
7691 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7692 UNSPEC_SVE_SPLICE))]
7695 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
7696 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
7697 [(set_attr "movprfx" "*, yes")]
7700 ;; Permutes that take half the elements from one vector and half the
7701 ;; elements from the other.
;; Operands 1 and 2 are the two input vectors and operand 0 the result, all
;; in the same SVE_FULL mode.  The mnemonic is supplied by <perm_insn> and
;; the element suffix by the mode.
7702 (define_insn "@aarch64_sve_<perm_insn><mode>"
7703 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7705 [(match_operand:SVE_FULL 1 "register_operand" "w")
7706 (match_operand:SVE_FULL 2 "register_operand" "w")]
7709 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7712 ;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
7713 ;; doesn't depend on the mode.
;; All three operands are printed with the .q suffix, i.e. as vectors of
;; 128-bit elements, whatever SVE_FULL mode the pattern is instantiated for.
7714 (define_insn "@aarch64_sve_<optab><mode>"
7715 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7717 [(match_operand:SVE_FULL 1 "register_operand" "w")
7718 (match_operand:SVE_FULL 2 "register_operand" "w")]
7721 "<perm_insn>\t%0.q, %1.q, %2.q"
7724 ;; Concatenate two vectors and extract a subvector. Note that the
7725 ;; immediate (third) operand is the lane index not the byte index.
;; The condition requires the byte offset (lane index * element size) to be
;; in the range [0, 255].  The output code converts operand 3 from a lane
;; index into that byte offset, because EXT is always emitted on .b elements.
;; The first alternative ties operand 1 to the destination; the second
;; copies operand 1 into an earlyclobber destination with MOVPRFX first.
7726 (define_insn "@aarch64_sve_ext<mode>"
7727 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7729 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7730 (match_operand:SVE_FULL 2 "register_operand" "w, w")
7731 (match_operand:SI 3 "const_int_operand")]
7734 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
7736 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
7737 return (which_alternative == 0
7738 ? "ext\\t%0.b, %0.b, %2.b, #%3"
7739 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
7741 [(set_attr "movprfx" "*,yes")]
7744 ;; -------------------------------------------------------------------------
7745 ;; ---- [PRED] Special-purpose unary permutes
7746 ;; -------------------------------------------------------------------------
7749 ;; -------------------------------------------------------------------------
;; Predicate counterpart of the vector REV pattern: operand 0 is the
;; destination and operand 1 the source, both predicate ("Upa") registers
;; of the same PRED_ALL mode, printed with that mode's element suffix.
7751 (define_insn "@aarch64_sve_rev<mode>"
7752 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7753 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
7756 "rev\t%0.<Vetype>, %1.<Vetype>")
7758 ;; -------------------------------------------------------------------------
7759 ;; ---- [PRED] Special-purpose binary permutes
7760 ;; -------------------------------------------------------------------------
7768 ;; -------------------------------------------------------------------------
7770 ;; Permutes that take half the elements from one vector and half the
7771 ;; elements from the other.
;; Predicate form: operands 1 and 2 are the two input predicates and
;; operand 0 the result, all "Upa" registers of the same PRED_ALL mode.
;; The mnemonic comes from <perm_insn>.
7772 (define_insn "@aarch64_sve_<perm_insn><mode>"
7773 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7774 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
7775 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
7778 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7781 ;; =========================================================================
7783 ;; =========================================================================
7785 ;; -------------------------------------------------------------------------
7786 ;; ---- [INT<-INT] Packs
7787 ;; -------------------------------------------------------------------------
7790 ;; -------------------------------------------------------------------------
7792 ;; Integer pack. Use UZP1 on the narrower type, which discards
7793 ;; the high part of each wide element.
;; Operands 1 and 2 are the two wide (<VWIDE>) inputs and operand 0 is the
;; narrow SVE_FULL_BHSI result.  All three registers are printed with the
;; element suffix of the narrow mode.
7794 (define_insn "vec_pack_trunc_<Vwide>"
7795 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
7796 (unspec:SVE_FULL_BHSI
7797 [(match_operand:<VWIDE> 1 "register_operand" "w")
7798 (match_operand:<VWIDE> 2 "register_operand" "w")]
7801 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7804 ;; -------------------------------------------------------------------------
7805 ;; ---- [INT<-INT] Unpacks
7806 ;; -------------------------------------------------------------------------
7812 ;; -------------------------------------------------------------------------
7814 ;; Unpack the low or high half of a vector, where "high" refers to
7815 ;; the low-numbered lanes for big-endian and the high-numbered lanes
7816 ;; for little-endian.
;; The expander emits either the UNPKHI or the UNPKLO insn, chosen by
;; <hi_lanes_optab>, with operand 0 as the wide destination and operand 1
;; as the narrow source.
7817 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
7818 [(match_operand:<VWIDE> 0 "register_operand")
7820 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
7823 emit_insn ((<hi_lanes_optab>
7824 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
7825 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
7826 (operands[0], operands[1]));
;; The instruction behind the expander above: <su>UNPK<perm_hilo> reads
;; operand 1 with the narrow element suffix (<Vetype>) and writes operand 0
;; with the wide element suffix (<Vewtype>).
7831 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
7832 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
7834 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
7837 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
7840 ;; -------------------------------------------------------------------------
7841 ;; ---- [INT<-FP] Conversions
7842 ;; -------------------------------------------------------------------------
7846 ;; -------------------------------------------------------------------------
7848 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
7849 ;; SF to SI or DF to DI).
;; The expander fills in operand 2 with a predicate register obtained from
;; aarch64_ptrue_reg for the mode's <VPRED>, and marks the operation as
;; SVE_RELAXED_GP.
7850 (define_expand "<optab><mode><v_int_equiv>2"
7851 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7852 (unspec:<V_INT_EQUIV>
7854 (const_int SVE_RELAXED_GP)
7855 (match_operand:SVE_FULL_F 1 "register_operand")]
7859 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7863 ;; Predicated float-to-integer conversion, either to the same width or wider.
;; Operand 1 is the governing predicate (in the integer mode's <VPRED>),
;; operand 2 the FP source and operand 3 the GP strictness flag.  The
;; condition restricts the mode pairs to those in which the integer
;; elements are at least as wide as the FP elements.
7864 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7865 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
7866 (unspec:SVE_FULL_HSDI
7867 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7868 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7869 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7871 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7872 "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7875 ;; Predicated narrowing float-to-integer conversion.
;; Operand 1 is a VNx2BI governing predicate, operand 2 the source (mode
;; iterator VNx2DF_ONLY) and operand 3 the GP strictness flag.  The result
;; uses the VNx4SI_ONLY mode iterator.
7876 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7877 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
7879 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7880 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7881 (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
7884 "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7887 ;; Predicated float-to-integer conversion with merging, either to the same
;; width or wider.
;; Operand 1 is the predicate, operand 2 the FP source and operand 3 the
;; value to merge with, which may be a register or zero.  The inner
;; conversion is marked SVE_STRICT_GP.  The condition limits the mode pairs
;; to integer elements at least as wide as the FP elements.
7889 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7890 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
7891 (unspec:SVE_FULL_HSDI
7892 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7893 (unspec:SVE_FULL_HSDI
7895 (const_int SVE_STRICT_GP)
7896 (match_operand:SVE_FULL_F 2 "register_operand")]
7898 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
7900 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7903 ;; The first alternative doesn't need the earlyclobber, but the only case
7904 ;; it would help is the uninteresting one in which operands 2 and 3 are
7905 ;; the same register (despite having different modes). Making all the
7906 ;; alternatives earlyclobber makes things more consistent for the
7907 ;; register allocator.
;; The three alternatives differ in operand 3, the merge value:
;;   1. tied to the destination: a plain merging FCVTZ<su>;
;;   2. zero: a zeroing (/z) MOVPRFX precedes the conversion;
;;   3. another register: an unpredicated MOVPRFX copies it in first.
;; The insn condition checks operand 4 against operand 1 with
;; aarch64_sve_pred_dominates_p, and the rewrite step replaces operand 4
;; with a copy of operand 1 whenever the two are not already equal.
7908 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7909 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
7910 (unspec:SVE_FULL_HSDI
7911 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7912 (unspec:SVE_FULL_HSDI
7914 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7915 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
7917 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7920 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7921 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7923 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7924 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7925 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7926 "&& !rtx_equal_p (operands[1], operands[4])"
7928 operands[4] = copy_rtx (operands[1]);
7930 [(set_attr "movprfx" "*,yes,yes")]
7933 ;; Predicated narrowing float-to-integer conversion with merging.
;; Operand 1 is a VNx2BI predicate, operand 2 the source (VNx2DF_ONLY) and
;; operand 3 the value to merge with, which may be a register or zero.
;; The inner conversion is marked SVE_STRICT_GP.
7934 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7935 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7937 [(match_operand:VNx2BI 1 "register_operand")
7940 (const_int SVE_STRICT_GP)
7941 (match_operand:VNx2DF_ONLY 2 "register_operand")]
7943 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
;; Insn form of the predicated narrowing float-to-integer conversion with
;; merging.  The alternatives follow operand 3: tied to the destination
;; (plain merging FCVTZ<su>), zero (zeroing MOVPRFX first, printed with the
;; source mode's element suffix), or another register (unpredicated MOVPRFX
;; first).  All alternatives mark the destination as earlyclobber.
7948 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7949 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
7951 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7954 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7955 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
7957 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7961 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7962 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7963 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7964 [(set_attr "movprfx" "*,yes,yes")]
7967 ;; -------------------------------------------------------------------------
7968 ;; ---- [INT<-FP] Packs
7969 ;; -------------------------------------------------------------------------
7970 ;; The patterns in this section are synthetic.
7971 ;; -------------------------------------------------------------------------
7973 ;; Convert two vectors of DF to SI and pack the results into a single vector.
;; Operands 1 and 2 are the two VNx2DF inputs; both conversions are marked
;; SVE_RELAXED_GP.  The expander creates operand 3 as a VNx2BI predicate
;; from aarch64_ptrue_reg, and operands 4 and 5 as fresh VNx4SI temporaries
;; that feed the final UZP1 into operand 0.
7974 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
7978 (const_int SVE_RELAXED_GP)
7979 (match_operand:VNx2DF 1 "register_operand")]
7984 (const_int SVE_RELAXED_GP)
7985 (match_operand:VNx2DF 2 "register_operand")]
7987 (set (match_operand:VNx4SI 0 "register_operand")
7988 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7991 operands[3] = aarch64_ptrue_reg (VNx2BImode);
7992 operands[4] = gen_reg_rtx (VNx4SImode);
7993 operands[5] = gen_reg_rtx (VNx4SImode);
7997 ;; -------------------------------------------------------------------------
7998 ;; ---- [INT<-FP] Unpacks
7999 ;; -------------------------------------------------------------------------
8000 ;; No patterns here yet!
8001 ;; -------------------------------------------------------------------------
8003 ;; -------------------------------------------------------------------------
8004 ;; ---- [FP<-INT] Conversions
8005 ;; -------------------------------------------------------------------------
8009 ;; -------------------------------------------------------------------------
8011 ;; Unpredicated conversion of integers to floats of the same size
8012 ;; (HI to HF, SI to SF or DI to DF).
;; The expander fills in operand 2 with a predicate register obtained from
;; aarch64_ptrue_reg for the mode's <VPRED>, and marks the operation as
;; SVE_RELAXED_GP.
8013 (define_expand "<optab><v_int_equiv><mode>2"
8014 [(set (match_operand:SVE_FULL_F 0 "register_operand")
8017 (const_int SVE_RELAXED_GP)
8018 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
8022 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8026 ;; Predicated integer-to-float conversion, either to the same width or
;; narrower.
;; Operand 1 is the governing predicate (in the integer mode's <VPRED>),
;; operand 2 the integer source and operand 3 the GP strictness flag.  The
;; condition restricts the mode pairs to those in which the integer
;; elements are at least as wide as the FP elements.
8028 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8029 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
8031 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
8032 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8033 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
8035 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8036 "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
8039 ;; Predicated widening integer-to-float conversion.
;; Operand 1 is a VNx2BI governing predicate, operand 2 the integer source
;; (mode iterator VNx4SI_ONLY) and operand 3 the GP strictness flag.  The
;; result uses the VNx2DF_ONLY mode iterator.
8040 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8041 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
8043 [(match_operand:VNx2BI 1 "register_operand" "Upl")
8044 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8045 (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
8048 "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
8051 ;; Predicated integer-to-float conversion with merging, either to the same
8052 ;; width or narrower.
;; Operand 1 is the predicate, operand 2 the integer source and operand 3
;; the value to merge with, which may be a register or zero.  The inner
;; conversion is marked SVE_STRICT_GP.  The condition limits the mode pairs
;; to integer elements at least as wide as the FP elements.
8053 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8054 [(set (match_operand:SVE_FULL_F 0 "register_operand")
8056 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
8059 (const_int SVE_STRICT_GP)
8060 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
8062 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
8064 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8067 ;; The first alternative doesn't need the earlyclobber, but the only case
8068 ;; it would help is the uninteresting one in which operands 2 and 3 are
8069 ;; the same register (despite having different modes). Making all the
8070 ;; alternatives earlyclobber makes things more consistent for the
8071 ;; register allocator.
;; The three alternatives differ in operand 3, the merge value:
;;   1. tied to the destination: a plain merging <su>CVTF;
;;   2. zero: a zeroing (/z) MOVPRFX precedes the conversion;
;;   3. another register: an unpredicated MOVPRFX copies it in first.
;; The insn condition checks operand 4 against operand 1 with
;; aarch64_sve_pred_dominates_p, and the rewrite step replaces operand 4
;; with a copy of operand 1 whenever the two are not already equal.
8072 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8073 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
8075 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8078 (match_operand:SI 5 "aarch64_sve_gp_strictness")
8079 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
8081 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8084 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
8085 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
8087 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
8088 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
8089 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
8090 "&& !rtx_equal_p (operands[1], operands[4])"
8092 operands[4] = copy_rtx (operands[1]);
8094 [(set_attr "movprfx" "*,yes,yes")]
8097 ;; Predicated widening integer-to-float conversion with merging.
;; Operand 1 is a VNx2BI predicate, operand 2 the integer source
;; (VNx4SI_ONLY) and operand 3 the value to merge with, which may be a
;; register or zero.  The inner conversion is marked SVE_STRICT_GP.
8098 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8099 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
8101 [(match_operand:VNx2BI 1 "register_operand")
8104 (const_int SVE_STRICT_GP)
8105 (match_operand:VNx4SI_ONLY 2 "register_operand")]
8107 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
;; Insn form of the predicated widening integer-to-float conversion with
;; merging.  The alternatives follow operand 3: tied to the destination
;; (plain merging <su>CVTF), zero (zeroing MOVPRFX first), or another
;; register (unpredicated MOVPRFX first).  Only the two MOVPRFX
;; alternatives mark the destination as earlyclobber.
8112 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8113 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
8115 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
8118 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8119 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
8121 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8125 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
8126 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
8127 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
8128 [(set_attr "movprfx" "*,yes,yes")]
8131 ;; -------------------------------------------------------------------------
8132 ;; ---- [FP<-INT] Packs
8133 ;; -------------------------------------------------------------------------
8134 ;; No patterns here yet!
8135 ;; -------------------------------------------------------------------------
8137 ;; -------------------------------------------------------------------------
8138 ;; ---- [FP<-INT] Unpacks
8139 ;; -------------------------------------------------------------------------
8140 ;; The patterns in this section are synthetic.
8141 ;; -------------------------------------------------------------------------
8143 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
8144 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
8145 ;; unpacked VNx4SI to VNx2DF.
;; The expander emits two instructions: a ZIP1 or ZIP2 (chosen by
;; <hi_lanes_optab>) of operand 1 with itself into a fresh VNx4SI
;; temporary, followed by the predicated extending conversion of that
;; temporary into operand 0.  The conversion uses a VNx2BI predicate from
;; aarch64_ptrue_reg and SVE_RELAXED_GP strictness.
8146 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
8147 [(match_operand:VNx2DF 0 "register_operand")
8149 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
8153 /* Use ZIP to do the unpack, since we don't care about the upper halves
8154 and since it has the nice property of not needing any subregs.
8155 If using UUNPK* turns out to be preferable, we could model it as
8156 a ZIP whose first operand is zero. */
8157 rtx temp = gen_reg_rtx (VNx4SImode);
8158 emit_insn ((<hi_lanes_optab>
8159 ? gen_aarch64_sve_zip2vnx4si
8160 : gen_aarch64_sve_zip1vnx4si)
8161 (temp, operands[1], operands[1]));
8162 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
8163 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8164 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
8165 (operands[0], ptrue, temp, strictness));
8170 ;; -------------------------------------------------------------------------
8171 ;; ---- [FP<-FP] Packs
8172 ;; -------------------------------------------------------------------------
8175 ;; -------------------------------------------------------------------------
8177 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
8178 ;; the results into a single vector.
;; Operands 1 and 2 are the two wide (<VWIDE>) inputs; both conversions are
;; marked SVE_RELAXED_GP.  The expander creates operand 3 as a predicate of
;; mode <VWIDE_PRED> from aarch64_ptrue_reg, and operands 4 and 5 as fresh
;; temporaries in the narrow result mode that feed the final UZP1 into
;; operand 0.
8179 (define_expand "vec_pack_trunc_<Vwide>"
8181 (unspec:SVE_FULL_HSF
8183 (const_int SVE_RELAXED_GP)
8184 (match_operand:<VWIDE> 1 "register_operand")]
8187 (unspec:SVE_FULL_HSF
8189 (const_int SVE_RELAXED_GP)
8190 (match_operand:<VWIDE> 2 "register_operand")]
8192 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
8193 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
8196 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8197 operands[4] = gen_reg_rtx (<MODE>mode);
8198 operands[5] = gen_reg_rtx (<MODE>mode);
8202 ;; Predicated float-to-float truncation.
;; Operand 1 is the governing predicate (in the source mode's <VPRED>),
;; operand 2 the wide FP source and operand 3 the GP strictness flag.  The
;; condition keeps only the mode pairs in which the source elements are
;; strictly wider than the destination elements.
8203 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8204 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
8205 (unspec:SVE_FULL_HSF
8206 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8207 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8208 (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
8210 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8211 "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8214 ;; Predicated float-to-float truncation with merging.
;; Operand 1 is the predicate, operand 2 the wide FP source and operand 3
;; the value to merge with, which may be a register or zero.  The inner
;; conversion is marked SVE_STRICT_GP.  The condition keeps only the mode
;; pairs in which the source elements are strictly wider than the result's.
8215 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8216 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
8217 (unspec:SVE_FULL_HSF
8218 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8219 (unspec:SVE_FULL_HSF
8221 (const_int SVE_STRICT_GP)
8222 (match_operand:SVE_FULL_SDF 2 "register_operand")]
8224 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
8226 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
;; Insn form of the predicated float-to-float truncation with merging.
;; The alternatives follow operand 3: tied to the destination (plain
;; merging FCVT), zero (zeroing MOVPRFX first, printed with the wide source
;; mode's element suffix), or another register (unpredicated MOVPRFX first).
8229 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8230 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
8231 (unspec:SVE_FULL_HSF
8232 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8233 (unspec:SVE_FULL_HSF
8235 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8236 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
8238 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8240 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8242 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8243 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8244 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8245 [(set_attr "movprfx" "*,yes,yes")]
8248 ;; -------------------------------------------------------------------------
8249 ;; ---- [FP<-FP] Packs (bfloat16)
8250 ;; -------------------------------------------------------------------------
8254 ;; -------------------------------------------------------------------------
8256 ;; Predicated BFCVT.
;; Operand 1 is a VNx4BI governing predicate, operand 2 the source (mode
;; iterator VNx4SF_ONLY, printed as .s) and operand 3 the GP strictness
;; flag.  The result (VNx8BF_ONLY) is printed as .h.
8257 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8258 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
8260 [(match_operand:VNx4BI 1 "register_operand" "Upl")
8261 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8262 (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
8265 "bfcvt\t%0.h, %1/m, %2.s"
8268 ;; Predicated BFCVT with merging.
;; Operand 1 is a VNx4BI predicate, operand 2 the source (VNx4SF_ONLY) and
;; operand 3 the value to merge with, which may be a register or zero.
;; The inner conversion is marked SVE_STRICT_GP.
8269 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8270 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
8272 [(match_operand:VNx4BI 1 "register_operand")
8275 (const_int SVE_STRICT_GP)
8276 (match_operand:VNx4SF_ONLY 2 "register_operand")]
8278 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
;; Insn form of the predicated BFCVT with merging.  The alternatives follow
;; operand 3: tied to the destination (plain merging BFCVT), zero (zeroing
;; MOVPRFX on .s elements first), or another register (unpredicated MOVPRFX
;; first).
8283 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8284 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
8286 [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
8289 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8290 (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
8292 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8296 bfcvt\t%0.h, %1/m, %2.s
8297 movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
8298 movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
8299 [(set_attr "movprfx" "*,yes,yes")]
8302 ;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
8303 ;; pair because the even elements always have to be supplied for active
8304 ;; elements, even if the inactive elements don't matter.
8306 ;; This instruction does not take MOVPRFX.
;; Operand 1 is the existing VNx8BF value and is tied to the destination
;; (constraint "0"); operand 2 is a VNx4BI governing predicate and operand 3
;; the VNx4SF source.  The strictness is fixed at SVE_STRICT_GP rather than
;; being an operand.
8307 (define_insn "@aarch64_sve_cvtnt<mode>"
8308 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
8310 [(match_operand:VNx4BI 2 "register_operand" "Upl")
8311 (const_int SVE_STRICT_GP)
8312 (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
8313 (match_operand:VNx4SF 3 "register_operand" "w")]
8314 UNSPEC_COND_FCVTNT))]
8316 "bfcvtnt\t%0.h, %2/m, %3.s"
8319 ;; -------------------------------------------------------------------------
8320 ;; ---- [FP<-FP] Unpacks
8321 ;; -------------------------------------------------------------------------
8324 ;; -------------------------------------------------------------------------
8326 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
8327 ;; First unpack the source without conversion, then float-convert the
;; unpacked source.
;; The expander emits two instructions: a ZIP1 or ZIP2 (chosen by
;; <hi_lanes_optab>) of operand 1 with itself into a fresh temporary of the
;; narrow mode, followed by the predicated extending FCVT of that temporary
;; into operand 0.  The conversion uses a <VWIDE_PRED> predicate from
;; aarch64_ptrue_reg and SVE_RELAXED_GP strictness.
8329 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
8330 [(match_operand:<VWIDE> 0 "register_operand")
8331 (unspec:SVE_FULL_HSF
8332 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
8336 /* Use ZIP to do the unpack, since we don't care about the upper halves
8337 and since it has the nice property of not needing any subregs.
8338 If using UUNPK* turns out to be preferable, we could model it as
8339 a ZIP whose first operand is zero. */
8340 rtx temp = gen_reg_rtx (<MODE>mode);
8341 emit_insn ((<hi_lanes_optab>
8342 ? gen_aarch64_sve_zip2<mode>
8343 : gen_aarch64_sve_zip1<mode>)
8344 (temp, operands[1], operands[1]));
8345 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8346 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8347 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
8348 (operands[0], ptrue, temp, strictness));
8353 ;; Predicated float-to-float extension.
;; Operand 1 is the governing predicate (in the result mode's <VPRED>),
;; operand 2 the narrow FP source and operand 3 the GP strictness flag.
;; The condition keeps only the mode pairs in which the result elements are
;; strictly wider than the source elements.
8354 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8355 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
8356 (unspec:SVE_FULL_SDF
8357 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8358 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8359 (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
8361 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8362 "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8365 ;; Predicated float-to-float extension with merging.
;; Operand 1 is the predicate, operand 2 the narrow FP source and operand 3
;; the value to merge with, which may be a register or zero.  The inner
;; conversion is marked SVE_STRICT_GP.  The condition keeps only the mode
;; pairs in which the result elements are strictly wider than the source's.
8366 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8367 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
8368 (unspec:SVE_FULL_SDF
8369 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8370 (unspec:SVE_FULL_SDF
8372 (const_int SVE_STRICT_GP)
8373 (match_operand:SVE_FULL_HSF 2 "register_operand")]
8375 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
8377 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
;; Insn form of the predicated float-to-float extension with merging.
;; The alternatives follow operand 3: tied to the destination (plain
;; merging FCVT), zero (zeroing MOVPRFX first, printed with the wide result
;; mode's element suffix), or another register (unpredicated MOVPRFX first).
8380 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8381 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
8382 (unspec:SVE_FULL_SDF
8383 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8384 (unspec:SVE_FULL_SDF
8386 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8387 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
8389 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8391 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8393 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8394 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8395 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8396 [(set_attr "movprfx" "*,yes,yes")]
8399 ;; -------------------------------------------------------------------------
8400 ;; ---- [PRED<-PRED] Packs
8401 ;; -------------------------------------------------------------------------
8404 ;; -------------------------------------------------------------------------
8406 ;; Predicate pack. Use UZP1 on the narrower type, which discards
8407 ;; the high part of each wide element.
;; Operands 1 and 2 are the two wide (<VWIDE>) input predicates and operand
;; 0 is the narrow PRED_BHS result.  All three registers are printed with
;; the element suffix of the narrow mode.
8408 (define_insn "vec_pack_trunc_<Vwide>"
8409 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
8411 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
8412 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
8415 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8418 ;; -------------------------------------------------------------------------
8419 ;; ---- [PRED<-PRED] Unpacks
8420 ;; -------------------------------------------------------------------------
8424 ;; -------------------------------------------------------------------------
8426 ;; Unpack the low or high half of a predicate, where "high" refers to
8427 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8428 ;; for little-endian.
;; The expander emits either the PUNPKHI or the PUNPKLO insn, chosen by
;; <hi_lanes_optab>, with operand 0 as the wide destination predicate and
;; operand 1 as the narrow source predicate.
8429 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
8430 [(match_operand:<VWIDE> 0 "register_operand")
8431 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
8435 emit_insn ((<hi_lanes_optab>
8436 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
8437 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
8438 (operands[0], operands[1]));
;; The instruction behind the expander above.  The operands are always
;; printed as %0.h and %1.b, regardless of which PRED_BHS mode the pattern
;; is instantiated for.
8443 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
8444 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
8445 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
8448 "punpk<perm_hilo>\t%0.h, %1.b"
8451 ;; =========================================================================
8452 ;; == Vector partitioning
8453 ;; =========================================================================
8455 ;; -------------------------------------------------------------------------
8456 ;; ---- [PRED] Unary partitioning
8457 ;; -------------------------------------------------------------------------
8463 ;; -------------------------------------------------------------------------
8465 ;; Note that unlike most other instructions that have both merging and
8466 ;; zeroing forms, these instructions don't operate elementwise and so
8467 ;; don't fit the IFN_COND model.
;; Operand 1 is the governing predicate and operand 2 the source predicate.
;; Operand 3 selects the form: when it is zero ("Dz") the zeroing /z form
;; is emitted, and when it is tied to the destination ("0") the merging /m
;; form is emitted.
8468 (define_insn "@aarch64_brk<brk_op>"
8469 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8471 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8472 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8473 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8477 brk<brk_op>\t%0.b, %1/z, %2.b
8478 brk<brk_op>\t%0.b, %1/m, %2.b"
8481 ;; Same, but also producing a flags result.
;; This variant sets CC_REGNUM (mode CC_NZC) as well as operand 0, and
;; emits the flag-setting BRK<brk_op>S mnemonic.  Operand 4 is the PTEST
;; ptrue flag.  As in the base pattern, a zero operand 3 gives the /z form
;; and an operand 3 tied to the destination gives the /m form.
8482 (define_insn "*aarch64_brk<brk_op>_cc"
8483 [(set (reg:CC_NZC CC_REGNUM)
8485 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8487 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8490 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8491 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8494 (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8502 brk<brk_op>s\t%0.b, %1/z, %2.b
8503 brk<brk_op>s\t%0.b, %1/m, %2.b"
8506 ;; Same, but with only the flags result being interesting.
;; This variant sets only CC_REGNUM; operand 0 is a scratch predicate
;; register (match_scratch) that is clobbered rather than used as a result.
;; The emitted instructions are the same flag-setting BRK<brk_op>S forms as
;; in the _cc pattern.
8507 (define_insn "*aarch64_brk<brk_op>_ptest"
8508 [(set (reg:CC_NZC CC_REGNUM)
8510 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8512 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8515 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8516 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8519 (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
8522 brk<brk_op>s\t%0.b, %1/z, %2.b
8523 brk<brk_op>s\t%0.b, %1/m, %2.b"
8526 ;; -------------------------------------------------------------------------
8527 ;; ---- [PRED] Binary partitioning
8528 ;; -------------------------------------------------------------------------
8536 ;; -------------------------------------------------------------------------
8538 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
;; Operand 1 is the governing predicate; operands 2 and 3 are the two
;; source predicates.  Operand 3's constraint comes from <brk_reg_con>, and
;; <brk_reg_opno> chooses which operand number is printed as the last
;; source register.  Only the zeroing (/z) form is emitted.
8539 (define_insn "@aarch64_brk<brk_op>"
8540 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8542 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8543 (match_operand:VNx16BI 2 "register_operand" "Upa")
8544 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8547 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8550 ;; Same, but also producing a flags result.
;; This variant sets CC_REGNUM (mode CC_NZC) as well as operand 0, and
;; emits the flag-setting BRK<brk_op>S mnemonic.  Operand 4 is the PTEST
;; ptrue flag; the remaining operands are as in the base binary pattern.
8551 (define_insn "*aarch64_brk<brk_op>_cc"
8552 [(set (reg:CC_NZC CC_REGNUM)
8554 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8556 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8559 (match_operand:VNx16BI 2 "register_operand" "Upa")
8560 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8563 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8570 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8573 ;; Same, but with only the flags result being interesting.
;; This variant sets only CC_REGNUM; operand 0 is a scratch predicate
;; register (match_scratch) that is clobbered rather than used as a result.
;; The emitted instruction is the same flag-setting BRK<brk_op>S form as in
;; the _cc pattern.
8574 (define_insn "*aarch64_brk<brk_op>_ptest"
8575 [(set (reg:CC_NZC CC_REGNUM)
8577 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8579 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8582 (match_operand:VNx16BI 2 "register_operand" "Upa")
8583 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8586 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8588 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8591 ;; -------------------------------------------------------------------------
8592 ;; ---- [PRED] Scalarization
8593 ;; -------------------------------------------------------------------------
8597 ;; -------------------------------------------------------------------------
;; Operand 1 is the governing predicate and operand 2 a PTEST ptrue flag.
;; Operand 3 is tied to the destination (constraint "0"), so the
;; instruction reads and overwrites the same predicate register.  The
;; pattern clobbers the condition flags, and its condition requires
;; <max_elem_bits> >= <elem_bits>.
8599 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
8600 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8602 [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8603 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8604 (match_operand:PRED_ALL 3 "register_operand" "0")]
8606 (clobber (reg:CC_NZC CC_REGNUM))]
8607 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8608 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8611 ;; Same, but also producing a flags result.
;; This variant sets CC_REGNUM (mode CC_NZC) as well as operand 0.  Operand
;; 6 is the input predicate tied to the destination.  The insn condition
;; requires operands 2 and 4 to satisfy aarch64_sve_same_pred_for_ptest_p.
;; When operands 2 and 4 are not already identical RTL, the rewrite step
;; copies operands 2 and 3 into operands 4 and 5.
8612 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
8613 [(set (reg:CC_NZC CC_REGNUM)
8615 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8617 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8620 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8621 (match_operand:PRED_ALL 6 "register_operand" "0")]
8624 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8631 && <max_elem_bits> >= <elem_bits>
8632 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8633 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8634 "&& !rtx_equal_p (operands[2], operands[4])"
8636 operands[4] = operands[2];
8637 operands[5] = operands[3];
8641 ;; Same, but with only the flags result being interesting.
;; Operand 0 is a scratch predicate register that gets clobbered; operand
;; 6 is tied to it (constraint "0").  As in the _cc form, the condition
;; requires aarch64_sve_same_pred_for_ptest_p for operands 2 and 4, and
;; the rewrite step copies operands 2 and 3 into operands 4 and 5 when
;; operands 2 and 4 are not rtx_equal_p.
8642 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
8643 [(set (reg:CC_NZC CC_REGNUM)
8645 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8647 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8650 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8651 (match_operand:PRED_ALL 6 "register_operand" "0")]
8654 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8656 && <max_elem_bits> >= <elem_bits>
8657 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8658 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8659 "&& !rtx_equal_p (operands[2], operands[4])"
8661 operands[4] = operands[2];
8662 operands[5] = operands[3];
8666 ;; =========================================================================
8667 ;; == Counting elements
8668 ;; =========================================================================
8670 ;; -------------------------------------------------------------------------
8671 ;; ---- [INT] Count elements in a pattern (scalar)
8672 ;; -------------------------------------------------------------------------
8678 ;; -------------------------------------------------------------------------
8680 ;; Count the number of elements in an svpattern. Operand 1 is the pattern,
8681 ;; operand 2 is the number of elements that fit in a 128-bit block, and
8682 ;; operand 3 is a multiplier in the range [1, 16].
8684 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
;; The assembly text is built by aarch64_output_sve_cnt_pat_immediate from
;; the "cnt" prefix, the X-register form of operand 0 and the three
;; constant operands (operands + 1).
8685 (define_insn "aarch64_sve_cnt_pat"
8686 [(set (match_operand:DI 0 "register_operand" "=r")
8688 (unspec:SI [(match_operand:DI 1 "const_int_operand")
8689 (match_operand:DI 2 "const_int_operand")
8690 (match_operand:DI 3 "const_int_operand")]
8691 UNSPEC_SVE_CNT_PAT)))]
8694 return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
8698 ;; -------------------------------------------------------------------------
8699 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
8700 ;; -------------------------------------------------------------------------
8705 ;; -------------------------------------------------------------------------
8707 ;; Increment a DImode register by the number of elements in an svpattern.
8708 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Operand 1 is the value being incremented and is tied to the
;; destination (constraint "0").  Operands 2-4 are the constant operands
;; of UNSPEC_SVE_CNT_PAT; the SImode count is zero-extended to DImode
;; before the ANY_PLUS operation.  <inc_dec> supplies the mnemonic prefix.
8709 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8710 [(set (match_operand:DI 0 "register_operand" "=r")
8711 (ANY_PLUS:DI (zero_extend:DI
8712 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8713 (match_operand:DI 3 "const_int_operand")
8714 (match_operand:DI 4 "const_int_operand")]
8715 UNSPEC_SVE_CNT_PAT))
8716 (match_operand:DI_ONLY 1 "register_operand" "0")))]
8719 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8724 ;; Increment an SImode register by the number of elements in an svpattern
8725 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting behavior.
;; Operand 1 is tied to the destination (constraint "0").  The template
;; prints the SImode destination in its X-register form (%x0); presumably
;; this is safe because only the low 32 bits of a modular result are
;; significant -- confirm against the instruction reference.
8727 (define_insn "*aarch64_sve_incsi_pat"
8728 [(set (match_operand:SI 0 "register_operand" "=r")
8729 (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
8730 (match_operand:DI 3 "const_int_operand")
8731 (match_operand:DI 4 "const_int_operand")]
8733 (match_operand:SI 1 "register_operand" "0")))]
8736 return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
8740 ;; Increment an SImode register by the number of elements in an svpattern
8741 ;; using saturating arithmetic, extending the result to 64 bits.
8743 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Operand 1 (SImode) is tied to the DImode destination.  The register
;; part of the template depends on the iterated code: "%x0, %w0" when
;; <CODE> is SS_PLUS and plain "%w0" otherwise.
8744 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8745 [(set (match_operand:DI 0 "register_operand" "=r")
8748 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8749 (match_operand:DI 3 "const_int_operand")
8750 (match_operand:DI 4 "const_int_operand")]
8752 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8755 const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
8756 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8761 ;; -------------------------------------------------------------------------
8762 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
8763 ;; -------------------------------------------------------------------------
8768 ;; -------------------------------------------------------------------------
8770 ;; Increment a vector of DIs by the number of elements in an svpattern.
8771 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") accepts any register
;; and first emits MOVPRFX to copy operand 1 into the destination; the
;; "movprfx" attribute is "yes" for that alternative only.
8772 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8773 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8775 (vec_duplicate:VNx2DI
8777 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8778 (match_operand:DI 3 "const_int_operand")
8779 (match_operand:DI 4 "const_int_operand")]
8780 UNSPEC_SVE_CNT_PAT)))
8781 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8784 if (which_alternative == 1)
8785 output_asm_insn ("movprfx\t%0, %1", operands);
8786 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8789 [(set_attr "movprfx" "*,yes")]
8792 ;; Increment a vector of SIs by the number of elements in an svpattern.
8793 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; The SImode count is duplicated across the vector directly.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") accepts any register
;; and first emits MOVPRFX to copy operand 1 into the destination.
8794 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8795 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8797 (vec_duplicate:VNx4SI
8798 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8799 (match_operand:DI 3 "const_int_operand")
8800 (match_operand:DI 4 "const_int_operand")]
8801 UNSPEC_SVE_CNT_PAT))
8802 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8805 if (which_alternative == 1)
8806 output_asm_insn ("movprfx\t%0, %1", operands);
8807 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8810 [(set_attr "movprfx" "*,yes")]
8813 ;; Increment a vector of HIs by the number of elements in an svpattern.
8814 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; This is the named expander, so its operands carry no constraints.
;; The anonymous insn *aarch64_sve_<inc_dec><mode>_pat uses the same
;; numbering for operands 0-4 and adds a subreg_lowpart_operator as
;; operand 5.
8815 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8816 [(set (match_operand:VNx8HI 0 "register_operand")
8818 (vec_duplicate:VNx8HI
8820 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8821 (match_operand:DI 3 "const_int_operand")
8822 (match_operand:DI 4 "const_int_operand")]
8823 UNSPEC_SVE_CNT_PAT)))
8824 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
;; Insn form of the HImode vector increment by an svpattern count.
;; Operand 5 is a subreg_lowpart_operator that narrows the SImode count
;; to HImode before it is duplicated across the vector.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") first emits MOVPRFX
;; to copy operand 1 into the destination.
8828 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8829 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8831 (vec_duplicate:VNx8HI
8832 (match_operator:HI 5 "subreg_lowpart_operator"
8833 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8834 (match_operand:DI 3 "const_int_operand")
8835 (match_operand:DI 4 "const_int_operand")]
8836 UNSPEC_SVE_CNT_PAT)]))
8837 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8840 if (which_alternative == 1)
8841 output_asm_insn ("movprfx\t%0, %1", operands);
8842 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8845 [(set_attr "movprfx" "*,yes")]
8848 ;; -------------------------------------------------------------------------
8849 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
8850 ;; -------------------------------------------------------------------------
8855 ;; -------------------------------------------------------------------------
8857 ;; Decrement a DImode register by the number of elements in an svpattern.
8858 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Operand 1 is the value being decremented and is tied to the
;; destination (constraint "0"); it is the first operand of ANY_MINUS.
;; Operands 2-4 are the constant operands of UNSPEC_SVE_CNT_PAT.
;; <inc_dec> supplies the mnemonic prefix.
8859 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8860 [(set (match_operand:DI 0 "register_operand" "=r")
8861 (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
8863 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8864 (match_operand:DI 3 "const_int_operand")
8865 (match_operand:DI 4 "const_int_operand")]
8866 UNSPEC_SVE_CNT_PAT))))]
8869 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8874 ;; Decrement an SImode register by the number of elements in an svpattern
8875 ;; using modular arithmetic. See aarch64_sve_cnt_pat for the counting behavior.
;; Operand 1 is tied to the destination (constraint "0").  The template
;; prints the SImode destination in its X-register form (%x0); presumably
;; this is safe because only the low 32 bits of a modular result are
;; significant -- confirm against the instruction reference.
8877 (define_insn "*aarch64_sve_decsi_pat"
8878 [(set (match_operand:SI 0 "register_operand" "=r")
8879 (minus:SI (match_operand:SI 1 "register_operand" "0")
8880 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8881 (match_operand:DI 3 "const_int_operand")
8882 (match_operand:DI 4 "const_int_operand")]
8883 UNSPEC_SVE_CNT_PAT)))]
8886 return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
8890 ;; Decrement an SImode register by the number of elements in an svpattern
8891 ;; using saturating arithmetic, extending the result to 64 bits.
8893 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Operand 1 (SImode) is tied to the DImode destination.  The register
;; part of the template depends on the iterated code: "%x0, %w0" when
;; <CODE> is SS_MINUS and plain "%w0" otherwise.
8894 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8895 [(set (match_operand:DI 0 "register_operand" "=r")
8898 (match_operand:SI_ONLY 1 "register_operand" "0")
8899 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8900 (match_operand:DI 3 "const_int_operand")
8901 (match_operand:DI 4 "const_int_operand")]
8902 UNSPEC_SVE_CNT_PAT))))]
8905 const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
8906 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8911 ;; -------------------------------------------------------------------------
8912 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
8913 ;; -------------------------------------------------------------------------
8918 ;; -------------------------------------------------------------------------
8920 ;; Decrement a vector of DIs by the number of elements in an svpattern.
8921 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") accepts any register
;; and first emits MOVPRFX to copy operand 1 into the destination; the
;; "movprfx" attribute is "yes" for that alternative only.
8922 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8923 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8925 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
8926 (vec_duplicate:VNx2DI
8928 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8929 (match_operand:DI 3 "const_int_operand")
8930 (match_operand:DI 4 "const_int_operand")]
8931 UNSPEC_SVE_CNT_PAT)))))]
8934 if (which_alternative == 1)
8935 output_asm_insn ("movprfx\t%0, %1", operands);
8936 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8939 [(set_attr "movprfx" "*,yes")]
8942 ;; Decrement a vector of SIs by the number of elements in an svpattern.
8943 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; The SImode count is duplicated across the vector directly.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") accepts any register
;; and first emits MOVPRFX to copy operand 1 into the destination.
8944 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8945 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8947 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
8948 (vec_duplicate:VNx4SI
8949 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8950 (match_operand:DI 3 "const_int_operand")
8951 (match_operand:DI 4 "const_int_operand")]
8952 UNSPEC_SVE_CNT_PAT))))]
8955 if (which_alternative == 1)
8956 output_asm_insn ("movprfx\t%0, %1", operands);
8957 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8960 [(set_attr "movprfx" "*,yes")]
8963 ;; Decrement a vector of HIs by the number of elements in an svpattern.
8964 ;; See aarch64_sve_cnt_pat for the counting behavior.
;; This is the named expander, so its operands carry no constraints.
;; The anonymous insn *aarch64_sve_<inc_dec><mode>_pat uses the same
;; numbering for operands 0-4 and adds a subreg_lowpart_operator as
;; operand 5.
8965 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8966 [(set (match_operand:VNx8HI 0 "register_operand")
8968 (match_operand:VNx8HI_ONLY 1 "register_operand")
8969 (vec_duplicate:VNx8HI
8971 (unspec:SI [(match_operand:DI 2 "const_int_operand")
8972 (match_operand:DI 3 "const_int_operand")
8973 (match_operand:DI 4 "const_int_operand")]
8974 UNSPEC_SVE_CNT_PAT)))))]
;; Insn form of the HImode vector decrement by an svpattern count.
;; Operand 5 is a subreg_lowpart_operator that narrows the SImode count
;; to HImode before it is duplicated across the vector.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") first emits MOVPRFX
;; to copy operand 1 into the destination.
8978 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8979 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8981 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
8982 (vec_duplicate:VNx8HI
8983 (match_operator:HI 5 "subreg_lowpart_operator"
8984 [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8985 (match_operand:DI 3 "const_int_operand")
8986 (match_operand:DI 4 "const_int_operand")]
8987 UNSPEC_SVE_CNT_PAT)]))))]
8990 if (which_alternative == 1)
8991 output_asm_insn ("movprfx\t%0, %1", operands);
8992 return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8995 [(set_attr "movprfx" "*,yes")]
8998 ;; -------------------------------------------------------------------------
8999 ;; ---- [INT] Count elements in a predicate (scalar)
9000 ;; -------------------------------------------------------------------------
9003 ;; -------------------------------------------------------------------------
9005 ;; Count the number of set bits in a predicate. Operand 2 is true if
9006 ;; operand 1 is known to be all-true.
;; In the CNTP template operand 1 is printed unsuffixed and operand 3 is
;; printed with the element suffix <Vetype>; the count is written to the
;; X-register form of operand 0.
9007 (define_insn "@aarch64_pred_cntp<mode>"
9008 [(set (match_operand:DI 0 "register_operand" "=r")
9010 (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
9011 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
9012 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
9015 "cntp\t%x0, %1, %3.<Vetype>")
9017 ;; -------------------------------------------------------------------------
9018 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
9019 ;; -------------------------------------------------------------------------
9024 ;; -------------------------------------------------------------------------
9026 ;; Increment a DImode register by the number of set bits in a predicate.
9027 ;; See aarch64_pred_cntp for a description of the operands.
;; Callers supply operands 0-2 only.  Operand 3 is filled in by the
;; preparation statement with CONSTM1_RTX of the predicate mode, and the
;; unspec marks it as SVE_KNOWN_PTRUE.
9028 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9029 [(set (match_operand:DI 0 "register_operand")
9032 (unspec:SI [(match_dup 3)
9033 (const_int SVE_KNOWN_PTRUE)
9034 (match_operand:PRED_ALL 2 "register_operand")]
9036 (match_operand:DI_ONLY 1 "register_operand")))]
9039 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Insn form of the DImode increment by a predicate count.  Operand 1 is
;; tied to the destination (constraint "0").  Operand 3 is matched with
;; no mode or predicate; if it is not already a constant, the rewrite
;; step replaces it with CONSTM1_RTX of the predicate mode.
9043 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9044 [(set (match_operand:DI 0 "register_operand" "=r")
9047 (unspec:SI [(match_operand 3)
9048 (const_int SVE_KNOWN_PTRUE)
9049 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9051 (match_operand:DI_ONLY 1 "register_operand" "0")))]
9053 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
9054 "&& !CONSTANT_P (operands[3])"
9056 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9060 ;; Increment an SImode register by the number of set bits in a predicate
9061 ;; using modular arithmetic. See aarch64_pred_cntp for a description of the operands.
;; Operand 1 is tied to the destination (constraint "0") and the template
;; prints the destination in its X-register form (%x0).  Operand 3 is
;; matched with no mode or predicate; if it is not already a constant,
;; the rewrite step replaces it with CONSTM1_RTX of the predicate mode.
9063 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
9064 [(set (match_operand:SI 0 "register_operand" "=r")
9066 (unspec:SI [(match_operand 3)
9067 (const_int SVE_KNOWN_PTRUE)
9068 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9070 (match_operand:SI 1 "register_operand" "0")))]
9072 "incp\t%x0, %2.<Vetype>"
9073 "&& !CONSTANT_P (operands[3])"
9075 operands[3] = CONSTM1_RTX (<MODE>mode);
9079 ;; Increment an SImode register by the number of set bits in a predicate
9080 ;; using saturating arithmetic, extending the result to 64 bits.
9082 ;; See aarch64_pred_cntp for a description of the operands.
;; Callers supply operands 0-2 only.  Operand 3 is filled in by the
;; preparation statement with CONSTM1_RTX of the predicate mode, and the
;; unspec marks it as SVE_KNOWN_PTRUE.
9083 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9084 [(set (match_operand:DI 0 "register_operand")
9087 (unspec:SI [(match_dup 3)
9088 (const_int SVE_KNOWN_PTRUE)
9089 (match_operand:PRED_ALL 2 "register_operand")]
9091 (match_operand:SI_ONLY 1 "register_operand"))))]
9094 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Insn form of the saturating SImode increment by a predicate count.
;; Operand 1 (SImode) is tied to the DImode destination.  When <CODE> is
;; SS_PLUS the template names both the X and W forms of the destination;
;; otherwise only the W form is used.  Operand 3 is matched with no mode
;; or predicate; if it is not already a constant, the rewrite step
;; replaces it with CONSTM1_RTX of the predicate mode.
9098 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9099 [(set (match_operand:DI 0 "register_operand" "=r")
9102 (unspec:SI [(match_operand 3)
9103 (const_int SVE_KNOWN_PTRUE)
9104 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9106 (match_operand:SI_ONLY 1 "register_operand" "0"))))]
9109 if (<CODE> == SS_PLUS)
9110 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
9112 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
9114 "&& !CONSTANT_P (operands[3])"
9116 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9120 ;; -------------------------------------------------------------------------
9121 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
9122 ;; -------------------------------------------------------------------------
9127 ;; -------------------------------------------------------------------------
9129 ;; Increment a vector of DIs by the number of set bits in a predicate.
9130 ;; See aarch64_pred_cntp for a description of the operands.
;; Operand 2 is the predicate (mode <VPRED>) whose set bits are counted.
;; The preparation statement sets operand 3 to CONSTM1_RTX of <VPRED>mode,
;; and the unspec marks it as SVE_KNOWN_PTRUE.
9131 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9132 [(set (match_operand:VNx2DI 0 "register_operand")
9134 (vec_duplicate:VNx2DI
9138 (const_int SVE_KNOWN_PTRUE)
9139 (match_operand:<VPRED> 2 "register_operand")]
9141 (match_operand:VNx2DI_ONLY 1 "register_operand")))]
9144 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Insn form of the DImode vector increment by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") prefixes the
;; instruction with MOVPRFX to copy operand 1 into the destination.
;; If operand 3 is not already a constant, the rewrite step replaces it
;; with CONSTM1_RTX of <VPRED>mode.
9148 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9149 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9151 (vec_duplicate:VNx2DI
9155 (const_int SVE_KNOWN_PTRUE)
9156 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9158 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
9161 <inc_dec>p\t%0.d, %2
9162 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
9163 "&& !CONSTANT_P (operands[3])"
9165 operands[3] = CONSTM1_RTX (<VPRED>mode);
9167 [(set_attr "movprfx" "*,yes")]
9170 ;; Increment a vector of SIs by the number of set bits in a predicate.
9171 ;; See aarch64_pred_cntp for a description of the operands.
;; Operand 2 is the predicate (mode <VPRED>) whose set bits are counted.
;; The preparation statement sets operand 3 to CONSTM1_RTX of <VPRED>mode,
;; and the unspec marks it as SVE_KNOWN_PTRUE.
9172 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9173 [(set (match_operand:VNx4SI 0 "register_operand")
9175 (vec_duplicate:VNx4SI
9178 (const_int SVE_KNOWN_PTRUE)
9179 (match_operand:<VPRED> 2 "register_operand")]
9181 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
9184 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Insn form of the SImode vector increment by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") prefixes the
;; instruction with MOVPRFX to copy operand 1 into the destination.
;; If operand 3 is not already a constant, the rewrite step replaces it
;; with CONSTM1_RTX of <VPRED>mode.
9188 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9189 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9191 (vec_duplicate:VNx4SI
9194 (const_int SVE_KNOWN_PTRUE)
9195 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9197 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
9200 <inc_dec>p\t%0.s, %2
9201 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
9202 "&& !CONSTANT_P (operands[3])"
9204 operands[3] = CONSTM1_RTX (<VPRED>mode);
9206 [(set_attr "movprfx" "*,yes")]
9209 ;; Increment a vector of HIs by the number of set bits in a predicate.
9210 ;; See aarch64_pred_cntp for a description of the operands.
;; Operand 2 is the predicate (mode <VPRED>) whose set bits are counted.
;; The preparation statement sets operand 3 to CONSTM1_RTX of <VPRED>mode,
;; and the unspec marks it as SVE_KNOWN_PTRUE.
9211 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9212 [(set (match_operand:VNx8HI 0 "register_operand")
9214 (vec_duplicate:VNx8HI
9218 (const_int SVE_KNOWN_PTRUE)
9219 (match_operand:<VPRED> 2 "register_operand")]
9221 (match_operand:VNx8HI_ONLY 1 "register_operand")))]
9224 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Insn form of the HImode vector increment by a predicate count.
;; Operand 3 is the subreg_lowpart_operator here, so the rewrite step
;; tests and replaces operands[4] rather than operands[3]: if operand 4
;; is not already a constant it becomes CONSTM1_RTX of <VPRED>mode.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") prefixes the
;; instruction with MOVPRFX to copy operand 1 into the destination.
9228 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9229 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9231 (vec_duplicate:VNx8HI
9232 (match_operator:HI 3 "subreg_lowpart_operator"
9235 (const_int SVE_KNOWN_PTRUE)
9236 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9238 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
9241 <inc_dec>p\t%0.h, %2
9242 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
9243 "&& !CONSTANT_P (operands[4])"
9245 operands[4] = CONSTM1_RTX (<VPRED>mode);
9247 [(set_attr "movprfx" "*,yes")]
9250 ;; -------------------------------------------------------------------------
9251 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
9252 ;; -------------------------------------------------------------------------
9257 ;; -------------------------------------------------------------------------
9259 ;; Decrement a DImode register by the number of set bits in a predicate.
9260 ;; See aarch64_pred_cntp for a description of the operands.
;; Callers supply operands 0-2 only.  Operand 3 is filled in by the
;; preparation statement with CONSTM1_RTX of the predicate mode, and the
;; unspec marks it as SVE_KNOWN_PTRUE.
9261 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9262 [(set (match_operand:DI 0 "register_operand")
9264 (match_operand:DI_ONLY 1 "register_operand")
9266 (unspec:SI [(match_dup 3)
9267 (const_int SVE_KNOWN_PTRUE)
9268 (match_operand:PRED_ALL 2 "register_operand")]
9272 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Insn form of the DImode decrement by a predicate count.  Operand 1 is
;; tied to the destination (constraint "0").  Operand 3 is matched with
;; no mode or predicate; if it is not already a constant, the rewrite
;; step replaces it with CONSTM1_RTX of the predicate mode.
9276 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9277 [(set (match_operand:DI 0 "register_operand" "=r")
9279 (match_operand:DI_ONLY 1 "register_operand" "0")
9281 (unspec:SI [(match_operand 3)
9282 (const_int SVE_KNOWN_PTRUE)
9283 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9286 "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
9287 "&& !CONSTANT_P (operands[3])"
9289 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9293 ;; Decrement an SImode register by the number of set bits in a predicate
9294 ;; using modular arithmetic. See aarch64_pred_cntp for a description of the operands.
;; Operand 1 is tied to the destination (constraint "0") and the template
;; prints the destination in its X-register form (%x0).  Operand 3 is
;; matched with no mode or predicate; if it is not already a constant,
;; the rewrite step replaces it with CONSTM1_RTX of the predicate mode.
9296 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
9297 [(set (match_operand:SI 0 "register_operand" "=r")
9299 (match_operand:SI 1 "register_operand" "0")
9300 (unspec:SI [(match_operand 3)
9301 (const_int SVE_KNOWN_PTRUE)
9302 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9305 "decp\t%x0, %2.<Vetype>"
9306 "&& !CONSTANT_P (operands[3])"
9308 operands[3] = CONSTM1_RTX (<MODE>mode);
9312 ;; Decrement an SImode register by the number of set bits in a predicate
9313 ;; using saturating arithmetic, extending the result to 64 bits.
9315 ;; See aarch64_pred_cntp for a description of the operands.
;; Callers supply operands 0-2 only.  Operand 3 is filled in by the
;; preparation statement with CONSTM1_RTX of the predicate mode, and the
;; unspec marks it as SVE_KNOWN_PTRUE.
9316 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9317 [(set (match_operand:DI 0 "register_operand")
9320 (match_operand:SI_ONLY 1 "register_operand")
9321 (unspec:SI [(match_dup 3)
9322 (const_int SVE_KNOWN_PTRUE)
9323 (match_operand:PRED_ALL 2 "register_operand")]
9327 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
;; Insn form of the saturating SImode decrement by a predicate count.
;; Operand 1 (SImode) is tied to the DImode destination.  When <CODE> is
;; SS_MINUS the template names both the X and W forms of the destination;
;; otherwise only the W form is used.  Operand 3 is matched with no mode
;; or predicate; if it is not already a constant, the rewrite step
;; replaces it with CONSTM1_RTX of the predicate mode.
9331 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9332 [(set (match_operand:DI 0 "register_operand" "=r")
9335 (match_operand:SI_ONLY 1 "register_operand" "0")
9336 (unspec:SI [(match_operand 3)
9337 (const_int SVE_KNOWN_PTRUE)
9338 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9342 if (<CODE> == SS_MINUS)
9343 return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
9345 return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
9347 "&& !CONSTANT_P (operands[3])"
9349 operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9353 ;; -------------------------------------------------------------------------
9354 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
9355 ;; -------------------------------------------------------------------------
9360 ;; -------------------------------------------------------------------------
9362 ;; Decrement a vector of DIs by the number of set bits in a predicate.
9363 ;; See aarch64_pred_cntp for a description of the operands.
;; Operand 2 is the predicate (mode <VPRED>) whose set bits are counted.
;; The preparation statement sets operand 3 to CONSTM1_RTX of <VPRED>mode,
;; and the unspec marks it as SVE_KNOWN_PTRUE.
9364 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9365 [(set (match_operand:VNx2DI 0 "register_operand")
9367 (match_operand:VNx2DI_ONLY 1 "register_operand")
9368 (vec_duplicate:VNx2DI
9372 (const_int SVE_KNOWN_PTRUE)
9373 (match_operand:<VPRED> 2 "register_operand")]
9377 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Insn form of the DImode vector decrement by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") prefixes the
;; instruction with MOVPRFX to copy operand 1 into the destination.
;; If operand 3 is not already a constant, the rewrite step replaces it
;; with CONSTM1_RTX of <VPRED>mode.
9381 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9382 [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9384 (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
9385 (vec_duplicate:VNx2DI
9389 (const_int SVE_KNOWN_PTRUE)
9390 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9394 <inc_dec>p\t%0.d, %2
9395 movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
9396 "&& !CONSTANT_P (operands[3])"
9398 operands[3] = CONSTM1_RTX (<VPRED>mode);
9400 [(set_attr "movprfx" "*,yes")]
9403 ;; Decrement a vector of SIs by the number of set bits in a predicate.
9404 ;; See aarch64_pred_cntp for a description of the operands.
;; Operand 2 is the predicate (mode <VPRED>) whose set bits are counted.
;; The preparation statement sets operand 3 to CONSTM1_RTX of <VPRED>mode,
;; and the unspec marks it as SVE_KNOWN_PTRUE.
9405 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9406 [(set (match_operand:VNx4SI 0 "register_operand")
9408 (match_operand:VNx4SI_ONLY 1 "register_operand")
9409 (vec_duplicate:VNx4SI
9412 (const_int SVE_KNOWN_PTRUE)
9413 (match_operand:<VPRED> 2 "register_operand")]
9417 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Insn form of the SImode vector decrement by a predicate count.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") prefixes the
;; instruction with MOVPRFX to copy operand 1 into the destination.
;; If operand 3 is not already a constant, the rewrite step replaces it
;; with CONSTM1_RTX of <VPRED>mode.
9421 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9422 [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9424 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
9425 (vec_duplicate:VNx4SI
9428 (const_int SVE_KNOWN_PTRUE)
9429 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9433 <inc_dec>p\t%0.s, %2
9434 movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
9435 "&& !CONSTANT_P (operands[3])"
9437 operands[3] = CONSTM1_RTX (<VPRED>mode);
9439 [(set_attr "movprfx" "*,yes")]
9442 ;; Decrement a vector of HIs by the number of set bits in a predicate.
9443 ;; See aarch64_pred_cntp for a description of the operands.
;; Operand 2 is the predicate (mode <VPRED>) whose set bits are counted.
;; The preparation statement sets operand 3 to CONSTM1_RTX of <VPRED>mode,
;; and the unspec marks it as SVE_KNOWN_PTRUE.
9444 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9445 [(set (match_operand:VNx8HI 0 "register_operand")
9447 (match_operand:VNx8HI_ONLY 1 "register_operand")
9448 (vec_duplicate:VNx8HI
9452 (const_int SVE_KNOWN_PTRUE)
9453 (match_operand:<VPRED> 2 "register_operand")]
9457 operands[3] = CONSTM1_RTX (<VPRED>mode);
;; Insn form of the HImode vector decrement by a predicate count.
;; Operand 3 is the subreg_lowpart_operator here, so the rewrite step
;; tests and replaces operands[4] rather than operands[3]: if operand 4
;; is not already a constant it becomes CONSTM1_RTX of <VPRED>mode.
;; Alternative 0 ties operand 1 to the destination.  Alternative 1
;; (earlyclobber destination, disparaged with "?") prefixes the
;; instruction with MOVPRFX to copy operand 1 into the destination.
9461 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9462 [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9464 (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
9465 (vec_duplicate:VNx8HI
9466 (match_operator:HI 3 "subreg_lowpart_operator"
9469 (const_int SVE_KNOWN_PTRUE)
9470 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9474 <inc_dec>p\t%0.h, %2
9475 movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
9476 "&& !CONSTANT_P (operands[4])"
9478 operands[4] = CONSTM1_RTX (<VPRED>mode);
9480 [(set_attr "movprfx" "*,yes")]