aarch64: Add support for unpacked sub [PR96366]
gcc/config/aarch64/aarch64-sve.md
;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; The file is organised into the following sections (search for the full
;; line):
;;
;; == General notes
;; ---- Note on the handling of big-endian SVE
;; ---- Description of UNSPEC_PTEST
;; ---- Description of UNSPEC_PRED_Z
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; ---- Note on FFR handling
;;
;; == Moves
;; ---- Moves of single vectors
;; ---- Moves of multiple vectors
;; ---- Moves of predicates
;; ---- Moves relating to the FFR
;;
;; == Loads
;; ---- Normal contiguous loads
;; ---- Extending contiguous loads
;; ---- First-faulting contiguous loads
;; ---- First-faulting extending contiguous loads
;; ---- Non-temporal contiguous loads
;; ---- Normal gather loads
;; ---- Extending gather loads
;; ---- First-faulting gather loads
;; ---- First-faulting extending gather loads
;;
;; == Prefetches
;; ---- Contiguous prefetches
;; ---- Gather prefetches
;;
;; == Stores
;; ---- Normal contiguous stores
;; ---- Truncating contiguous stores
;; ---- Non-temporal contiguous stores
;; ---- Normal scatter stores
;; ---- Truncating scatter stores
;;
;; == Vector creation
;; ---- [INT,FP] Duplicate element
;; ---- [INT,FP] Initialize from individual elements
;; ---- [INT] Linear series
;; ---- [PRED] Duplicate element
;;
;; == Vector decomposition
;; ---- [INT,FP] Extract index
;; ---- [INT,FP] Extract active element
;; ---- [PRED] Extract index
;;
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; ---- [INT] Sign and zero extension
;; ---- [INT] Truncation
;; ---- [INT] Logical inverse
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; ---- [FP] Square root
;; ---- [FP] Reciprocal square root
;; ---- [PRED] Inverse
;;
;; == Binary arithmetic
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; ---- [INT] Addition
;; ---- [INT] Subtraction
;; ---- [INT] Take address
;; ---- [INT] Absolute difference
;; ---- [INT] Saturating addition and subtraction
;; ---- [INT] Highpart multiplication
;; ---- [INT] Division
;; ---- [INT] Binary logical operations
;; ---- [INT] Binary logical operations (inverted second input)
;; ---- [INT] Shifts (rounding towards -Inf)
;; ---- [INT] Shifts (rounding towards 0)
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; ---- [FP] Addition
;; ---- [FP] Complex addition
;; ---- [FP] Subtraction
;; ---- [FP] Absolute difference
;; ---- [FP] Multiplication
;; ---- [FP] Division
;; ---- [FP] Binary logical operations
;; ---- [FP] Sign copying
;; ---- [FP] Maximum and minimum
;; ---- [PRED] Binary logical operations
;; ---- [PRED] Binary logical operations (inverted second input)
;; ---- [PRED] Binary logical operations (inverted result)
;;
;; == Ternary arithmetic
;; ---- [INT] MLA and MAD
;; ---- [INT] MLS and MSB
;; ---- [INT] Dot product
;; ---- [INT] Sum of absolute differences
;; ---- [INT] Matrix multiply-accumulate
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; ---- [FP] Matrix multiply-accumulate
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates
;; ---- [INT,FP] Compare and select
;; ---- [INT] Comparisons
;; ---- [INT] While tests
;; ---- [FP] Direct comparisons
;; ---- [FP] Absolute comparisons
;; ---- [PRED] Select
;; ---- [PRED] Test bits
;;
;; == Reductions
;; ---- [INT,FP] Conditional reductions
;; ---- [INT] Tree reductions
;; ---- [FP] Tree reductions
;; ---- [FP] Left-to-right reductions
;;
;; == Permutes
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Special-purpose unary permutes
;; ---- [INT,FP] Special-purpose binary permutes
;; ---- [PRED] Special-purpose unary permutes
;; ---- [PRED] Special-purpose binary permutes
;;
;; == Conversions
;; ---- [INT<-INT] Packs
;; ---- [INT<-INT] Unpacks
;; ---- [INT<-FP] Conversions
;; ---- [INT<-FP] Packs
;; ---- [INT<-FP] Unpacks
;; ---- [FP<-INT] Conversions
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
;; == Vector partitioning
;; ---- [PRED] Unary partitioning
;; ---- [PRED] Binary partitioning
;; ---- [PRED] Scalarization
;;
;; == Counting elements
;; ---- [INT] Count elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; ---- [INT] Count elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; ---- [INT] Decrement by the number of elements in a predicate (vector)

;; =========================================================================
;; == General notes
;; =========================================================================
;;
;; -------------------------------------------------------------------------
;; ---- Note on the handling of big-endian SVE
;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;;
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
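;;
;; As a concrete illustration (assuming a 128-bit vector length purely
;; for the sake of example), consider a VNx8HI vector whose halfword
;; elements appear in memory as e0, e1, ..., e7, with e0 at the lowest
;; address:
;;
;; - LD1H puts e0 in architectural lane 0 (the least significant
;;   halfword of the register), so GCC's element 0 and the
;;   architecture's element 0 agree.
;;
;; - LDR copies the bytes verbatim, so e0's two bytes would still land
;;   in lane 0, but swapped within the lane, since big-endian memory
;;   stores the most significant byte of each element first.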
;;
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PTEST
;; -------------------------------------------------------------------------
;;
;; SVE provides a PTEST instruction for testing the active lanes of a
;; predicate and setting the flags based on the result.  The associated
;; condition code tests are:
;;
;; - any (= ne): at least one active bit is set
;; - none (= eq): all active bits are clear (*)
;; - first (= mi): the first active bit is set
;; - nfrst (= pl): the first active bit is clear (*)
;; - last (= cc): the last active bit is set
;; - nlast (= cs): the last active bit is clear (*)
;;
;; where the conditions marked (*) are also true when there are no active
;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
;; of a PTEST use the condition code mode CC_NZC.
;;
;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
;; This means that for other predicate modes, we need a governing predicate
;; in which all bits are defined.
;;
;; For example, most predicated .H operations ignore the odd bits of the
;; governing predicate, so that an active lane is represented by the
;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
;; any value.  To test a .H predicate, we instead need "10" and "00"
;; respectively, so that the condition only tests the even bits of the
;; predicate.
;;
;; Several instructions set the flags as a side-effect, in the same way
;; that a separate PTEST would.  It's important for code quality that we
;; use these flags results as often as possible, particularly in the case
;; of WHILE* and RDFFR.
;;
;; Also, some of the instructions that set the flags are unpredicated
;; and instead implicitly test all .B, .H, .S or .D elements, as though
;; they were predicated on a PTRUE of that size.  For example, a .S
;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
;; would.
;;
;; We therefore need to represent PTEST operations in a way that
;; makes it easy to combine them with both predicated and unpredicated
;; operations, while using a VNx16BI governing predicate for all
;; predicate modes.  We do this using:
;;
;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
;;
;; where:
;;
;; - GP is the real VNx16BI governing predicate
;;
;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
;;   GP to CAST_GP are guaranteed to be clear in GP.
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
;;   SVE_MAYBE_NOT_PTRUE otherwise.
;;
;; - OP is the predicate we want to test, of the same mode as CAST_GP.
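;;
;; For example, a PTEST of a .H predicate OP (here GP and OP stand for
;; placeholder registers, purely as an illustrative sketch rather than
;; a pattern quoted from below) would be represented as:
;;
;;   (unspec:CC_NZC [(reg:VNx16BI gp)
;;                   (subreg:VNx8BI (reg:VNx16BI gp) 0)
;;                   (const_int SVE_MAYBE_NOT_PTRUE)
;;                   (reg:VNx8BI op)]
;;                  UNSPEC_PTEST)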
;;
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PRED_Z
;; -------------------------------------------------------------------------
;;
;; SVE integer comparisons are predicated and return zero for inactive
;; lanes.  Sometimes we use them with predicates that are all-true and
;; sometimes we use them with general predicates.
;;
;; The integer comparisons also set the flags and so build in the effect
;; of a PTEST.  We therefore want to be able to combine integer comparison
;; patterns with PTESTs of the result.  One difficulty with doing this is
;; that (as noted above) the PTEST is always a .B operation and so can place
;; stronger requirements on the governing predicate than the comparison does.
;;
;; For example, when applying a separate PTEST to the result of a full-vector
;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
;; .B PTRUE.  In contrast, the comparison might be predicated on either
;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
;; bits don't matter for .H operations.
;;
;; We therefore can't rely on a full-vector comparison using the same
;; predicate register as a following PTEST.  We instead need to remember
;; whether a comparison is known to be a full-vector comparison and use
;; this information in addition to a check for equal predicate registers.
;; At the same time, it's useful to have a common representation for all
;; integer comparisons, so that they can be handled by a single set of
;; patterns.
;;
;; We therefore take a similar approach to UNSPEC_PTEST above and use:
;;
;;   (unspec:<M:VPRED> [gp ptrue_flag (code:<M:VPRED> op0 op1)] UNSPEC_PRED_Z)
;;
;; where:
;;
;; - GP is the governing predicate, of mode <M:VPRED>
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
;;   otherwise
;;
;; - CODE is the comparison code
;;
;; - OP0 and OP1 are the values being compared, of mode M
;;
;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
;;
;; -------------------------------------------------------------------------
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; -------------------------------------------------------------------------
;;
;; Many SVE integer operations are predicated.  We can generate them
;; from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;     We can make extra lanes active without changing the behavior
;;     (although for code-quality reasons we should avoid doing so
;;     needlessly).
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; For (1) and (2) we need a way of attaching the predicate to a normal
;; unpredicated integer operation.  We do this using:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
;;
;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
;; it always is for (1), but might not be for (2).
;;
;; The unspec as a whole has the same value as (code:M ...) when PRED is
;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
;; but as noted above, we should only do so if there's a specific benefit.
;;
;; (The "_X" in the unspec is named after the ACLE functions in (2).)
;;
;; For (3) and (4) we can simply use the SVE port's normal representation
;; of a predicate-based select:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
;;
;; where INACTIVE specifies the values of inactive lanes.
;;
;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
;; than inserting the integer operation directly.  This is mostly useful
;; if we want the combine pass to merge an integer operation with an explicit
;; vcond_mask (in other words, with a following SEL instruction).  However,
;; it's generally better to merge such operations at the gimple level
;; using (3).
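;;
;; For example (an illustrative sketch with placeholder registers),
;; a predicated VNx8HI addition generated for source (1) or (2)
;; would be:
;;
;;   (unspec:VNx8HI [(reg:VNx8BI pred)
;;                   (plus:VNx8HI (reg:VNx8HI a) (reg:VNx8HI b))]
;;                  UNSPEC_PRED_X)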
;;
;; -------------------------------------------------------------------------
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; -------------------------------------------------------------------------
;;
;; Most SVE floating-point operations are predicated.  We can generate
;; them from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;
;;     The instruction must have the same exception behavior as the
;;     function call unless things like command-line flags specifically
;;     allow otherwise.  For example, with -ffast-math, it is OK to
;;     raise exceptions for inactive lanes, but normally it isn't.
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; So:
;;
;; - In (1), the predicate is known to be all true and the pattern can use
;;   unpredicated operations where available.
;;
;; - In (2), the predicate might or might not be all true.  The pattern can
;;   use unpredicated instructions if the predicate is all-true or if things
;;   like command-line flags allow exceptions for inactive lanes.
;;
;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
;;   might be inactive and inactive lanes of the result must have specific
;;   values.  There is no scope for using unpredicated instructions (and no
;;   reason to want to), so the question about command-line flags doesn't
;;   arise.
;;
;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
;; in combination with a separate predicate operand, e.g.
;;
;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
;;           ....)
;;
;; because (sqrt ...) can raise an exception for any lane, including
;; inactive ones.  We therefore need to use an unspec instead.
;;
;; Also, (2) requires some way of distinguishing the case in which the
;; predicate might have inactive lanes and cannot be changed from the
;; case in which the predicate has no inactive lanes or can be changed.
;; This information is also useful when matching combined FP patterns
;; in which the predicates might not be equal.
;;
;; We therefore model FP operations as an unspec of the form:
;;
;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
;;
;; where:
;;
;; - PRED is the governing predicate.
;;
;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
;;
;; - OP0 OP1 ... are the normal input operands to the operation.
;;
;; - MNEMONIC is the mnemonic of the associated SVE instruction.
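;;
;; For example (an illustrative sketch with placeholder registers rather
;; than a pattern quoted from below), a predicated VNx4SF addition whose
;; governing predicate must be honored exactly would be:
;;
;;   (unspec:VNx4SF [(reg:VNx4BI pred)
;;                   (const_int SVE_STRICT_GP)
;;                   (reg:VNx4SF a)
;;                   (reg:VNx4SF b)]
;;                  UNSPEC_COND_FADD)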
;;
;; -------------------------------------------------------------------------
;; ---- Note on FFR handling
;; -------------------------------------------------------------------------
;;
;; Logically we want to divide FFR-related instructions into regions
;; that contain exactly one of:
;;
;; - a single write to the FFR
;; - any number of reads from the FFR (but only one read is likely)
;; - any number of LDFF1 and LDNF1 instructions
;;
;; However, LDFF1 and LDNF1 instructions should otherwise behave like
;; normal loads as far as possible.  This means that they should be
;; schedulable within a region in the same way that LD1 would be,
;; and they should be deleted as dead if the result is unused.  The loads
;; should therefore not write to the FFR, since that would both serialize
;; the loads with respect to each other and keep the loads live for any
;; later RDFFR.
;;
;; We get around this by using a fake "FFR token" (FFRT) to help describe
;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
;; while using the FFRT keeps the instruction within its region.
;; Specifically:
;;
;; - Writes start a new FFRT region as well as setting the FFR:
;;
;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
;;
;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
;;   loads stay within the same FFRT region:
;;
;;       L1: load data while using the FFRT
;;
;;   In addition, any FFRT region that includes a load also has at least one
;;   instance of:
;;
;;       L2: FFR = update(FFR, FFRT) [type == no_insn]
;;
;;   to make it clear that the region both reads from and writes to the FFR.
;;
;; - Reads do the following:
;;
;;       R1: FFRT = FFR [type == no_insn]
;;       R2: read from the FFRT
;;       R3: FFRT = update(FFRT) [type == no_insn]
;;
;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
;;   cannot move backwards across R3.
;;
;; This way, writes are only kept alive by later loads or reads,
;; and write/read pairs fold normally.  For two consecutive reads,
;; the first R3 is made dead by the second R1, which in turn becomes
;; redundant with the first R1.  We then have:
;;
;;       first R1: FFRT = FFR
;;       first read from the FFRT
;;       second read from the FFRT
;;       second R3: FFRT = update(FFRT)
;;
;; i.e. the two FFRT regions collapse into a single one with two
;; independent reads.
;;
;; The model still prevents some valid optimizations though.  For example,
;; if all loads in an FFRT region are deleted as dead, nothing would remove
;; the L2 instructions.
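;;
;; As a schematic example, an FFRT region that contains one LDFF1
;; followed by one read of the FFR would consist of:
;;
;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
;;       L1: load data while using the FFRT
;;       L2: FFR = update(FFR, FFRT)
;;       R1: FFRT = FFR
;;       R2: read from the FFRT
;;       R3: FFRT = update(FFRT)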

;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Moves of single vectors
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV (including aliases)
;; - LD1B (contiguous form)
;; - LD1D ( " " )
;; - LD1H ( " " )
;; - LD1W ( " " )
;; - LDR
;; - ST1B (contiguous form)
;; - ST1D ( " " )
;; - ST1H ( " " )
;; - ST1W ( " " )
;; - STR
;; -------------------------------------------------------------------------

(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
        && can_create_pseudo_p ())
      {
        aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
        DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
        aarch64_expand_mov_immediate (operands[0], operands[1]);
        DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
        && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)

(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
        (match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)

;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
;; little-endian ordering is acceptable.  Only allow memory operations during
;; and after RA; before RA we want the predicated load and store patterns to
;; be used instead.
(define_insn "*aarch64_sve_mov<mode>_ldr_str"
  [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
        (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE
   && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
           && nonmemory_operand (operands[1], <MODE>mode)))"
  "@
   ldr\t%0, %1
   str\t%1, %0
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable.  Memory
;; accesses require secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
        (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE
   && <MODE>mode != VNx16QImode
   && (BYTES_BIG_ENDIAN
       || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
  "@
   mov\t%0.d, %1.d
   * return aarch64_output_sve_mov_immediate (operands[1]);"
)

;; Handle memory reloads for modes that can't use LDR and STR.  We use a
;; byte PTRUE for all modes to try to encourage reuse.  This pattern
;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
633 (define_expand "aarch64_sve_reload_mem"
634 [(parallel
635 [(set (match_operand 0)
636 (match_operand 1))
637 (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
638 "TARGET_SVE"
639 {
640 /* Create a PTRUE. */
641 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
642
643 /* Refer to the PTRUE in the appropriate mode for this move. */
644 machine_mode mode = GET_MODE (operands[0]);
645 rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
646
647 /* Emit a predicated load or store. */
648 aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
649 DONE;
650 }
651 )
652
653 ;; A predicated move in which the predicate is known to be all-true.
654 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
655 ;; so changes to this pattern will need changes there as well.
656 (define_insn_and_split "@aarch64_pred_mov<mode>"
657 [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
658 (unspec:SVE_ALL
659 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
660 (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
661 UNSPEC_PRED_X))]
662 "TARGET_SVE
663 && (register_operand (operands[0], <MODE>mode)
664 || register_operand (operands[2], <MODE>mode))"
665 "@
666 #
667 ld1<Vesize>\t%0.<Vctype>, %1/z, %2
668 st1<Vesize>\t%2.<Vctype>, %1, %0"
669 "&& register_operand (operands[0], <MODE>mode)
670 && register_operand (operands[2], <MODE>mode)"
671 [(set (match_dup 0) (match_dup 2))]
672 )
673
674 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
675 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
676 ;; for details. We use a special predicate for operand 2 to reduce
677 ;; the number of patterns.
678 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
679 [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
680 (unspec:SVE_ALL
681 [(match_operand:VNx16BI 1 "register_operand" "Upl")
682 (match_operand 2 "aarch64_any_register_operand" "w")]
683 UNSPEC_REV_SUBREG))]
684 "TARGET_SVE && BYTES_BIG_ENDIAN"
685 "#"
686 "&& reload_completed"
687 [(const_int 0)]
688 {
689 aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
690 DONE;
691 }
692 )
693
694 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
695 ;; This is equivalent to a subreg on little-endian targets but not for
696 ;; big-endian; see the comment at the head of the file for details.
697 (define_expand "@aarch64_sve_reinterpret<mode>"
698 [(set (match_operand:SVE_ALL 0 "register_operand")
699 (unspec:SVE_ALL
700 [(match_operand 1 "aarch64_any_register_operand")]
701 UNSPEC_REINTERPRET))]
702 "TARGET_SVE"
703 {
704 machine_mode src_mode = GET_MODE (operands[1]);
705 if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
706 {
707 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
708 DONE;
709 }
710 }
711 )
712
713 ;; A pattern for handling type punning on big-endian targets. We use a
714 ;; special predicate for operand 1 to reduce the number of patterns.
715 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
716 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
717 (unspec:SVE_ALL
718 [(match_operand 1 "aarch64_any_register_operand" "w")]
719 UNSPEC_REINTERPRET))]
720 "TARGET_SVE"
721 "#"
722 "&& reload_completed"
723 [(set (match_dup 0) (match_dup 1))]
724 {
725 operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
726 }
727 )
728
729 ;; -------------------------------------------------------------------------
730 ;; ---- Moves of multiple vectors
731 ;; -------------------------------------------------------------------------
732 ;; All patterns in this section are synthetic and split to real
733 ;; instructions after reload.
734 ;; -------------------------------------------------------------------------
735
736 (define_expand "mov<mode>"
737 [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
738 (match_operand:SVE_STRUCT 1 "general_operand"))]
739 "TARGET_SVE"
740 {
741 /* Big-endian loads and stores need to be done via LD1 and ST1;
742 see the comment at the head of the file for details. */
743 if ((MEM_P (operands[0]) || MEM_P (operands[1]))
744 && BYTES_BIG_ENDIAN)
745 {
746 gcc_assert (can_create_pseudo_p ());
747 aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
748 DONE;
749 }
750
751 if (CONSTANT_P (operands[1]))
752 {
753 aarch64_expand_mov_immediate (operands[0], operands[1]);
754 DONE;
755 }
756 }
757 )
758
759 ;; Unpredicated structure moves (little-endian).
760 (define_insn "*aarch64_sve_mov<mode>_le"
761 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
762 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
763 "TARGET_SVE && !BYTES_BIG_ENDIAN"
764 "#"
765 [(set_attr "length" "<insn_length>")]
766 )
767
768 ;; Unpredicated structure moves (big-endian). Memory accesses require
769 ;; secondary reloads.
770 (define_insn "*aarch64_sve_mov<mode>_be"
771 [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
772 (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
773 "TARGET_SVE && BYTES_BIG_ENDIAN"
774 "#"
775 [(set_attr "length" "<insn_length>")]
776 )
777
778 ;; Split unpredicated structure moves into pieces. This is the same
779 ;; for both big-endian and little-endian code, although it only needs
780 ;; to handle memory operands for little-endian code.
781 (define_split
782 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
783 (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
784 "TARGET_SVE && reload_completed"
785 [(const_int 0)]
786 {
787 rtx dest = operands[0];
788 rtx src = operands[1];
789 if (REG_P (dest) && REG_P (src))
790 aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
791 else
792 for (unsigned int i = 0; i < <vector_count>; ++i)
793 {
794 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
795 i * BYTES_PER_SVE_VECTOR);
796 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
797 i * BYTES_PER_SVE_VECTOR);
798 emit_insn (gen_rtx_SET (subdest, subsrc));
799 }
800 DONE;
801 }
802 )
803
804 ;; Predicated structure moves. This works for both endiannesses but in
805 ;; practice is only useful for big-endian.
806 (define_insn_and_split "@aarch64_pred_mov<mode>"
807 [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
808 (unspec:SVE_STRUCT
809 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
810 (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
811 UNSPEC_PRED_X))]
812 "TARGET_SVE
813 && (register_operand (operands[0], <MODE>mode)
814 || register_operand (operands[2], <MODE>mode))"
815 "#"
816 "&& reload_completed"
817 [(const_int 0)]
818 {
819 for (unsigned int i = 0; i < <vector_count>; ++i)
820 {
821 rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
822 <MODE>mode,
823 i * BYTES_PER_SVE_VECTOR);
824 rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
825 <MODE>mode,
826 i * BYTES_PER_SVE_VECTOR);
827 aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
828 }
829 DONE;
830 }
831 [(set_attr "length" "<insn_length>")]
832 )
833
834 ;; -------------------------------------------------------------------------
835 ;; ---- Moves of predicates
836 ;; -------------------------------------------------------------------------
837 ;; Includes:
838 ;; - MOV
839 ;; - LDR
840 ;; - PFALSE
841 ;; - PTRUE
842 ;; - PTRUES
843 ;; - STR
844 ;; -------------------------------------------------------------------------
845
846 (define_expand "mov<mode>"
847 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
848 (match_operand:PRED_ALL 1 "general_operand"))]
849 "TARGET_SVE"
850 {
851 if (GET_CODE (operands[0]) == MEM)
852 operands[1] = force_reg (<MODE>mode, operands[1]);
853
854 if (CONSTANT_P (operands[1]))
855 {
856 aarch64_expand_mov_immediate (operands[0], operands[1]);
857 DONE;
858 }
859 }
860 )
861
862 (define_insn "*aarch64_sve_mov<mode>"
863 [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
864 (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
865 "TARGET_SVE
866 && (register_operand (operands[0], <MODE>mode)
867 || register_operand (operands[1], <MODE>mode))"
868 "@
869 mov\t%0.b, %1.b
870 str\t%1, %0
871 ldr\t%0, %1
872 * return aarch64_output_sve_mov_immediate (operands[1]);"
873 )
874
875 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
876 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
877 [(set (reg:CC_NZC CC_REGNUM)
878 (unspec:CC_NZC
879 [(match_operand 2)
880 (match_operand 3)
881 (const_int SVE_KNOWN_PTRUE)
882 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
883 [(unspec:VNx16BI
884 [(match_operand:SI 4 "const_int_operand")
885 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
886 UNSPEC_PTRUE)])]
887 UNSPEC_PTEST))
888 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
889 (match_dup 1))]
890 "TARGET_SVE"
891 {
892 return aarch64_output_sve_ptrues (operands[1]);
893 }
894 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
895 {
896 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
897 }
898 )
899
900 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
901 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
902 [(set (reg:CC_NZC CC_REGNUM)
903 (unspec:CC_NZC
904 [(match_operand 2)
905 (match_operand 3)
906 (const_int SVE_KNOWN_PTRUE)
907 (subreg:PRED_HSD
908 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
909 [(unspec:VNx16BI
910 [(match_operand:SI 4 "const_int_operand")
911 (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
912 UNSPEC_PTRUE)]) 0)]
913 UNSPEC_PTEST))
914 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
915 (match_dup 1))]
916 "TARGET_SVE"
917 {
918 return aarch64_output_sve_ptrues (operands[1]);
919 }
920 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
921 {
922 operands[2] = CONSTM1_RTX (VNx16BImode);
923 operands[3] = CONSTM1_RTX (<MODE>mode);
924 }
925 )
926
927 ;; Match PTRUES Pn.B when only the flags result is useful (which is
928 ;; a way of testing VL).
929 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
930 [(set (reg:CC_NZC CC_REGNUM)
931 (unspec:CC_NZC
932 [(match_operand 2)
933 (match_operand 3)
934 (const_int SVE_KNOWN_PTRUE)
935 (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
936 [(unspec:VNx16BI
937 [(match_operand:SI 4 "const_int_operand")
938 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
939 UNSPEC_PTRUE)])]
940 UNSPEC_PTEST))
941 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
942 "TARGET_SVE"
943 {
944 return aarch64_output_sve_ptrues (operands[1]);
945 }
946 "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
947 {
948 operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
949 }
950 )
951
;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand 2)
           (match_operand 3)
           (const_int SVE_KNOWN_PTRUE)
           (subreg:PRED_HSD
             (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
               [(unspec:VNx16BI
                  [(match_operand:SI 4 "const_int_operand")
                   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
                  UNSPEC_PTRUE)]) 0)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- Moves relating to the FFR
;; -------------------------------------------------------------------------
;; Includes:
;; - RDFFR
;; - RDFFRS
;; - SETFFR
;; - WRFFR
;; -------------------------------------------------------------------------

;; [W1 in the block comment above about FFR handling]
;;
;; Write to the FFR and start a new FFRT scheduling region.
(define_insn "aarch64_wrffr"
  [(set (reg:VNx16BI FFR_REGNUM)
        (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
   (set (reg:VNx16BI FFRT_REGNUM)
        (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
  "TARGET_SVE"
  "@
   setffr
   wrffr\t%0.b"
)

;; [L2 in the block comment above about FFR handling]
;;
;; Introduce a read from and write to the FFR in the current FFRT region,
;; so that the FFR value is live on entry to the region and so that the FFR
;; value visibly changes within the region.  This is used (possibly multiple
;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
(define_insn "aarch64_update_ffr_for_load"
  [(set (reg:VNx16BI FFR_REGNUM)
        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
                         (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R1 in the block comment above about FFR handling]
;;
;; Notionally copy the FFR to the FFRT, so that the current FFR value
;; can be read from there by the RDFFR instructions below.  This acts
;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
;; creates a natural dependency with earlier writes.
(define_insn "aarch64_copy_ffr_to_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
        (reg:VNx16BI FFR_REGNUM))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; [R2 in the block comment above about FFR handling]
;;
;; Read the FFR via the FFRT.
(define_insn "aarch64_rdffr"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffr\t%0.b"
)

;; Likewise with zero predication.
(define_insn "aarch64_rdffr_z"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (and:VNx16BI
          (reg:VNx16BI FFRT_REGNUM)
          (match_operand:VNx16BI 1 "register_operand" "Upa")))]
  "TARGET_SVE"
  "rdffr\t%0.b, %1/z"
)

;; Read the FFR to test for a fault, without using the predicate result.
(define_insn "*aarch64_rdffr_z_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (and:VNx16BI
             (reg:VNx16BI FFRT_REGNUM)
             (match_dup 1))]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (const_int SVE_KNOWN_PTRUE)
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Read the FFR with zero predication and test the result.
(define_insn "*aarch64_rdffr_z_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (and:VNx16BI
             (reg:VNx16BI FFRT_REGNUM)
             (match_dup 1))]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (and:VNx16BI
          (reg:VNx16BI FFRT_REGNUM)
          (match_dup 1)))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_dup 1)
           (const_int SVE_KNOWN_PTRUE)
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE"
  "rdffrs\t%0.b, %1/z"
)

;; [R3 in the block comment above about FFR handling]
;;
;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
(define_insn "aarch64_update_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)

;; =========================================================================
;; == Loads
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1D
;; - LD1H
;; - LD1W
;; - LD2B
;; - LD2D
;; - LD2H
;; - LD2W
;; - LD3B
;; - LD3D
;; - LD3H
;; - LD3W
;; - LD4B
;; - LD4D
;; - LD4H
;; - LD4W
;; -------------------------------------------------------------------------

;; Predicated LD1.
(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "memory_operand" "m")]
          UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)

;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "memory_operand")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
          UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)

;; -------------------------------------------------------------------------
;; ---- Extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
1198 ;; -------------------------------------------------------------------------
1199
1200 ;; Predicated load and extend, with 8 elements per 128-bit block.
1201 (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1202 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1203 (unspec:SVE_HSDI
1204 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1205 (ANY_EXTEND:SVE_HSDI
1206 (unspec:SVE_PARTIAL_I
1207 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1208 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1209 UNSPEC_LD1_SVE))]
1210 UNSPEC_PRED_X))]
1211 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1212 "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1213 "&& !CONSTANT_P (operands[3])"
1214 {
1215 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1216 }
1217 )
1218
1219 ;; -------------------------------------------------------------------------
1220 ;; ---- First-faulting contiguous loads
1221 ;; -------------------------------------------------------------------------
1222 ;; Includes contiguous forms of:
1223 ;; - LDFF1B
1224 ;; - LDFF1D
1225 ;; - LDFF1H
1226 ;; - LDFF1W
1227 ;; - LDNF1B
1228 ;; - LDNF1D
1229 ;; - LDNF1H
1230 ;; - LDNF1W
1231 ;; -------------------------------------------------------------------------
1232
1233 ;; Contiguous non-extending first-faulting or non-faulting loads.
1234 (define_insn "@aarch64_ld<fn>f1<mode>"
1235 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1236 (unspec:SVE_FULL
1237 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1238 (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1239 (reg:VNx16BI FFRT_REGNUM)]
1240 SVE_LDFF1_LDNF1))]
1241 "TARGET_SVE"
1242 "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1243 )
1244
1245 ;; -------------------------------------------------------------------------
1246 ;; ---- First-faulting extending contiguous loads
1247 ;; -------------------------------------------------------------------------
1248 ;; Includes contiguous forms of:
1249 ;; - LDFF1B
1250 ;; - LDFF1H
1251 ;; - LDFF1SB
1252 ;; - LDFF1SH
1253 ;; - LDFF1SW
1254 ;; - LDFF1W
1255 ;; - LDNF1B
1256 ;; - LDNF1H
1257 ;; - LDNF1SB
1258 ;; - LDNF1SH
1259 ;; - LDNF1SW
1260 ;; - LDNF1W
1261 ;; -------------------------------------------------------------------------
1262
1263 ;; Predicated first-faulting or non-faulting load and extend.
1264 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1265 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1266 (unspec:SVE_HSDI
1267 [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1268 (ANY_EXTEND:SVE_HSDI
1269 (unspec:SVE_PARTIAL_I
1270 [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1271 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1272 (reg:VNx16BI FFRT_REGNUM)]
1273 SVE_LDFF1_LDNF1))]
1274 UNSPEC_PRED_X))]
1275 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1276 "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1277 "&& !CONSTANT_P (operands[3])"
1278 {
1279 operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1280 }
1281 )
1282
1283 ;; -------------------------------------------------------------------------
1284 ;; ---- Non-temporal contiguous loads
1285 ;; -------------------------------------------------------------------------
1286 ;; Includes:
1287 ;; - LDNT1B
1288 ;; - LDNT1D
1289 ;; - LDNT1H
1290 ;; - LDNT1W
1291 ;; -------------------------------------------------------------------------
1292
1293 ;; Predicated contiguous non-temporal load.
1294 (define_insn "@aarch64_ldnt1<mode>"
1295 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1296 (unspec:SVE_FULL
1297 [(match_operand:<VPRED> 2 "register_operand" "Upl")
1298 (match_operand:SVE_FULL 1 "memory_operand" "m")]
1299 UNSPEC_LDNT1_SVE))]
1300 "TARGET_SVE"
1301 "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1302 )
1303
1304 ;; -------------------------------------------------------------------------
1305 ;; ---- Normal gather loads
1306 ;; -------------------------------------------------------------------------
1307 ;; Includes gather forms of:
1308 ;; - LD1D
1309 ;; - LD1W
1310 ;; -------------------------------------------------------------------------
1311
1312 ;; Unpredicated gather loads.
1313 (define_expand "gather_load<mode><v_int_container>"
1314 [(set (match_operand:SVE_24 0 "register_operand")
1315 (unspec:SVE_24
1316 [(match_dup 5)
1317 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1318 (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1319 (match_operand:DI 3 "const_int_operand")
1320 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1321 (mem:BLK (scratch))]
1322 UNSPEC_LD1_GATHER))]
1323 "TARGET_SVE"
1324 {
1325 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1326 }
1327 )
1328
1329 ;; Predicated gather loads for 32-bit elements. Operand 3 is true for
1330 ;; unsigned extension and false for signed extension.
1331 (define_insn "mask_gather_load<mode><v_int_container>"
1332 [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1333 (unspec:SVE_4
1334 [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1335 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1336 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1337 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1338 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1339 (mem:BLK (scratch))]
1340 UNSPEC_LD1_GATHER))]
1341 "TARGET_SVE"
1342 "@
1343 ld1<Vesize>\t%0.s, %5/z, [%2.s]
1344 ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1345 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1346 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1347 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1348 ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1349 )
1350
1351 ;; Predicated gather loads for 64-bit elements. The value of operand 3
1352 ;; doesn't matter in this case.
1353 (define_insn "mask_gather_load<mode><v_int_container>"
1354 [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1355 (unspec:SVE_2
1356 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1357 (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1358 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1359 (match_operand:DI 3 "const_int_operand")
1360 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1361 (mem:BLK (scratch))]
1362 UNSPEC_LD1_GATHER))]
1363 "TARGET_SVE"
1364 "@
1365 ld1<Vesize>\t%0.d, %5/z, [%2.d]
1366 ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1367 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1368 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1369 )
1370
1371 ;; Likewise, but with the offset being extended from 32 bits.
1372 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1373 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1374 (unspec:SVE_2
1375 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1376 (match_operand:DI 1 "register_operand" "rk, rk")
1377 (unspec:VNx2DI
1378 [(match_operand 6)
1379 (ANY_EXTEND:VNx2DI
1380 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1381 UNSPEC_PRED_X)
1382 (match_operand:DI 3 "const_int_operand")
1383 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1384 (mem:BLK (scratch))]
1385 UNSPEC_LD1_GATHER))]
1386 "TARGET_SVE"
1387 "@
1388 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1389 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1390 "&& !CONSTANT_P (operands[6])"
1391 {
1392 operands[6] = CONSTM1_RTX (VNx2BImode);
1393 }
1394 )
1395
1396 ;; Likewise, but with the offset being truncated to 32 bits and then
1397 ;; sign-extended.
1398 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1399 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1400 (unspec:SVE_2
1401 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1402 (match_operand:DI 1 "register_operand" "rk, rk")
1403 (unspec:VNx2DI
1404 [(match_operand 6)
1405 (sign_extend:VNx2DI
1406 (truncate:VNx2SI
1407 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1408 UNSPEC_PRED_X)
1409 (match_operand:DI 3 "const_int_operand")
1410 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1411 (mem:BLK (scratch))]
1412 UNSPEC_LD1_GATHER))]
1413 "TARGET_SVE"
1414 "@
1415 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1416 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1417 "&& !CONSTANT_P (operands[6])"
1418 {
1419 operands[6] = CONSTM1_RTX (VNx2BImode);
1420 }
1421 )
1422
1423 ;; Likewise, but with the offset being truncated to 32 bits and then
1424 ;; zero-extended.
1425 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1426 [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1427 (unspec:SVE_2
1428 [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1429 (match_operand:DI 1 "register_operand" "rk, rk")
1430 (and:VNx2DI
1431 (match_operand:VNx2DI 2 "register_operand" "w, w")
1432 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1433 (match_operand:DI 3 "const_int_operand")
1434 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1435 (mem:BLK (scratch))]
1436 UNSPEC_LD1_GATHER))]
1437 "TARGET_SVE"
1438 "@
1439 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1440 ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1441 )
1442
;; -------------------------------------------------------------------------
;; ---- Extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------

;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
;; true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
  [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_4HSI
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_4HSI
             (unspec:SVE_4BHI
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

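;; Illustration (not from the original source): an extending gather of
;; this kind can arise from a widening indexed load such as the one
;; below; signed element types select the LD1S* forms and unsigned
;; types the plain LD1* forms.  Names are hypothetical.
;;
;;   #include <stdint.h>
;;
;;   void
;;   gather_widen (int32_t *restrict dst, const int8_t *base,
;;                 const uint32_t *restrict idx, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = base[idx[i]];   /* byte load, sign-extended to 32 bits */
;;   }
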
;; Predicated extending gather loads for 64-bit elements.  The value of
;; operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_2HSDI
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
                (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (ANY_EXTEND2:VNx2DI
                     (match_operand:VNx2SI 2 "register_operand" "w, w"))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 6)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
  [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
        (unspec:SVE_2HSDI
          [(match_operand 7)
           (ANY_EXTEND:SVE_2HSDI
             (unspec:SVE_2BHSI
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (and:VNx2DI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
                (mem:BLK (scratch))]
               UNSPEC_LD1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
  "@
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; -------------------------------------------------------------------------
;; ---- First-faulting gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDFF1D
;; - LDFF1W
;; -------------------------------------------------------------------------

;; Predicated first-faulting gather loads for 32-bit elements.  Operand
;; 3 is true for unsigned extension and false for signed extension.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:SVE_FULL_S
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
           (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1w\t%0.s, %5/z, [%2.s]
   ldff1w\t%0.s, %5/z, [%2.s, #%1]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
)

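;; Illustration (not from the original source): first-faulting gathers
;; are exposed at the ACLE level via the svldff1 family together with
;; the FFR intrinsics.  A sketch, assuming the <arm_sve.h> names from
;; the SVE ACLE specification:
;;
;;   #include <arm_sve.h>
;;
;;   svint64_t
;;   ff_gather (svbool_t pg, svuint64_t addrs)
;;   {
;;     svsetffr ();   /* set all FFR bits before the access */
;;     svint64_t data = svldff1_gather_u64base_s64 (pg, addrs);
;;     /* svrdffr () now says which lanes actually loaded.  */
;;     return data;
;;   }
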
;; Predicated first-faulting gather loads for 64-bit elements.  The value
;; of operand 3 doesn't matter in this case.
(define_insn "@aarch64_ldff1_gather<mode>"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%2.d]
   ldff1d\t%0.d, %5/z, [%2.d, #%1]
   ldff1d\t%0.d, %5/z, [%1, %2.d]
   ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_ldff1_gather<mode>_uxtw"
  [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
        (unspec:SVE_FULL_D
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 1 "register_operand" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 2 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 3 "const_int_operand")
           (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
           (mem:BLK (scratch))
           (reg:VNx16BI FFRT_REGNUM)]
          UNSPEC_LDFF1_GATHER))]
  "TARGET_SVE"
  "@
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
)

;; -------------------------------------------------------------------------
;; ---- First-faulting extending gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; -------------------------------------------------------------------------

;; Predicated extending first-faulting gather loads for 32-bit elements.
;; Operand 3 is true for unsigned extension and false for signed extension.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
  [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
        (unspec:VNx4_WIDE
          [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:VNx4_WIDE
             (unspec:VNx4_NARROW
               [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
   ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx4BImode);
  }
)

;; Predicated extending first-faulting gather loads for 64-bit elements.
;; The value of operand 3 doesn't matter in this case.
(define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
        (unspec:VNx2_WIDE
          [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
                (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
        (unspec:VNx2_WIDE
          [(match_operand 6)
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (unspec:VNx2DI
                  [(match_operand 7)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
  "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
  [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
        (unspec:VNx2_WIDE
          [(match_operand 7)
           (ANY_EXTEND:VNx2_WIDE
             (unspec:VNx2_NARROW
               [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
                (and:VNx2DI
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
                (mem:BLK (scratch))
                (reg:VNx16BI FFRT_REGNUM)]
               UNSPEC_LDFF1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  "@
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
   ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
  "&& !CONSTANT_P (operands[7])"
  {
    operands[7] = CONSTM1_RTX (VNx2BImode);
  }
)

;; =========================================================================
;; == Prefetches
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Contiguous prefetches
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - PRFB
;; - PRFD
;; - PRFH
;; - PRFW
;; -------------------------------------------------------------------------

;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
;; operation (as an svprfop), with operands 3 and 4 providing the rw and
;; locality information from the generic RTL prefetch pattern.
(define_insn "@aarch64_sve_prefetch<mode>"
  [(prefetch (unspec:DI
               [(match_operand:<VPRED> 0 "register_operand" "Upl")
                (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
                (match_operand:DI 2 "const_int_operand")]
               UNSPEC_SVE_PREFETCH)
             (match_operand:DI 3 "const_int_operand")
             (match_operand:DI 4 "const_int_operand"))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
    return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
  }
)

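;; Illustration (not from the original source): at the ACLE level this
;; corresponds to the svprf* intrinsics from <arm_sve.h>, e.g.:
;;
;;   #include <arm_sve.h>
;;
;;   void
;;   prefetch_block (svbool_t pg, const float *addr)
;;   {
;;     /* Prefetch for load into L1 with temporal locality: PRFW.  */
;;     svprfw (pg, addr, SV_PLDL1KEEP);
;;   }
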
;; -------------------------------------------------------------------------
;; ---- Gather prefetches
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - PRFB
;; - PRFD
;; - PRFH
;; - PRFW
;; -------------------------------------------------------------------------

;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
;; are:
;; 0: the governing predicate
;; 1: the scalar component of the address
;; 2: the vector component of the address
;; 3: 1 for zero extension, 0 for sign extension
;; 4: the scale multiplier
;; 5: a vector zero that identifies the mode of data being accessed
;; 6: the prefetch operator (an svprfop)
;; 7: the normal RTL prefetch rw flag
;; 8: the normal RTL prefetch locality value
;; An illustrative source-level use is sketched after the pattern below.
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
  [(prefetch (unspec:DI
               [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
                (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
                (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
      "prfb", "%0, [%1, %2.s, sxtw]",
      "prfb", "%0, [%1, %2.s, uxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

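;; Illustration (not from the original source): at the ACLE level a
;; gather prefetch of this shape can be written as below.  The intrinsic
;; name is assumed from the SVE ACLE specification for <arm_sve.h>;
;; check the specification for the exact overload.
;;
;;   #include <arm_sve.h>
;;
;;   void
;;   prefetch_gather (svbool_t pg, const float *base, svint32_t idx)
;;   {
;;     /* Prefetch base[idx[i]] for each active lane: a PRFW gather
;;        with sign-extended, scaled 32-bit offsets.  */
;;     svprfw_gather_s32index (pg, base, idx, SV_PLDL1KEEP);
;;   }
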
;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
                (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
                (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
      "prfb", "%0, [%1, %2.d]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "register_operand" "rk, rk")
                (unspec:VNx2DI_ONLY
                  [(match_operand 9)
                   (sign_extend:VNx2DI
                     (truncate:VNx2SI
                       (match_operand:VNx2DI 2 "register_operand" "w, w")))]
                  UNSPEC_PRED_X)
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prfb", "%0, [%1, %2.d, sxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
  "&& !rtx_equal_p (operands[0], operands[9])"
  {
    operands[9] = copy_rtx (operands[0]);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
  [(prefetch (unspec:DI
               [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
                (match_operand:DI 1 "register_operand" "rk, rk")
                (and:VNx2DI_ONLY
                  (match_operand:VNx2DI 2 "register_operand" "w, w")
                  (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
                (match_operand:DI 3 "const_int_operand")
                (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
                (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
                (match_operand:DI 6 "const_int_operand")]
               UNSPEC_SVE_PREFETCH_GATHER)
             (match_operand:DI 7 "const_int_operand")
             (match_operand:DI 8 "const_int_operand"))]
  "TARGET_SVE"
  {
    static const char *const insns[][2] = {
      "prfb", "%0, [%1, %2.d, uxtw]",
      "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
    };
    const char *const *parts = insns[which_alternative];
    return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
  }
)

;; =========================================================================
;; == Stores
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Normal contiguous stores
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - ST1B
;; - ST1D
;; - ST1H
;; - ST1W
;; - ST2B
;; - ST2D
;; - ST2H
;; - ST2W
;; - ST3B
;; - ST3D
;; - ST3H
;; - ST3W
;; - ST4B
;; - ST4D
;; - ST4H
;; - ST4W
;; -------------------------------------------------------------------------

;; Predicated ST1.
(define_insn "maskstore<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_ALL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<Vesize>\t%1.<Vctype>, %2, %0"
)

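;; Illustration (not from the original source): a predicated ST1 arises
;; naturally from a fully-masked loop, where WHILELO produces the
;; governing predicate for each iteration.  Names are hypothetical.
;;
;;   #include <stdint.h>
;;
;;   void
;;   copy (int32_t *restrict dst, const int32_t *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = src[i];   /* LD1W + ST1W under a WHILELO predicate */
;;   }
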
;; Unpredicated ST[234].  This is always a full update, so the dependence
;; on the old value of the memory location (via (match_dup 0)) is redundant.
;; There doesn't seem to be any obvious benefit to treating the all-true
;; case differently though.  In particular, it's very unlikely that we'll
;; only find out during RTL that a store_lanes is dead.
(define_expand "vec_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand")
        (unspec:SVE_STRUCT
          [(match_dup 2)
           (match_operand:SVE_STRUCT 1 "register_operand")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

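;; Illustration (not from the original source): ST2W can implement an
;; interleaving store such as the following.  Names are hypothetical.
;;
;;   #include <stdint.h>
;;
;;   void
;;   interleave (int32_t *restrict dst, const int32_t *restrict a,
;;               const int32_t *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       {
;;         dst[2 * i] = a[i];       /* even lanes */
;;         dst[2 * i + 1] = b[i];   /* odd lanes  */
;;       }
;;   }
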
;; Predicated ST[234].
(define_insn "vec_mask_store_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STN))]
  "TARGET_SVE"
  "st<vector_count><Vesize>\t%1, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Truncating contiguous stores
;; -------------------------------------------------------------------------
;; Includes:
;; - ST1B
;; - ST1H
;; - ST1W
;; -------------------------------------------------------------------------

;; Predicated truncate and store, with 8 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
  [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
        (unspec:VNx8_NARROW
          [(match_operand:VNx8BI 2 "register_operand" "Upl")
           (truncate:VNx8_NARROW
             (match_operand:VNx8_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
)

;; Predicated truncate and store, with 4 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
        (unspec:VNx4_NARROW
          [(match_operand:VNx4BI 2 "register_operand" "Upl")
           (truncate:VNx4_NARROW
             (match_operand:VNx4_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
)

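;; Illustration (not from the original source): the pattern above can
;; implement a narrowing store such as the following, which truncates
;; 32-bit elements to 16 bits and stores them with an ST1H of .s data.
;;
;;   #include <stdint.h>
;;
;;   void
;;   narrow (int16_t *restrict dst, const int32_t *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = (int16_t) src[i];
;;   }
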
;; Predicated truncate and store, with 2 elements per 128-bit block.
(define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
        (unspec:VNx2_NARROW
          [(match_operand:VNx2BI 2 "register_operand" "Upl")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 1 "register_operand" "w"))
           (match_dup 0)]
          UNSPEC_ST1_SVE))]
  "TARGET_SVE"
  "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous stores
;; -------------------------------------------------------------------------
;; Includes:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------

(define_insn "@aarch64_stnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STNT1_SVE))]
  "TARGET_SVE"
  "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
)

;; -------------------------------------------------------------------------
;; ---- Normal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - ST1D
;; - ST1W
;; -------------------------------------------------------------------------

;; Unpredicated scatter stores.
(define_expand "scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_dup 5)
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
           (match_operand:<V_INT_CONTAINER> 1 "register_operand")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
           (match_operand:SVE_24 4 "register_operand")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

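;; Illustration (not from the original source): a scatter store
;; corresponds to an indexed store loop like the one below; with 32-bit
;; data the offsets are the .s elements of operand 1.  Names are
;; hypothetical.
;;
;;   #include <stdint.h>
;;
;;   void
;;   scatter (float *restrict dst, const uint32_t *restrict idx,
;;            const float *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[idx[i]] = src[i];   /* ST1W scatter, uxtw-scaled offsets */
;;   }
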
;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
           (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.s, %5, [%1.s]
   st1<Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

;; Predicated scatter stores for 64-bit elements.  The value of operand 2
;; doesn't matter in this case.
(define_insn "mask_scatter_store<mode><v_int_container>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
           (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%1.d]
   st1<Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<Vesize>\t%4.d, %5, [%0, %1.d]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (ANY_EXTEND:VNx2DI
                (match_operand:VNx2SI 1 "register_operand" "w, w"))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Likewise, but with the offset being truncated to 32 bits and then
;; zero-extended.
(define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 1 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
           (match_operand:SVE_2 4 "register_operand" "w, w")]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)

;; -------------------------------------------------------------------------
;; ---- Truncating scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - ST1B
;; - ST1H
;; - ST1W
;; -------------------------------------------------------------------------

;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
;; true for unsigned extension and false for signed extension.
(define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
           (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
           (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
           (truncate:VNx4_NARROW
             (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
   st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
)

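;; Illustration (not from the original source): a truncating scatter
;; combines the narrowing and indexed-store steps of a loop such as the
;; following.  Names are hypothetical.
;;
;;   #include <stdint.h>
;;
;;   void
;;   scatter_narrow (uint8_t *restrict dst, const uint32_t *restrict idx,
;;                   const uint32_t *restrict src, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[idx[i]] = (uint8_t) src[i];   /* ST1B scatter of .s data */
;;   }
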
;; Predicated truncating scatter stores for 64-bit elements.  The value of
;; operand 2 doesn't matter in this case.
(define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
           (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
           (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
)

;; Likewise, but with the offset being sign-extended from 32 bits.
(define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "register_operand" "rk, rk")
           (unspec:VNx2DI
             [(match_operand 6)
              (sign_extend:VNx2DI
                (truncate:VNx2SI
                  (match_operand:VNx2DI 1 "register_operand" "w, w")))]
             UNSPEC_PRED_X)
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
  "&& !rtx_equal_p (operands[5], operands[6])"
  {
    operands[6] = copy_rtx (operands[5]);
  }
)

;; Likewise, but with the offset being zero-extended from 32 bits.
(define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
           (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
           (and:VNx2DI
             (match_operand:VNx2DI 1 "register_operand" "w, w")
             (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
           (match_operand:DI 2 "const_int_operand")
           (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
           (truncate:VNx2_NARROW
             (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
          UNSPEC_ST1_SCATTER))]
  "TARGET_SVE"
  "@
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
   st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
)

;; =========================================================================
;; == Vector creation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Duplicate element
;; -------------------------------------------------------------------------
;; Includes:
;; - DUP
;; - MOV
;; - LD1RB
;; - LD1RD
;; - LD1RH
;; - LD1RW
;; - LD1ROB (F64MM)
;; - LD1ROD (F64MM)
;; - LD1ROH (F64MM)
;; - LD1ROW (F64MM)
;; - LD1RQB
;; - LD1RQD
;; - LD1RQH
;; - LD1RQW
;; -------------------------------------------------------------------------

(define_expand "vec_duplicate<mode>"
  [(parallel
     [(set (match_operand:SVE_ALL 0 "register_operand")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
      (clobber (scratch:VNx16BI))])]
  "TARGET_SVE"
  {
    if (MEM_P (operands[1]))
      {
        rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
        emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
                                       CONST0_RTX (<MODE>mode)));
        DONE;
      }
  }
)

;; Accept memory operands for the benefit of combine, and also in case
;; the scalar input gets spilled to memory during RA.  We want to split
;; the load at the first opportunity in order to allow the PTRUE to be
;; optimized with surrounding code.
(define_insn_and_split "*vec_duplicate<mode>_reg"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
        (vec_duplicate:SVE_ALL
          (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
   (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
  "TARGET_SVE"
  "@
   mov\t%0.<Vetype>, %<vwcore>1
   mov\t%0.<Vetype>, %<Vetype>1
   #"
  "&& MEM_P (operands[1])"
  [(const_int 0)]
  {
    if (GET_CODE (operands[2]) == SCRATCH)
      operands[2] = gen_reg_rtx (VNx16BImode);
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
    rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
    emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
                                   CONST0_RTX (<MODE>mode)));
    DONE;
  }
  [(set_attr "length" "4,4,8")]
)

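;; Illustration (not from the original source): LD1R-style broadcasts
;; typically come from loops that reuse a loaded scalar, as in this
;; hypothetical example.
;;
;;   #include <stdint.h>
;;
;;   void
;;   splat (int32_t *restrict dst, const int32_t *p, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = *p;   /* *p is loop-invariant: LD1RW broadcast */
;;   }
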
;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
(define_insn "@aarch64_vec_duplicate_vq<mode>_le"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (vec_duplicate:SVE_FULL
          (match_operand:<V128> 1 "register_operand" "w")))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  {
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)

;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
;; The SVE register layout puts memory lane N into (architectural)
;; register lane N, whereas the Advanced SIMD layout puts the memory
;; lsb into the register lsb.  We therefore have to describe this in rtl
;; terms as a reverse of the V128 vector followed by a duplicate.
(define_insn "@aarch64_vec_duplicate_vq<mode>_be"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (vec_duplicate:SVE_FULL
          (vec_select:<V128>
            (match_operand:<V128> 1 "register_operand" "w")
            (match_operand 2 "descending_int_parallel"))))]
  "TARGET_SVE
   && BYTES_BIG_ENDIAN
   && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
                GET_MODE_NUNITS (<V128>mode) - 1)"
  {
    operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
    return "dup\t%0.q, %1.q[0]";
  }
)

;; This is used for vec_duplicate<mode>s from memory, but can also
;; be used by combine to optimize selects of a vec_duplicate<mode>
;; with zero.
(define_insn "sve_ld1r<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
        (unspec:SVE_ALL
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (vec_duplicate:SVE_ALL
             (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
           (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
)

;; Load 128 bits from memory under predicate control and duplicate to
;; fill a vector.
(define_insn "@aarch64_sve_ld1rq<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
          UNSPEC_LD1RQ))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)

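;; Illustration (not from the original source): at the ACLE level the
;; LD1RQ form corresponds to svld1rq from <arm_sve.h> (typed name
;; svld1rq_s32 for this element type), sketched below.
;;
;;   #include <arm_sve.h>
;;
;;   svint32_t
;;   dup_quad (svbool_t pg, const int32_t *ptr)
;;   {
;;     /* Load 128 bits and replicate them across the whole vector.  */
;;     return svld1rq (pg, ptr);
;;   }
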
(define_insn "@aarch64_sve_ld1ro<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
                             "UO<Vesize>")]
          UNSPEC_LD1RO))]
  "TARGET_SVE_F64MM"
  {
    operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
    return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Initialize from individual elements
;; -------------------------------------------------------------------------
;; Includes:
;; - INSR
;; -------------------------------------------------------------------------

(define_expand "vec_init<mode><Vel>"
  [(match_operand:SVE_FULL 0 "register_operand")
   (match_operand 1 "")]
  "TARGET_SVE"
  {
    aarch64_sve_expand_vector_init (operands[0], operands[1]);
    DONE;
  }
)

;; Shift an SVE vector left and insert a scalar into element 0.
(define_insn "vec_shl_insert_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
           (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
          UNSPEC_INSR))]
  "TARGET_SVE"
  "@
   insr\t%0.<Vetype>, %<vwcore>2
   insr\t%0.<Vetype>, %<Vetype>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
   movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
  [(set_attr "movprfx" "*,*,yes,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Linear series
;; -------------------------------------------------------------------------
;; Includes:
;; - INDEX
;; -------------------------------------------------------------------------

(define_insn "vec_series<mode>"
  [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
        (vec_series:SVE_I
          (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
          (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
  "TARGET_SVE"
  "@
   index\t%0.<Vctype>, #%1, %<vccore>2
   index\t%0.<Vctype>, %<vccore>1, #%2
   index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
)

;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
;; of an INDEX instruction.
(define_insn "*vec_series<mode>_plus"
  [(set (match_operand:SVE_I 0 "register_operand" "=w")
        (plus:SVE_I
          (vec_duplicate:SVE_I
            (match_operand:<VEL> 1 "register_operand" "r"))
          (match_operand:SVE_I 2 "immediate_operand")))]
  "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
  {
    operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
    return "index\t%0.<Vctype>, %<vccore>1, #%2";
  }
)

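;; Illustration (not from the original source): INDEX covers linear
;; induction sequences such as the loop below, which computes the
;; series with base x and step 4 and stores it.  Names are hypothetical.
;;
;;   #include <stdint.h>
;;
;;   void
;;   iota (int32_t *restrict dst, int32_t x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       dst[i] = x + i * 4;
;;   }
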
;; -------------------------------------------------------------------------
;; ---- [PRED] Duplicate element
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Implement a predicate broadcast by shifting the low bit of the scalar
;; input into the top bit and using a WHILELO.  An alternative would be to
;; duplicate the input and do a compare with zero.
(define_expand "vec_duplicate<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand")
        (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
  "TARGET_SVE"
  {
    rtx tmp = gen_reg_rtx (DImode);
    rtx op1 = gen_lowpart (DImode, operands[1]);
    emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
    emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
    DONE;
  }
)

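;; Illustration (not from the original source): if the scalar input is
;; 1, the shift produces 1 << 63 and every lane index is below it, so
;; WHILELO returns an all-true predicate; if the input is 0, no lane
;; index is below 0 and the result is all-false.  An ACLE-level sketch
;; (intrinsic names from <arm_sve.h>, function name hypothetical):
;;
;;   #include <arm_sve.h>
;;   #include <stdint.h>
;;
;;   svbool_t
;;   broadcast_bool (uint64_t b)   /* b is 0 or 1 */
;;   {
;;     uint64_t t = b << 63;
;;     /* WHILELO: lane i is active while i < t.  */
;;     return svwhilelt_b32_u64 (0, t);
;;   }
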
;; =========================================================================
;; == Vector decomposition
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Extract index
;; -------------------------------------------------------------------------
;; Includes:
;; - DUP (Advanced SIMD)
;; - DUP (SVE)
;; - EXT (SVE)
;; - ST1 (Advanced SIMD)
;; - UMOV (Advanced SIMD)
;; -------------------------------------------------------------------------

(define_expand "vec_extract<mode><Vel>"
  [(set (match_operand:<VEL> 0 "register_operand")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand")
          (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
  "TARGET_SVE"
  {
    poly_int64 val;
    if (poly_int_rtx_p (operands[2], &val)
        && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
      {
        /* The last element can be extracted with a LASTB and a false
           predicate.  */
        rtx sel = aarch64_pfalse_reg (<VPRED>mode);
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
    if (!CONST_INT_P (operands[2]))
      {
        /* Create an index with operand[2] as the base and -1 as the step.
           It will then be zero for the element we care about.  */
        rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
        index = force_reg (<VEL_INT>mode, index);
        rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
        emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));

        /* Get a predicate that is true for only that element.  */
        rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
        rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
        rtx sel = gen_reg_rtx (<VPRED>mode);
        emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));

        /* Select the element using LASTB.  */
        emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
        DONE;
      }
  }
)

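;; For example (illustrative): if the runtime value of operand 2 is 5,
;; the series is {5, 4, 3, 2, 1, 0, -1, ...}, which is zero exactly in
;; element 5, so comparing the series against zero yields a
;; single-element predicate for LASTB to consume.
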
;; Extract element zero.  This is a special case because we want to force
;; the registers to be the same for the second alternative, and then
;; split the instruction into nothing after RA.
(define_insn_and_split "*vec_extract<mode><Vel>_0"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
          (parallel [(const_int 0)])))]
  "TARGET_SVE"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
      case 0:
        return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
      case 1:
        return "#";
      case 2:
        return "st1\\t{%1.<Vetype>}[0], %0";
      default:
        gcc_unreachable ();
      }
  }
  "&& reload_completed
   && REG_P (operands[0])
   && REGNO (operands[0]) == REGNO (operands[1])"
  [(const_int 0)]
  {
    emit_note (NOTE_INSN_DELETED);
    DONE;
  }
  [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
)

;; Extract an element from the Advanced SIMD portion of the register.
;; We don't just reuse the aarch64-simd.md pattern because we don't
;; want any change in lane number on big-endian targets.
(define_insn "*vec_extract<mode><Vel>_v128"
  [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
    switch (which_alternative)
      {
      case 0:
        return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
      case 1:
        return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
      case 2:
        return "st1\\t{%1.<Vetype>}[%2], %0";
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
)

;; Extract an element in the range of DUP.  This pattern allows the
;; source and destination to be different.
(define_insn "*vec_extract<mode><Vel>_dup"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
  }
)

;; Extract an element outside the range of DUP.  This pattern requires the
;; source and destination to be the same.
(define_insn "*vec_extract<mode><Vel>_ext"
  [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
        (vec_select:<VEL>
          (match_operand:SVE_FULL 1 "register_operand" "0, w")
          (parallel [(match_operand:SI 2 "const_int_operand")])))]
  "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
  {
    operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
    operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
    return (which_alternative == 0
            ? "ext\t%0.b, %0.b, %0.b, #%2"
            : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
  }
  [(set_attr "movprfx" "*,yes")]
)

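;; For example (illustrative): extracting .s element 20 gives a byte
;; offset of 20 * 4 = 80, which is outside the range handled by the
;; patterns above, so it becomes an EXT with immediate #80 (after a
;; MOVPRFX if the source and destination differ).
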
2760 ;; -------------------------------------------------------------------------
2761 ;; ---- [INT,FP] Extract active element
2762 ;; -------------------------------------------------------------------------
2763 ;; Includes:
2764 ;; - LASTA
2765 ;; - LASTB
2766 ;; -------------------------------------------------------------------------
2767
2768 ;; Extract the last active element of operand 1 into operand 0.
2769 ;; If no elements are active, extract the last inactive element instead.
2770 (define_insn "@extract_<last_op>_<mode>"
2771 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2772 (unspec:<VEL>
2773 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2774 (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2775 LAST))]
2776 "TARGET_SVE"
2777 "@
2778 last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2779 last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2780 )
2781
2782 ;; -------------------------------------------------------------------------
2783 ;; ---- [PRED] Extract index
2784 ;; -------------------------------------------------------------------------
2785 ;; The patterns in this section are synthetic.
2786 ;; -------------------------------------------------------------------------
2787
2788 ;; Handle extractions from a predicate by converting to an integer vector
2789 ;; and extracting from there.
2790 (define_expand "vec_extract<vpred><Vel>"
2791 [(match_operand:<VEL> 0 "register_operand")
2792 (match_operand:<VPRED> 1 "register_operand")
2793 (match_operand:SI 2 "nonmemory_operand")
2794 ;; Dummy operand to which we can attach the iterator.
2795 (reg:SVE_FULL_I V0_REGNUM)]
2796 "TARGET_SVE"
2797 {
2798 rtx tmp = gen_reg_rtx (<MODE>mode);
2799 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2800 CONST1_RTX (<MODE>mode),
2801 CONST0_RTX (<MODE>mode)));
2802 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2803 DONE;
2804 }
2805 )
2806
2807 ;; =========================================================================
2808 ;; == Unary arithmetic
2809 ;; =========================================================================
2810
2811 ;; -------------------------------------------------------------------------
2812 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2813 ;; -------------------------------------------------------------------------
2814 ;; Includes:
2815 ;; - ABS
2816 ;; - CLS (= clrsb)
2817 ;; - CLZ
2818 ;; - CNT (= popcount)
2819 ;; - NEG
2820 ;; - NOT
2821 ;; -------------------------------------------------------------------------
2822
2823 ;; Unpredicated integer unary arithmetic.
2824 (define_expand "<optab><mode>2"
2825 [(set (match_operand:SVE_I 0 "register_operand")
2826 (unspec:SVE_I
2827 [(match_dup 2)
2828 (SVE_INT_UNARY:SVE_I
2829 (match_operand:SVE_I 1 "register_operand"))]
2830 UNSPEC_PRED_X))]
2831 "TARGET_SVE"
2832 {
2833 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2834 }
2835 )
2836
2837 ;; Integer unary arithmetic predicated with a PTRUE.
2838 (define_insn "@aarch64_pred_<optab><mode>"
2839 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2840 (unspec:SVE_I
2841 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2842 (SVE_INT_UNARY:SVE_I
2843 (match_operand:SVE_I 2 "register_operand" "w"))]
2844 UNSPEC_PRED_X))]
2845 "TARGET_SVE"
2846 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2847 )
2848
2849 ;; Predicated integer unary arithmetic with merging.
2850 (define_expand "@cond_<optab><mode>"
2851 [(set (match_operand:SVE_FULL_I 0 "register_operand")
2852 (unspec:SVE_FULL_I
2853 [(match_operand:<VPRED> 1 "register_operand")
2854 (SVE_INT_UNARY:SVE_FULL_I
2855 (match_operand:SVE_FULL_I 2 "register_operand"))
2856 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
2857 UNSPEC_SEL))]
2858 "TARGET_SVE"
2859 )
2860
2861 ;; Predicated integer unary arithmetic, merging with the first input.
2862 (define_insn "*cond_<optab><mode>_2"
2863 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2864 (unspec:SVE_FULL_I
2865 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2866 (SVE_INT_UNARY:SVE_FULL_I
2867 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
2868 (match_dup 2)]
2869 UNSPEC_SEL))]
2870 "TARGET_SVE"
2871 "@
2872 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2873 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2874 [(set_attr "movprfx" "*,yes")]
2875 )
2876
2877 ;; Predicated integer unary arithmetic, merging with an independent value.
2878 ;;
2879 ;; The earlyclobber isn't needed for the first alternative, but omitting
2880 ;; it would only help the case in which operands 2 and 3 are the same,
2881 ;; which is handled above rather than here. Marking all the alternatives
2882 ;; as earlyclobber helps to make the instruction more regular to the
2883 ;; register allocator.
2884 (define_insn "*cond_<optab><mode>_any"
2885 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
2886 (unspec:SVE_FULL_I
2887 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2888 (SVE_INT_UNARY:SVE_FULL_I
2889 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
2890 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2891 UNSPEC_SEL))]
2892 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2893 "@
2894 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2895 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2896 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2897 [(set_attr "movprfx" "*,yes,yes")]
2898 )
2899
2900 ;; -------------------------------------------------------------------------
2901 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2902 ;; -------------------------------------------------------------------------
2903 ;; Includes:
2904 ;; - RBIT
2905 ;; - REVB
2906 ;; - REVH
2907 ;; - REVW
2908 ;; -------------------------------------------------------------------------
2909
2910 ;; Predicated integer unary operations.
2911 (define_insn "@aarch64_pred_<optab><mode>"
2912 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2913 (unspec:SVE_FULL_I
2914 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2915 (unspec:SVE_FULL_I
2916 [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
2917 SVE_INT_UNARY)]
2918 UNSPEC_PRED_X))]
2919 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2920 "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2921 )
2922
2923 ;; Predicated integer unary operations with merging.
2924 (define_insn "@cond_<optab><mode>"
2925 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
2926 (unspec:SVE_FULL_I
2927 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2928 (unspec:SVE_FULL_I
2929 [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
2930 SVE_INT_UNARY)
2931 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2932 UNSPEC_SEL))]
2933 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2934 "@
2935 <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2936 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2937 movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2938 [(set_attr "movprfx" "*,yes,yes")]
2939 )
2940
2941 ;; -------------------------------------------------------------------------
2942 ;; ---- [INT] Sign and zero extension
2943 ;; -------------------------------------------------------------------------
2944 ;; Includes:
2945 ;; - SXTB
2946 ;; - SXTH
2947 ;; - SXTW
2948 ;; - UXTB
2949 ;; - UXTH
2950 ;; - UXTW
2951 ;; -------------------------------------------------------------------------
2952
2953 ;; Unpredicated sign and zero extension from a narrower mode.
2954 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2955 [(set (match_operand:SVE_HSDI 0 "register_operand")
2956 (unspec:SVE_HSDI
2957 [(match_dup 2)
2958 (ANY_EXTEND:SVE_HSDI
2959 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
2960 UNSPEC_PRED_X))]
2961 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2962 {
2963 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
2964 }
2965 )
2966
2967 ;; Predicated sign and zero extension from a narrower mode.
2968 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2969 [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2970 (unspec:SVE_HSDI
2971 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
2972 (ANY_EXTEND:SVE_HSDI
2973 (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
2974 UNSPEC_PRED_X))]
2975 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2976 "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
2977 )
2978
2979 ;; Predicated truncate-and-sign-extend operations.
2980 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2981 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2982 (unspec:SVE_FULL_HSDI
2983 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
2984 (sign_extend:SVE_FULL_HSDI
2985 (truncate:SVE_PARTIAL_I
2986 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
2987 UNSPEC_PRED_X))]
2988 "TARGET_SVE
2989 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2990 "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2991 )
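
;; For illustration only (not from this file), the svextb intrinsics
;; exercise these patterns:
;;
;;   #include <arm_sve.h>
;;   /* Sign-extend the low byte of each 32-bit element; likely
;;      "sxtb z0.s, p0/m, z1.s".  */
;;   svint32_t sext_byte (svbool_t pg, svint32_t x)
;;   {
;;     return svextb_s32_x (pg, x);
;;   }
;;
;; The unsigned svextb_u32_x counterpart is canonicalized to an AND
;; with #0xff, as described for the UXT patterns below.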
2992
2993 ;; Predicated truncate-and-sign-extend operations with merging.
2994 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2995 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
2996 (unspec:SVE_FULL_HSDI
2997 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
2998 (sign_extend:SVE_FULL_HSDI
2999 (truncate:SVE_PARTIAL_I
3000 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
3001 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3002 UNSPEC_SEL))]
3003 "TARGET_SVE
3004 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3005 "@
3006 sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3007 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3008 movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3009 [(set_attr "movprfx" "*,yes,yes")]
3010 )
3011
3012 ;; Predicated truncate-and-zero-extend operations, merging with the
3013 ;; first input.
3014 ;;
3015 ;; The canonical form of this operation is an AND of a constant rather
3016 ;; than (zero_extend (truncate ...)).
3017 (define_insn "*cond_uxt<mode>_2"
3018 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3019 (unspec:SVE_FULL_I
3020 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3021 (and:SVE_FULL_I
3022 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3023 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3024 (match_dup 2)]
3025 UNSPEC_SEL))]
3026 "TARGET_SVE"
3027 "@
3028 uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3029 movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3030 [(set_attr "movprfx" "*,yes")]
3031 )
3032
3033 ;; Predicated truncate-and-zero-extend operations, merging with an
3034 ;; independent value.
3035 ;;
3036 ;; The earlyclobber isn't needed for the first alternative, but omitting
3037 ;; it would only help the case in which operands 2 and 4 are the same,
3038 ;; which is handled above rather than here. Marking all the alternatives
3039 ;; as earlyclobber helps to make the instruction more regular to the
3040 ;; register allocator.
3041 (define_insn "*cond_uxt<mode>_any"
3042 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3043 (unspec:SVE_FULL_I
3044 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3045 (and:SVE_FULL_I
3046 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3047 (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3048 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3049 UNSPEC_SEL))]
3050 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3051 "@
3052 uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3053 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3054 movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3055 [(set_attr "movprfx" "*,yes,yes")]
3056 )
3057
3058 ;; -------------------------------------------------------------------------
3059 ;; ---- [INT] Truncation
3060 ;; -------------------------------------------------------------------------
3061 ;; The patterns in this section are synthetic.
3062 ;; -------------------------------------------------------------------------
3063
3064 ;; Truncate to a partial SVE vector from either a full vector or a
3065 ;; wider partial vector. This is a no-op, because we can just ignore
3066 ;; the unused upper bits of the source.
3067 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3068 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3069 (truncate:SVE_PARTIAL_I
3070 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3071 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3072 "#"
3073 "&& reload_completed"
3074 [(set (match_dup 0) (match_dup 1))]
3075 {
3076 operands[1] = aarch64_replace_reg_mode (operands[1],
3077 <SVE_PARTIAL_I:MODE>mode);
3078 }
3079 )
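
;; As a sketch of where these synthetic truncations arise (assumptions:
;; SVE autovectorization enabled, e.g. -O2 -march=armv8.2-a+sve):
;;
;;   #include <stdint.h>
;;   /* The int32_t -> int16_t narrowing can be carried out in a
;;      partial vector mode, making the truncation itself a no-op.  */
;;   void narrow (int16_t *dst, const int32_t *src, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       dst[i] = (int16_t) src[i];
;;   }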
3080
3081 ;; -------------------------------------------------------------------------
3082 ;; ---- [INT] Logical inverse
3083 ;; -------------------------------------------------------------------------
3084 ;; Includes:
3085 ;; - CNOT
3086 ;; -------------------------------------------------------------------------
3087
3088 ;; Predicated logical inverse.
3089 (define_expand "@aarch64_pred_cnot<mode>"
3090 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3091 (unspec:SVE_FULL_I
3092 [(unspec:<VPRED>
3093 [(match_operand:<VPRED> 1 "register_operand")
3094 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3095 (eq:<VPRED>
3096 (match_operand:SVE_FULL_I 3 "register_operand")
3097 (match_dup 4))]
3098 UNSPEC_PRED_Z)
3099 (match_dup 5)
3100 (match_dup 4)]
3101 UNSPEC_SEL))]
3102 "TARGET_SVE"
3103 {
3104 operands[4] = CONST0_RTX (<MODE>mode);
3105 operands[5] = CONST1_RTX (<MODE>mode);
3106 }
3107 )
3108
3109 (define_insn "*cnot<mode>"
3110 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3111 (unspec:SVE_FULL_I
3112 [(unspec:<VPRED>
3113 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3114 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3115 (eq:<VPRED>
3116 (match_operand:SVE_FULL_I 2 "register_operand" "w")
3117 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3118 UNSPEC_PRED_Z)
3119 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3120 (match_dup 3)]
3121 UNSPEC_SEL))]
3122 "TARGET_SVE"
3123 "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3124 )
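
;; Illustrative ACLE usage (invented function name):
;;
;;   #include <arm_sve.h>
;;   /* Each active element becomes 1 if it was zero and 0 otherwise,
;;      i.e. "cnot z0.s, p0/m, z1.s".  */
;;   svint32_t logical_not (svbool_t pg, svint32_t x)
;;   {
;;     return svcnot_s32_x (pg, x);
;;   }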
3125
3126 ;; Predicated logical inverse with merging.
3127 (define_expand "@cond_cnot<mode>"
3128 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3129 (unspec:SVE_FULL_I
3130 [(match_operand:<VPRED> 1 "register_operand")
3131 (unspec:SVE_FULL_I
3132 [(unspec:<VPRED>
3133 [(match_dup 4)
3134 (const_int SVE_KNOWN_PTRUE)
3135 (eq:<VPRED>
3136 (match_operand:SVE_FULL_I 2 "register_operand")
3137 (match_dup 5))]
3138 UNSPEC_PRED_Z)
3139 (match_dup 6)
3140 (match_dup 5)]
3141 UNSPEC_SEL)
3142 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3143 UNSPEC_SEL))]
3144 "TARGET_SVE"
3145 {
3146 operands[4] = CONSTM1_RTX (<VPRED>mode);
3147 operands[5] = CONST0_RTX (<MODE>mode);
3148 operands[6] = CONST1_RTX (<MODE>mode);
3149 }
3150 )
3151
3152 ;; Predicated logical inverse, merging with the first input.
3153 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3154 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3155 (unspec:SVE_FULL_I
3156 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3157 ;; Logical inverse of operand 2 (as above).
3158 (unspec:SVE_FULL_I
3159 [(unspec:<VPRED>
3160 [(match_operand 5)
3161 (const_int SVE_KNOWN_PTRUE)
3162 (eq:<VPRED>
3163 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3164 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3165 UNSPEC_PRED_Z)
3166 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3167 (match_dup 3)]
3168 UNSPEC_SEL)
3169 (match_dup 2)]
3170 UNSPEC_SEL))]
3171 "TARGET_SVE"
3172 "@
3173 cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3174 movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3175 "&& !CONSTANT_P (operands[5])"
3176 {
3177 operands[5] = CONSTM1_RTX (<VPRED>mode);
3178 }
3179 [(set_attr "movprfx" "*,yes")]
3180 )
3181
3182 ;; Predicated logical inverse, merging with an independent value.
3183 ;;
3184 ;; The earlyclobber isn't needed for the first alternative, but omitting
3185 ;; it would only help the case in which operands 2 and 6 are the same,
3186 ;; which is handled above rather than here. Marking all the alternatives
3187 ;; as earlyclobber helps to make the instruction more regular to the
3188 ;; register allocator.
3189 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3190 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3191 (unspec:SVE_FULL_I
3192 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3193 ;; Logical inverse of operand 2 (as above).
3194 (unspec:SVE_FULL_I
3195 [(unspec:<VPRED>
3196 [(match_operand 5)
3197 (const_int SVE_KNOWN_PTRUE)
3198 (eq:<VPRED>
3199 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3200 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3201 UNSPEC_PRED_Z)
3202 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3203 (match_dup 3)]
3204 UNSPEC_SEL)
3205 (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3206 UNSPEC_SEL))]
3207 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3208 "@
3209 cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3210 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3211 movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3212 "&& !CONSTANT_P (operands[5])"
3213 {
3214 operands[5] = CONSTM1_RTX (<VPRED>mode);
3215 }
3216 [(set_attr "movprfx" "*,yes,yes")]
3217 )
3218
3219 ;; -------------------------------------------------------------------------
3220 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3221 ;; -------------------------------------------------------------------------
3222 ;; Includes:
3223 ;; - FEXPA
3224 ;; -------------------------------------------------------------------------
3225
3226 ;; Unpredicated unary operations that take an integer and return a float.
3227 (define_insn "@aarch64_sve_<optab><mode>"
3228 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3229 (unspec:SVE_FULL_F
3230 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3231 SVE_FP_UNARY_INT))]
3232 "TARGET_SVE"
3233 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3234 )
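
;; A minimal sketch of the corresponding ACLE intrinsic (assuming GCC's
;; arm_sve.h):
;;
;;   #include <arm_sve.h>
;;   /* FEXPA turns an integer exponent/fraction encoding into a
;;      floating-point value, as used when accelerating exp().  */
;;   svfloat32_t expa (svuint32_t coded)
;;   {
;;     return svexpa_f32 (coded);
;;   }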
3235
3236 ;; -------------------------------------------------------------------------
3237 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3238 ;; -------------------------------------------------------------------------
3239 ;; Includes:
3240 ;; - FABS
3241 ;; - FNEG
3242 ;; - FRECPE
3243 ;; - FRECPX
3244 ;; - FRINTA
3245 ;; - FRINTI
3246 ;; - FRINTM
3247 ;; - FRINTN
3248 ;; - FRINTP
3249 ;; - FRINTX
3250 ;; - FRINTZ
3251 ;; - FRSQRTE
3252 ;; - FSQRT
3253 ;; -------------------------------------------------------------------------
3254
3255 ;; Unpredicated floating-point unary operations.
3256 (define_insn "@aarch64_sve_<optab><mode>"
3257 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3258 (unspec:SVE_FULL_F
3259 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3260 SVE_FP_UNARY))]
3261 "TARGET_SVE"
3262 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3263 )
3264
3265 ;; Expanders for unpredicated floating-point unary operations,
;; implemented by predicating the operation with a PTRUE.
3266 (define_expand "<optab><mode>2"
3267 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3268 (unspec:SVE_FULL_F
3269 [(match_dup 2)
3270 (const_int SVE_RELAXED_GP)
3271 (match_operand:SVE_FULL_F 1 "register_operand")]
3272 SVE_COND_FP_UNARY_OPTAB))]
3273 "TARGET_SVE"
3274 {
3275 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3276 }
3277 )
3278
3279 ;; Predicated floating-point unary operations.
3280 (define_insn "@aarch64_pred_<optab><mode>"
3281 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3282 (unspec:SVE_FULL_F
3283 [(match_operand:<VPRED> 1 "register_operand" "Upl")
3284 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3285 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3286 SVE_COND_FP_UNARY))]
3287 "TARGET_SVE"
3288 "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3289 )
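
;; For illustration, assuming GCC's arm_sve.h (function name made up):
;;
;;   #include <arm_sve.h>
;;   /* Round to nearest with ties away from zero; typically
;;      "frinta z0.s, p0/m, z1.s".  */
;;   svfloat32_t round_away (svbool_t pg, svfloat32_t x)
;;   {
;;     return svrinta_f32_x (pg, x);
;;   }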
3290
3291 ;; Predicated floating-point unary arithmetic with merging.
3292 (define_expand "@cond_<optab><mode>"
3293 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3294 (unspec:SVE_FULL_F
3295 [(match_operand:<VPRED> 1 "register_operand")
3296 (unspec:SVE_FULL_F
3297 [(match_dup 1)
3298 (const_int SVE_STRICT_GP)
3299 (match_operand:SVE_FULL_F 2 "register_operand")]
3300 SVE_COND_FP_UNARY)
3301 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3302 UNSPEC_SEL))]
3303 "TARGET_SVE"
3304 )
3305
3306 ;; Predicated floating-point unary arithmetic, merging with the first input.
3307 (define_insn_and_rewrite "*cond_<optab><mode>_2"
3308 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3309 (unspec:SVE_FULL_F
3310 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3311 (unspec:SVE_FULL_F
3312 [(match_operand 3)
3313 (match_operand:SI 4 "aarch64_sve_gp_strictness")
3314 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3315 SVE_COND_FP_UNARY)
3316 (match_dup 2)]
3317 UNSPEC_SEL))]
3318 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
3319 "@
3320 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3321 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3322 "&& !rtx_equal_p (operands[1], operands[3])"
3323 {
3324 operands[3] = copy_rtx (operands[1]);
3325 }
3326 [(set_attr "movprfx" "*,yes")]
3327 )
3328
3329 ;; Predicated floating-point unary arithmetic, merging with an independent
3330 ;; value.
3331 ;;
3332 ;; The earlyclobber isn't needed for the first alternative, but omitting
3333 ;; it would only help the case in which operands 2 and 3 are the same,
3334 ;; which is handled above rather than here. Marking all the alternatives
3335 ;; as earlyclobber helps to make the instruction more regular to the
3336 ;; register allocator.
3337 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3338 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3339 (unspec:SVE_FULL_F
3340 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3341 (unspec:SVE_FULL_F
3342 [(match_operand 4)
3343 (match_operand:SI 5 "aarch64_sve_gp_strictness")
3344 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3345 SVE_COND_FP_UNARY)
3346 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3347 UNSPEC_SEL))]
3348 "TARGET_SVE
3349 && !rtx_equal_p (operands[2], operands[3])
3350 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
3351 "@
3352 <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3353 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3354 movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3355 "&& !rtx_equal_p (operands[1], operands[4])"
3356 {
3357 operands[4] = copy_rtx (operands[1]);
3358 }
3359 [(set_attr "movprfx" "*,yes,yes")]
3360 )
3361
3362 ;; -------------------------------------------------------------------------
3363 ;; ---- [FP] Square root
3364 ;; -------------------------------------------------------------------------
3365
3366 (define_expand "sqrt<mode>2"
3367 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3368 (unspec:SVE_FULL_F
3369 [(match_dup 2)
3370 (const_int SVE_RELAXED_GP)
3371 (match_operand:SVE_FULL_F 1 "register_operand")]
3372 UNSPEC_COND_FSQRT))]
3373 "TARGET_SVE"
3374 {
3375 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3376 DONE;
3377 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3378 })
3379
3380 ;; -------------------------------------------------------------------------
3381 ;; ---- [FP] Reciprocal square root
3382 ;; -------------------------------------------------------------------------
3383
3384 (define_expand "rsqrt<mode>2"
3385 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3386 (unspec:SVE_FULL_SDF
3387 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3388 UNSPEC_RSQRT))]
3389 "TARGET_SVE"
3390 {
3391 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3392 DONE;
3393 })
3394
3395 (define_expand "@aarch64_rsqrte<mode>"
3396 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3397 (unspec:SVE_FULL_SDF
3398 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3399 UNSPEC_RSQRTE))]
3400 "TARGET_SVE"
3401 )
3402
3403 (define_expand "@aarch64_rsqrts<mode>"
3404 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3405 (unspec:SVE_FULL_SDF
3406 [(match_operand:SVE_FULL_SDF 1 "register_operand")
3407 (match_operand:SVE_FULL_SDF 2 "register_operand")]
3408 UNSPEC_RSQRTS))]
3409 "TARGET_SVE"
3410 )
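
;; A hedged sketch of how FRSQRTE/FRSQRTS combine (this mirrors, but is
;; not taken from, aarch64_emit_approx_sqrt):
;;
;;   #include <arm_sve.h>
;;   /* One Newton-style refinement of the reciprocal square root
;;      estimate: y' = y * (3 - x*y*y) / 2, with the (3 - a*b)/2 part
;;      provided by FRSQRTS.  */
;;   svfloat32_t rsqrt1 (svfloat32_t x)
;;   {
;;     svbool_t pg = svptrue_b32 ();
;;     svfloat32_t y = svrsqrte_f32 (x);
;;     return svmul_f32_x (pg, y,
;;                         svrsqrts_f32 (svmul_f32_x (pg, x, y), y));
;;   }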
3411
3412 ;; -------------------------------------------------------------------------
3413 ;; ---- [PRED] Inverse
3414 ;; -------------------------------------------------------------------------
3415 ;; Includes:
3416 ;; - NOT
3417 ;; -------------------------------------------------------------------------
3418
3419 ;; Unpredicated predicate inverse.
3420 (define_expand "one_cmpl<mode>2"
3421 [(set (match_operand:PRED_ALL 0 "register_operand")
3422 (and:PRED_ALL
3423 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3424 (match_dup 2)))]
3425 "TARGET_SVE"
3426 {
3427 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3428 }
3429 )
3430
3431 ;; Predicated predicate inverse.
3432 (define_insn "*one_cmpl<mode>3"
3433 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3434 (and:PRED_ALL
3435 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3436 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3437 "TARGET_SVE"
3438 "not\t%0.b, %1/z, %2.b"
3439 )
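
;; Illustrative ACLE usage (not part of this file):
;;
;;   #include <arm_sve.h>
;;   /* Invert P within the lanes selected by PG, zeroing the rest;
;;      this is the "not p0.b, p1/z, p2.b" form matched above.  */
;;   svbool_t pred_not (svbool_t pg, svbool_t p)
;;   {
;;     return svnot_b_z (pg, p);
;;   }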
3440
3441 ;; =========================================================================
3442 ;; == Binary arithmetic
3443 ;; =========================================================================
3444
3445 ;; -------------------------------------------------------------------------
3446 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3447 ;; -------------------------------------------------------------------------
3448 ;; Includes:
3449 ;; - ADD (merging form only)
3450 ;; - AND (merging form only)
3451 ;; - ASR (merging form only)
3452 ;; - EOR (merging form only)
3453 ;; - LSL (merging form only)
3454 ;; - LSR (merging form only)
3455 ;; - MUL
3456 ;; - ORR (merging form only)
3457 ;; - SMAX
3458 ;; - SMIN
3459 ;; - SQADD (SVE2 merging form only)
3460 ;; - SQSUB (SVE2 merging form only)
3461 ;; - SUB (merging form only)
3462 ;; - UMAX
3463 ;; - UMIN
3464 ;; - UQADD (SVE2 merging form only)
3465 ;; - UQSUB (SVE2 merging form only)
3466 ;; -------------------------------------------------------------------------
3467
3468 ;; Unpredicated integer binary operations that have an immediate form.
3469 (define_expand "<optab><mode>3"
3470 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3471 (unspec:SVE_FULL_I
3472 [(match_dup 3)
3473 (SVE_INT_BINARY_IMM:SVE_FULL_I
3474 (match_operand:SVE_FULL_I 1 "register_operand")
3475 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3476 UNSPEC_PRED_X))]
3477 "TARGET_SVE"
3478 {
3479 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3480 }
3481 )
3482
3483 ;; Integer binary operations that have an immediate form, predicated
3484 ;; with a PTRUE. We don't actually need the predicate for the first
3485 ;; and third alternatives, but using Upa or X isn't likely to gain much
3486 ;; and would make the instruction seem less uniform to the register
3487 ;; allocator.
3488 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3489 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
3490 (unspec:SVE_FULL_I
3491 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3492 (SVE_INT_BINARY_IMM:SVE_FULL_I
3493 (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
3494 (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3495 UNSPEC_PRED_X))]
3496 "TARGET_SVE"
3497 "@
3498 #
3499 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3500 #
3501 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3502 ; Split the unpredicated form after reload, so that we don't have
3503 ; the unnecessary PTRUE.
3504 "&& reload_completed
3505 && !register_operand (operands[3], <MODE>mode)"
3506 [(set (match_dup 0)
3507 (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
3508 ""
3509 [(set_attr "movprfx" "*,*,yes,yes")]
3510 )
3511
3512 ;; Unpredicated binary operations with a constant (post-RA only).
3513 ;; These are generated by splitting a predicated instruction whose
3514 ;; predicate is unused.
3515 (define_insn "*post_ra_<optab><mode>3"
3516 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3517 (SVE_INT_BINARY_IMM:SVE_FULL_I
3518 (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
3519 (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3520 "TARGET_SVE && reload_completed"
3521 "@
3522 <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3523 movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3524 [(set_attr "movprfx" "*,yes")]
3525 )
3526
3527 ;; Predicated integer operations with merging.
3528 (define_expand "@cond_<optab><mode>"
3529 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3530 (unspec:SVE_FULL_I
3531 [(match_operand:<VPRED> 1 "register_operand")
3532 (SVE_INT_BINARY:SVE_FULL_I
3533 (match_operand:SVE_FULL_I 2 "register_operand")
3534 (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
3535 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3536 UNSPEC_SEL))]
3537 "TARGET_SVE"
3538 )
3539
3540 ;; Predicated integer operations, merging with the first input.
3541 (define_insn "*cond_<optab><mode>_2"
3542 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3543 (unspec:SVE_FULL_I
3544 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3545 (SVE_INT_BINARY:SVE_FULL_I
3546 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3547 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3548 (match_dup 2)]
3549 UNSPEC_SEL))]
3550 "TARGET_SVE"
3551 "@
3552 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3553 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3554 [(set_attr "movprfx" "*,yes")]
3555 )
3556
3557 ;; Predicated integer operations, merging with the second input.
3558 (define_insn "*cond_<optab><mode>_3"
3559 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3560 (unspec:SVE_FULL_I
3561 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3562 (SVE_INT_BINARY:SVE_FULL_I
3563 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
3564 (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
3565 (match_dup 3)]
3566 UNSPEC_SEL))]
3567 "TARGET_SVE"
3568 "@
3569 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3570 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3571 [(set_attr "movprfx" "*,yes")]
3572 )
3573
3574 ;; Predicated integer operations, merging with an independent value.
3575 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3576 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3577 (unspec:SVE_FULL_I
3578 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3579 (SVE_INT_BINARY:SVE_FULL_I
3580 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3581 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
3582 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3583 UNSPEC_SEL))]
3584 "TARGET_SVE
3585 && !rtx_equal_p (operands[2], operands[4])
3586 && !rtx_equal_p (operands[3], operands[4])"
3587 "@
3588 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3589 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3590 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3591 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3592 #"
3593 "&& reload_completed
3594 && register_operand (operands[4], <MODE>mode)
3595 && !rtx_equal_p (operands[0], operands[4])"
3596 {
3597 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3598 operands[4], operands[1]));
3599 operands[4] = operands[2] = operands[0];
3600 }
3601 [(set_attr "movprfx" "yes")]
3602 )
3603
3604 ;; -------------------------------------------------------------------------
3605 ;; ---- [INT] Addition
3606 ;; -------------------------------------------------------------------------
3607 ;; Includes:
3608 ;; - ADD
3609 ;; - DECB
3610 ;; - DECD
3611 ;; - DECH
3612 ;; - DECW
3613 ;; - INCB
3614 ;; - INCD
3615 ;; - INCH
3616 ;; - INCW
3617 ;; - SUB
3618 ;; -------------------------------------------------------------------------
3619
3620 (define_insn "add<mode>3"
3621 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3622 (plus:SVE_I
3623 (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3624 (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3625 "TARGET_SVE"
3626 "@
3627 add\t%0.<Vetype>, %0.<Vetype>, #%D2
3628 sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3629 * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3630 movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3631 movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3632 add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3633 [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3634 )
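
;; As a rough illustration of the INC/DEC alternatives (assumptions:
;; GCC's arm_sve.h; whether INCW is chosen depends on the constant):
;;
;;   #include <arm_sve.h>
;;   /* Adding the 32-bit element count may use the "vsi" alternative
;;      and assemble to "incw z0.s".  */
;;   svint32_t add_vl (svint32_t x)
;;   {
;;     return svadd_n_s32_x (svptrue_b32 (), x, (int32_t) svcntw ());
;;   }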
3635
3636 ;; Merging forms are handled through SVE_INT_BINARY.
3637
3638 ;; -------------------------------------------------------------------------
3639 ;; ---- [INT] Subtraction
3640 ;; -------------------------------------------------------------------------
3641 ;; Includes:
3642 ;; - SUB
3643 ;; - SUBR
3644 ;; -------------------------------------------------------------------------
3645
3646 (define_insn "sub<mode>3"
3647 [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3648 (minus:SVE_I
3649 (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3650 (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
3651 "TARGET_SVE"
3652 "@
3653 sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3654 subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3655 movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3656 [(set_attr "movprfx" "*,*,yes")]
3657 )
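
;; Illustrative ACLE usage of the reversed-subtract alternative
;; (function name invented):
;;
;;   #include <arm_sve.h>
;;   /* Computes 16 - x per element; with an in-range immediate this
;;      can use "subr z0.s, z0.s, #16".  */
;;   svint32_t rsub16 (svint32_t x)
;;   {
;;     return svsubr_n_s32_x (svptrue_b32 (), x, 16);
;;   }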
3658
3659 ;; Merging forms are handled through SVE_INT_BINARY.
3660
3661 ;; -------------------------------------------------------------------------
3662 ;; ---- [INT] Take address
3663 ;; -------------------------------------------------------------------------
3664 ;; Includes:
3665 ;; - ADR
3666 ;; -------------------------------------------------------------------------
3667
3668 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
3669 ;; but the svadrb intrinsics should preserve the user's choice.
3670 (define_insn "@aarch64_adr<mode>"
3671 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3672 (unspec:SVE_FULL_SDI
3673 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3674 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3675 UNSPEC_ADR))]
3676 "TARGET_SVE"
3677 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3678 )
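
;; A minimal ACLE sketch (assuming GCC's arm_sve.h):
;;
;;   #include <arm_sve.h>
;;   /* Byte offsets: "adr z0.s, [z1.s, z2.s]".  */
;;   svuint32_t addr_b (svuint32_t bases, svuint32_t offsets)
;;   {
;;     return svadrb_u32base_u32offset (bases, offsets);
;;   }
;;   /* Word indices use the shifted form below:
;;      "adr z0.s, [z1.s, z2.s, lsl 2]".  */
;;   svuint32_t addr_w (svuint32_t bases, svuint32_t indices)
;;   {
;;     return svadrw_u32base_u32index (bases, indices);
;;   }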
3679
3680 ;; Same, but with the offset being sign-extended from the low 32 bits.
3681 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3682 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3683 (unspec:VNx2DI
3684 [(match_operand:VNx2DI 1 "register_operand" "w")
3685 (unspec:VNx2DI
3686 [(match_operand 3)
3687 (sign_extend:VNx2DI
3688 (truncate:VNx2SI
3689 (match_operand:VNx2DI 2 "register_operand" "w")))]
3690 UNSPEC_PRED_X)]
3691 UNSPEC_ADR))]
3692 "TARGET_SVE"
3693 "adr\t%0.d, [%1.d, %2.d, sxtw]"
3694 "&& !CONSTANT_P (operands[3])"
3695 {
3696 operands[3] = CONSTM1_RTX (VNx2BImode);
3697 }
3698 )
3699
3700 ;; Same, but with the offset being zero-extended from the low 32 bits.
3701 (define_insn "*aarch64_adr_uxtw_unspec"
3702 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3703 (unspec:VNx2DI
3704 [(match_operand:VNx2DI 1 "register_operand" "w")
3705 (and:VNx2DI
3706 (match_operand:VNx2DI 2 "register_operand" "w")
3707 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3708 UNSPEC_ADR))]
3709 "TARGET_SVE"
3710 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3711 )
3712
3713 ;; Same, but matched as a PLUS rather than as an unspec.
3714 (define_insn "*aarch64_adr_uxtw_and"
3715 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3716 (plus:VNx2DI
3717 (and:VNx2DI
3718 (match_operand:VNx2DI 2 "register_operand" "w")
3719 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3720 (match_operand:VNx2DI 1 "register_operand" "w")))]
3721 "TARGET_SVE"
3722 "adr\t%0.d, [%1.d, %2.d, uxtw]"
3723 )
3724
3725 ;; ADR with a nonzero shift.
3726 (define_expand "@aarch64_adr<mode>_shift"
3727 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3728 (plus:SVE_FULL_SDI
3729 (unspec:SVE_FULL_SDI
3730 [(match_dup 4)
3731 (ashift:SVE_FULL_SDI
3732 (match_operand:SVE_FULL_SDI 2 "register_operand")
3733 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3734 UNSPEC_PRED_X)
3735 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3736 "TARGET_SVE"
3737 {
3738 operands[4] = CONSTM1_RTX (<VPRED>mode);
3739 }
3740 )
3741
3742 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3743 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3744 (plus:SVE_FULL_SDI
3745 (unspec:SVE_FULL_SDI
3746 [(match_operand 4)
3747 (ashift:SVE_FULL_SDI
3748 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3749 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3750 UNSPEC_PRED_X)
3751 (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
3752 "TARGET_SVE"
3753 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
3754 "&& !CONSTANT_P (operands[4])"
3755 {
3756 operands[4] = CONSTM1_RTX (<VPRED>mode);
3757 }
3758 )
3759
3760 ;; Same, but with the index being sign-extended from the low 32 bits.
3761 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3762 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3763 (plus:VNx2DI
3764 (unspec:VNx2DI
3765 [(match_operand 4)
3766 (ashift:VNx2DI
3767 (unspec:VNx2DI
3768 [(match_operand 5)
3769 (sign_extend:VNx2DI
3770 (truncate:VNx2SI
3771 (match_operand:VNx2DI 2 "register_operand" "w")))]
3772 UNSPEC_PRED_X)
3773 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3774 UNSPEC_PRED_X)
3775 (match_operand:VNx2DI 1 "register_operand" "w")))]
3776 "TARGET_SVE"
3777 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3778 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3779 {
3780 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3781 }
3782 )
3783
3784 ;; Same, but with the index being zero-extended from the low 32 bits.
3785 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3786 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3787 (plus:VNx2DI
3788 (unspec:VNx2DI
3789 [(match_operand 5)
3790 (ashift:VNx2DI
3791 (and:VNx2DI
3792 (match_operand:VNx2DI 2 "register_operand" "w")
3793 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3794 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3795 UNSPEC_PRED_X)
3796 (match_operand:VNx2DI 1 "register_operand" "w")))]
3797 "TARGET_SVE"
3798 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3799 "&& !CONSTANT_P (operands[5])"
3800 {
3801 operands[5] = CONSTM1_RTX (VNx2BImode);
3802 }
3803 )
3804
3805 ;; -------------------------------------------------------------------------
3806 ;; ---- [INT] Absolute difference
3807 ;; -------------------------------------------------------------------------
3808 ;; Includes:
3809 ;; - SABD
3810 ;; - UABD
3811 ;; -------------------------------------------------------------------------
3812
3813 ;; Unpredicated integer absolute difference.
3814 (define_expand "<su>abd<mode>_3"
3815 [(use (match_operand:SVE_FULL_I 0 "register_operand"))
3816 (USMAX:SVE_FULL_I
3817 (match_operand:SVE_FULL_I 1 "register_operand")
3818 (match_operand:SVE_FULL_I 2 "register_operand"))]
3819 "TARGET_SVE"
3820 {
3821 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3822 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3823 operands[2]));
3824 DONE;
3825 }
3826 )
3827
3828 ;; Predicated integer absolute difference.
3829 (define_insn "@aarch64_pred_<su>abd<mode>"
3830 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3831 (unspec:SVE_FULL_I
3832 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3833 (minus:SVE_FULL_I
3834 (USMAX:SVE_FULL_I
3835 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3836 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3837 (<max_opp>:SVE_FULL_I
3838 (match_dup 2)
3839 (match_dup 3)))]
3840 UNSPEC_PRED_X))]
3841 "TARGET_SVE"
3842 "@
3843 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3844 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3845 [(set_attr "movprfx" "*,yes")]
3846 )
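
;; Illustratively (not from this file), the svabd intrinsics map onto
;; this pattern:
;;
;;   #include <arm_sve.h>
;;   /* Absolute difference |x - y|, e.g.
;;      "sabd z0.s, p0/m, z0.s, z1.s".  */
;;   svint32_t absdiff (svbool_t pg, svint32_t x, svint32_t y)
;;   {
;;     return svabd_s32_x (pg, x, y);
;;   }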
3847
3848 (define_expand "@aarch64_cond_<su>abd<mode>"
3849 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3850 (unspec:SVE_FULL_I
3851 [(match_operand:<VPRED> 1 "register_operand")
3852 (minus:SVE_FULL_I
3853 (unspec:SVE_FULL_I
3854 [(match_dup 1)
3855 (USMAX:SVE_FULL_I
3856 (match_operand:SVE_FULL_I 2 "register_operand")
3857 (match_operand:SVE_FULL_I 3 "register_operand"))]
3858 UNSPEC_PRED_X)
3859 (unspec:SVE_FULL_I
3860 [(match_dup 1)
3861 (<max_opp>:SVE_FULL_I
3862 (match_dup 2)
3863 (match_dup 3))]
3864 UNSPEC_PRED_X))
3865 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3866 UNSPEC_SEL))]
3867 "TARGET_SVE"
3868 {
3869 if (rtx_equal_p (operands[3], operands[4]))
3870 std::swap (operands[2], operands[3]);
3871 })
3872
3873 ;; Predicated integer absolute difference, merging with the first input.
3874 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
3875 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3876 (unspec:SVE_FULL_I
3877 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3878 (minus:SVE_FULL_I
3879 (unspec:SVE_FULL_I
3880 [(match_operand 4)
3881 (USMAX:SVE_FULL_I
3882 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3883 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
3884 UNSPEC_PRED_X)
3885 (unspec:SVE_FULL_I
3886 [(match_operand 5)
3887 (<max_opp>:SVE_FULL_I
3888 (match_dup 2)
3889 (match_dup 3))]
3890 UNSPEC_PRED_X))
3891 (match_dup 2)]
3892 UNSPEC_SEL))]
3893 "TARGET_SVE"
3894 "@
3895 <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3896 movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3897 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3898 {
3899 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
3900 }
3901 [(set_attr "movprfx" "*,yes")]
3902 )
3903
3904 ;; Predicated integer absolute difference, merging with an independent value.
3905 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
3906 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3907 (unspec:SVE_FULL_I
3908 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3909 (minus:SVE_FULL_I
3910 (unspec:SVE_FULL_I
3911 [(match_operand 5)
3912 (USMAX:SVE_FULL_I
3913 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3914 (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
3915 UNSPEC_PRED_X)
3916 (unspec:SVE_FULL_I
3917 [(match_operand 6)
3918 (<max_opp>:SVE_FULL_I
3919 (match_dup 2)
3920 (match_dup 3))]
3921 UNSPEC_PRED_X))
3922 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3923 UNSPEC_SEL))]
3924 "TARGET_SVE
3925 && !rtx_equal_p (operands[2], operands[4])
3926 && !rtx_equal_p (operands[3], operands[4])"
3927 "@
3928 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3929 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3930 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3931 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3932 #"
3933 "&& 1"
3934 {
3935 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
3936 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
3937 else if (reload_completed
3938 && register_operand (operands[4], <MODE>mode)
3939 && !rtx_equal_p (operands[0], operands[4]))
3940 {
3941 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3942 operands[4], operands[1]));
3943 operands[4] = operands[2] = operands[0];
3944 }
3945 else
3946 FAIL;
3947 }
3948 [(set_attr "movprfx" "yes")]
3949 )
3950
3951 ;; -------------------------------------------------------------------------
3952 ;; ---- [INT] Saturating addition and subtraction
3953 ;; -------------------------------------------------------------------------
3954 ;; Includes:
;; - SQADD
3955 ;; - SQSUB
3956 ;; - UQADD
3957 ;; - UQSUB
3958 ;; -------------------------------------------------------------------------
3959
3960 ;; Unpredicated saturating signed addition and subtraction.
3961 (define_insn "@aarch64_sve_<optab><mode>"
3962 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
3963 (SBINQOPS:SVE_FULL_I
3964 (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
3965 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
3966 "TARGET_SVE"
3967 "@
3968 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3969 <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3970 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3971 movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3972 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3973 [(set_attr "movprfx" "*,*,yes,yes,*")]
3974 )
3975
3976 ;; Unpredicated saturating unsigned addition and subtraction.
3977 (define_insn "@aarch64_sve_<optab><mode>"
3978 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
3979 (UBINQOPS:SVE_FULL_I
3980 (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
3981 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
3982 "TARGET_SVE"
3983 "@
3984 <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3985 movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3986 <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3987 [(set_attr "movprfx" "*,yes,*")]
3988 )
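
;; A hedged ACLE illustration (function names invented):
;;
;;   #include <arm_sve.h>
;;   /* Saturating operations are unpredicated in SVE1, matching the
;;      register-register alternatives above.  */
;;   svint32_t sat_add (svint32_t x, svint32_t y)
;;   {
;;     return svqadd_s32 (x, y);      /* sqadd z0.s, z0.s, z1.s  */
;;   }
;;   svuint32_t sat_sub (svuint32_t x, svuint32_t y)
;;   {
;;     return svqsub_u32 (x, y);      /* uqsub z0.s, z0.s, z1.s  */
;;   }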
3989
3990 ;; -------------------------------------------------------------------------
3991 ;; ---- [INT] Highpart multiplication
3992 ;; -------------------------------------------------------------------------
3993 ;; Includes:
3994 ;; - SMULH
3995 ;; - UMULH
3996 ;; -------------------------------------------------------------------------
3997
3998 ;; Unpredicated highpart multiplication.
3999 (define_expand "<su>mul<mode>3_highpart"
4000 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4001 (unspec:SVE_FULL_I
4002 [(match_dup 3)
4003 (unspec:SVE_FULL_I
4004 [(match_operand:SVE_FULL_I 1 "register_operand")
4005 (match_operand:SVE_FULL_I 2 "register_operand")]
4006 MUL_HIGHPART)]
4007 UNSPEC_PRED_X))]
4008 "TARGET_SVE"
4009 {
4010 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4011 }
4012 )
4013
4014 ;; Predicated highpart multiplication.
4015 (define_insn "@aarch64_pred_<optab><mode>"
4016 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4017 (unspec:SVE_FULL_I
4018 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4019 (unspec:SVE_FULL_I
4020 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4021 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4022 MUL_HIGHPART)]
4023 UNSPEC_PRED_X))]
4024 "TARGET_SVE"
4025 "@
4026 <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4027 movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4028 [(set_attr "movprfx" "*,yes")]
4029 )
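
;; Illustrative ACLE usage (assuming GCC's arm_sve.h):
;;
;;   #include <arm_sve.h>
;;   /* High half of the 32x32->64-bit product, per element; typically
;;      "smulh z0.s, p0/m, z0.s, z1.s".  */
;;   svint32_t mul_high (svbool_t pg, svint32_t x, svint32_t y)
;;   {
;;     return svmulh_s32_x (pg, x, y);
;;   }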
4030
4031 ;; Predicated highpart multiplications with merging.
4032 (define_expand "@cond_<optab><mode>"
4033 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4034 (unspec:SVE_FULL_I
4035 [(match_operand:<VPRED> 1 "register_operand")
4036 (unspec:SVE_FULL_I
4037 [(match_operand:SVE_FULL_I 2 "register_operand")
4038 (match_operand:SVE_FULL_I 3 "register_operand")]
4039 MUL_HIGHPART)
4040 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4041 UNSPEC_SEL))]
4042 "TARGET_SVE"
4043 {
4044 /* Only target code is aware of these operations, so we don't need
4045 to handle the fully-general case. */
4046 gcc_assert (rtx_equal_p (operands[2], operands[4])
4047 || CONSTANT_P (operands[4]));
4048 })
4049
4050 ;; Predicated highpart multiplications, merging with the first input.
4051 (define_insn "*cond_<optab><mode>_2"
4052 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4053 (unspec:SVE_FULL_I
4054 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4055 (unspec:SVE_FULL_I
4056 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4057 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4058 MUL_HIGHPART)
4059 (match_dup 2)]
4060 UNSPEC_SEL))]
4061 "TARGET_SVE"
4062 "@
4063 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4064 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4065 [(set_attr "movprfx" "*,yes")])
4066
4067 ;; Predicated highpart multiplications, merging with zero.
4068 (define_insn "*cond_<optab><mode>_z"
4069 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4070 (unspec:SVE_FULL_I
4071 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4072 (unspec:SVE_FULL_I
4073 [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4074 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4075 MUL_HIGHPART)
4076 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4077 UNSPEC_SEL))]
4078 "TARGET_SVE"
4079 "@
4080 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4081 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4082 [(set_attr "movprfx" "yes")])
4083
4084 ;; -------------------------------------------------------------------------
4085 ;; ---- [INT] Division
4086 ;; -------------------------------------------------------------------------
4087 ;; Includes:
4088 ;; - SDIV
4089 ;; - SDIVR
4090 ;; - UDIV
4091 ;; - UDIVR
4092 ;; -------------------------------------------------------------------------
4093
4094 ;; Unpredicated integer division.
4095 (define_expand "<optab><mode>3"
4096 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4097 (unspec:SVE_FULL_SDI
4098 [(match_dup 3)
4099 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4100 (match_operand:SVE_FULL_SDI 1 "register_operand")
4101 (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4102 UNSPEC_PRED_X))]
4103 "TARGET_SVE"
4104 {
4105 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4106 }
4107 )
4108
4109 ;; Integer division predicated with a PTRUE.
4110 (define_insn "@aarch64_pred_<optab><mode>"
4111 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4112 (unspec:SVE_FULL_SDI
4113 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4114 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4115 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4116 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4117 UNSPEC_PRED_X))]
4118 "TARGET_SVE"
4119 "@
4120 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4121 <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4122 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4123 [(set_attr "movprfx" "*,*,yes")]
4124 )
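
;; For illustration only; note that SVE division exists for .s and .d
;; element sizes only, hence SVE_FULL_SDI:
;;
;;   #include <arm_sve.h>
;;   /* "sdiv z0.s, p0/m, z0.s, z1.s" (or SDIVR if the second operand
;;      is the tied register).  */
;;   svint32_t vdiv (svbool_t pg, svint32_t x, svint32_t y)
;;   {
;;     return svdiv_s32_x (pg, x, y);
;;   }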
4125
4126 ;; Predicated integer division with merging.
4127 (define_expand "@cond_<optab><mode>"
4128 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4129 (unspec:SVE_FULL_SDI
4130 [(match_operand:<VPRED> 1 "register_operand")
4131 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4132 (match_operand:SVE_FULL_SDI 2 "register_operand")
4133 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4134 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4135 UNSPEC_SEL))]
4136 "TARGET_SVE"
4137 )
4138
4139 ;; Predicated integer division, merging with the first input.
4140 (define_insn "*cond_<optab><mode>_2"
4141 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4142 (unspec:SVE_FULL_SDI
4143 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4144 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4145 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4146 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4147 (match_dup 2)]
4148 UNSPEC_SEL))]
4149 "TARGET_SVE"
4150 "@
4151 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4152 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4153 [(set_attr "movprfx" "*,yes")]
4154 )
4155
4156 ;; Predicated integer division, merging with the second input.
4157 (define_insn "*cond_<optab><mode>_3"
4158 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4159 (unspec:SVE_FULL_SDI
4160 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4161 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4162 (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4163 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4164 (match_dup 3)]
4165 UNSPEC_SEL))]
4166 "TARGET_SVE"
4167 "@
4168 <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4169 movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4170 [(set_attr "movprfx" "*,yes")]
4171 )
4172
4173 ;; Predicated integer division, merging with an independent value.
4174 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4175 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4176 (unspec:SVE_FULL_SDI
4177 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4178 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4179 (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4180 (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4181 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4182 UNSPEC_SEL))]
4183 "TARGET_SVE
4184 && !rtx_equal_p (operands[2], operands[4])
4185 && !rtx_equal_p (operands[3], operands[4])"
4186 "@
4187 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4188 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4189 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4190 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4191 #"
4192 "&& reload_completed
4193 && register_operand (operands[4], <MODE>mode)
4194 && !rtx_equal_p (operands[0], operands[4])"
4195 {
4196 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4197 operands[4], operands[1]));
4198 operands[4] = operands[2] = operands[0];
4199 }
4200 [(set_attr "movprfx" "yes")]
4201 )
4202
4203 ;; -------------------------------------------------------------------------
4204 ;; ---- [INT] Binary logical operations
4205 ;; -------------------------------------------------------------------------
4206 ;; Includes:
4207 ;; - AND
4208 ;; - EOR
4209 ;; - ORR
4210 ;; -------------------------------------------------------------------------
4211
4212 ;; Unpredicated integer binary logical operations.
4213 (define_insn "<optab><mode>3"
4214 [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
4215 (LOGICAL:SVE_I
4216 (match_operand:SVE_I 1 "register_operand" "%0, w, w")
4217 (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4218 "TARGET_SVE"
4219 "@
4220 <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4221 movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4222 <logical>\t%0.d, %1.d, %2.d"
4223 [(set_attr "movprfx" "*,yes,*")]
4224 )
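
;; A rough ACLE illustration (whether the immediate alternative is used
;; depends on the constant being a valid logical immediate):
;;
;;   #include <arm_sve.h>
;;   /* May assemble to "and z0.s, z0.s, #0xff".  */
;;   svint32_t mask_byte (svint32_t x)
;;   {
;;     return svand_n_s32_x (svptrue_b32 (), x, 0xff);
;;   }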
4225
4226 ;; Merging forms are handled through SVE_INT_BINARY.
4227
4228 ;; -------------------------------------------------------------------------
4229 ;; ---- [INT] Binary logical operations (inverted second input)
4230 ;; -------------------------------------------------------------------------
4231 ;; Includes:
4232 ;; - BIC
4233 ;; -------------------------------------------------------------------------
4234
4235 ;; Unpredicated BIC.
4236 (define_expand "@aarch64_bic<mode>"
4237 [(set (match_operand:SVE_I 0 "register_operand")
4238 (and:SVE_I
4239 (unspec:SVE_I
4240 [(match_dup 3)
4241 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4242 UNSPEC_PRED_X)
4243 (match_operand:SVE_I 1 "register_operand")))]
4244 "TARGET_SVE"
4245 {
4246 operands[3] = CONSTM1_RTX (<VPRED>mode);
4247 }
4248 )
4249
4250 ;; Predicated BIC.
4251 (define_insn_and_rewrite "*bic<mode>3"
4252 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4253 (and:SVE_I
4254 (unspec:SVE_I
4255 [(match_operand 3)
4256 (not:SVE_I
4257 (match_operand:SVE_I 2 "register_operand" "w"))]
4258 UNSPEC_PRED_X)
4259 (match_operand:SVE_I 1 "register_operand" "w")))]
4260 "TARGET_SVE"
4261 "bic\t%0.d, %1.d, %2.d"
4262 "&& !CONSTANT_P (operands[3])"
4263 {
4264 operands[3] = CONSTM1_RTX (<VPRED>mode);
4265 }
4266 )
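
;; Illustrative ACLE usage (function name invented):
;;
;;   #include <arm_sve.h>
;;   /* x & ~y, i.e. the unpredicated "bic z0.d, z1.d, z2.d" above.  */
;;   svint32_t and_not (svbool_t pg, svint32_t x, svint32_t y)
;;   {
;;     return svbic_s32_x (pg, x, y);
;;   }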
4267
4268 ;; Predicated BIC with merging.
4269 (define_expand "@cond_bic<mode>"
4270 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4271 (unspec:SVE_FULL_I
4272 [(match_operand:<VPRED> 1 "register_operand")
4273 (and:SVE_FULL_I
4274 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4275 (match_operand:SVE_FULL_I 2 "register_operand"))
4276 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4277 UNSPEC_SEL))]
4278 "TARGET_SVE"
4279 )
4280
4281 ;; Predicated integer BIC, merging with the first input.
4282 (define_insn "*cond_bic<mode>_2"
4283 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4284 (unspec:SVE_FULL_I
4285 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4286 (and:SVE_FULL_I
4287 (not:SVE_FULL_I
4288 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
4289 (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
4290 (match_dup 2)]
4291 UNSPEC_SEL))]
4292 "TARGET_SVE"
4293 "@
4294 bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4295 movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4296 [(set_attr "movprfx" "*,yes")]
4297 )
4298
4299 ;; Predicated integer BIC, merging with an independent value.
4300 (define_insn_and_rewrite "*cond_bic<mode>_any"
4301 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
4302 (unspec:SVE_FULL_I
4303 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4304 (and:SVE_FULL_I
4305 (not:SVE_FULL_I
4306 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
4307 (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
4308 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4309 UNSPEC_SEL))]
4310 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4311 "@
4312 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4313 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4314 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4315 #"
4316 "&& reload_completed
4317 && register_operand (operands[4], <MODE>mode)
4318 && !rtx_equal_p (operands[0], operands[4])"
4319 {
4320 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4321 operands[4], operands[1]));
4322 operands[4] = operands[2] = operands[0];
4323 }
4324 [(set_attr "movprfx" "yes")]
4325 )
4326
4327 ;; -------------------------------------------------------------------------
4328 ;; ---- [INT] Shifts (rounding towards -Inf)
4329 ;; -------------------------------------------------------------------------
4330 ;; Includes:
4331 ;; - ASR
4332 ;; - ASRR
4333 ;; - LSL
4334 ;; - LSLR
4335 ;; - LSR
4336 ;; - LSRR
4337 ;; -------------------------------------------------------------------------
4338
4339 ;; Unpredicated shift by a scalar, which expands into one of the vector
4340 ;; shifts below.
4341 (define_expand "<ASHIFT:optab><mode>3"
4342 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4343 (ASHIFT:SVE_FULL_I
4344 (match_operand:SVE_FULL_I 1 "register_operand")
4345 (match_operand:<VEL> 2 "general_operand")))]
4346 "TARGET_SVE"
4347 {
4348 rtx amount;
4349 if (CONST_INT_P (operands[2]))
4350 {
4351 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4352 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4353 amount = force_reg (<MODE>mode, amount);
4354 }
4355 else
4356 {
4357 amount = gen_reg_rtx (<MODE>mode);
4358 emit_insn (gen_vec_duplicate<mode> (amount,
4359 convert_to_mode (<VEL>mode,
4360 operands[2], 0)));
4361 }
4362 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4363 DONE;
4364 }
4365 )
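
;; A rough example of the path through this expander: for a loop such as
;;
;;   void f (int *x, int n) { for (int i = 0; i < n; ++i) x[i] <<= 3; }
;;
;; the scalar amount 3 is duplicated into a vector immediate, the
;; predicated pattern below matches it, and the post-RA split leaves the
;; unpredicated form "lsl z0.s, z1.s, #3".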
4366
4367 ;; Unpredicated shift by a vector.
4368 (define_expand "v<optab><mode>3"
4369 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4370 (unspec:SVE_FULL_I
4371 [(match_dup 3)
4372 (ASHIFT:SVE_FULL_I
4373 (match_operand:SVE_FULL_I 1 "register_operand")
4374 (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
4375 UNSPEC_PRED_X))]
4376 "TARGET_SVE"
4377 {
4378 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4379 }
4380 )
4381
4382 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4383 ;; the predicate for the first alternative, but using Upa or X isn't
4384 ;; likely to gain much and would make the instruction seem less uniform
4385 ;; to the register allocator.
4386 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4387 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
4388 (unspec:SVE_FULL_I
4389 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4390 (ASHIFT:SVE_FULL_I
4391 (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
4392 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4393 UNSPEC_PRED_X))]
4394 "TARGET_SVE"
4395 "@
4396 #
4397 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4398 <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4399 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4400 "&& reload_completed
4401 && !register_operand (operands[3], <MODE>mode)"
4402 [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
4403 ""
4404 [(set_attr "movprfx" "*,*,*,yes")]
4405 )
4406
4407 ;; Unpredicated shift operations by a constant (post-RA only).
4408 ;; These are generated by splitting a predicated instruction whose
4409 ;; predicate is unused.
4410 (define_insn "*post_ra_v<optab><mode>3"
4411 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4412 (ASHIFT:SVE_FULL_I
4413 (match_operand:SVE_FULL_I 1 "register_operand" "w")
4414 (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
4415 "TARGET_SVE && reload_completed"
4416 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4417 )
4418
4419 ;; Predicated integer shift, merging with the first input.
4420 (define_insn "*cond_<optab><mode>_2_const"
4421 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4422 (unspec:SVE_FULL_I
4423 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4424 (ASHIFT:SVE_FULL_I
4425 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4426 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4427 (match_dup 2)]
4428 UNSPEC_SEL))]
4429 "TARGET_SVE"
4430 "@
4431 <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4432 movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4433 [(set_attr "movprfx" "*,yes")]
4434 )
4435
4436 ;; Predicated integer shift, merging with an independent value.
4437 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4438 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
4439 (unspec:SVE_FULL_I
4440 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4441 (ASHIFT:SVE_FULL_I
4442 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
4443 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4444 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4445 UNSPEC_SEL))]
4446 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4447 "@
4448 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4449 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4450 #"
4451 "&& reload_completed
4452 && register_operand (operands[4], <MODE>mode)
4453 && !rtx_equal_p (operands[0], operands[4])"
4454 {
4455 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4456 operands[4], operands[1]));
4457 operands[4] = operands[2] = operands[0];
4458 }
4459 [(set_attr "movprfx" "yes")]
4460 )
4461
4462 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4463 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4464 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4465 (unspec:SVE_FULL_BHSI
4466 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4467 (match_operand:VNx2DI 2 "register_operand" "w")]
4468 SVE_SHIFT_WIDE))]
4469 "TARGET_SVE"
4470 "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4471 )
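
;; Each narrow element is shifted by the amount held in the 64-bit
;; element that contains it; for example (illustrative only):
;;
;;   lsl z0.b, z1.b, z2.d  // bytes 0-7 use z2.d[0], bytes 8-15 use z2.d[1], ...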
4472
4473 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4474 (define_expand "@cond_<sve_int_op><mode>"
4475 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4476 (unspec:SVE_FULL_BHSI
4477 [(match_operand:<VPRED> 1 "register_operand")
4478 (unspec:SVE_FULL_BHSI
4479 [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4480 (match_operand:VNx2DI 3 "register_operand")]
4481 SVE_SHIFT_WIDE)
4482 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4483 UNSPEC_SEL))]
4484 "TARGET_SVE"
4485 )
4486
4487 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4488 ;; the first input.
4489 (define_insn "*cond_<sve_int_op><mode>_m"
4490 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4491 (unspec:SVE_FULL_BHSI
4492 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4493 (unspec:SVE_FULL_BHSI
4494 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4495 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4496 SVE_SHIFT_WIDE)
4497 (match_dup 2)]
4498 UNSPEC_SEL))]
4499 "TARGET_SVE"
4500 "@
4501 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4502 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4503 [(set_attr "movprfx" "*, yes")])
4504
4505 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4506 (define_insn "*cond_<sve_int_op><mode>_z"
4507 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4508 (unspec:SVE_FULL_BHSI
4509 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4510 (unspec:SVE_FULL_BHSI
4511 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4512 (match_operand:VNx2DI 3 "register_operand" "w, w")]
4513 SVE_SHIFT_WIDE)
4514 (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4515 UNSPEC_SEL))]
4516 "TARGET_SVE"
4517 "@
4518 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4519 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4520 [(set_attr "movprfx" "yes")])
4521
4522 ;; -------------------------------------------------------------------------
4523 ;; ---- [INT] Shifts (rounding towards 0)
4524 ;; -------------------------------------------------------------------------
4525 ;; Includes:
4526 ;; - ASRD
4527 ;; - SQSHLU (SVE2)
4528 ;; - SRSHR (SVE2)
4529 ;; - URSHR (SVE2)
4530 ;; -------------------------------------------------------------------------
4531
4532 ;; Unpredicated ASRD, used to implement signed division by a power of 2.
4533 (define_expand "sdiv_pow2<mode>3"
4534 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4535 (unspec:SVE_FULL_I
4536 [(match_dup 3)
4537 (unspec:SVE_FULL_I
4538 [(match_operand:SVE_FULL_I 1 "register_operand")
4539 (match_operand 2 "aarch64_simd_rshift_imm")]
4540 UNSPEC_ASRD)
4541 (match_dup 1)]
4542 UNSPEC_SEL))]
4543 "TARGET_SVE"
4544 {
4545 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4546 }
4547 )
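
;; A sketch of what this expands from: signed division by a power of 2,
;;
;;   void f (int *x, int n) { for (int i = 0; i < n; ++i) x[i] /= 4; }
;;
;; becomes "asrd z0.s, p0/m, z0.s, #2".  ASRD rounds towards zero,
;; matching C semantics, whereas a plain arithmetic shift rounds
;; towards -Inf.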
4548
4549 ;; Predicated right shift with merging.
4550 (define_expand "@cond_<sve_int_op><mode>"
4551 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4552 (unspec:SVE_FULL_I
4553 [(match_operand:<VPRED> 1 "register_operand")
4554 (unspec:SVE_FULL_I
4555 [(match_operand:SVE_FULL_I 2 "register_operand")
4556 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4557 SVE_INT_SHIFT_IMM)
4558 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4559 UNSPEC_SEL))]
4560 "TARGET_SVE"
4561 )
4562
4563 ;; Predicated right shift, merging with the first input.
4564 (define_insn "*cond_<sve_int_op><mode>_2"
4565 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4566 (unspec:SVE_FULL_I
4567 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4568 (unspec:SVE_FULL_I
4569 [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4570 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4571 SVE_INT_SHIFT_IMM)
4572 (match_dup 2)]
4573 UNSPEC_SEL))]
4574 "TARGET_SVE"
4575 "@
4576 <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4577 movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4578 [(set_attr "movprfx" "*,yes")])
4579
4580 ;; Predicated right shift, merging with zero.
4581 (define_insn "*cond_<sve_int_op><mode>_z"
4582 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4583 (unspec:SVE_FULL_I
4584 [(match_operand:<VPRED> 1 "register_operand" "Upl")
4585 (unspec:SVE_FULL_I
4586 [(match_operand:SVE_FULL_I 2 "register_operand" "w")
4587 (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4588 SVE_INT_SHIFT_IMM)
4589 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4590 UNSPEC_SEL))]
4591 "TARGET_SVE"
4592 "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4593 [(set_attr "movprfx" "yes")])
4594
4595 ;; -------------------------------------------------------------------------
4596 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4597 ;; -------------------------------------------------------------------------
4598 ;; Includes:
4599 ;; - FSCALE
4600 ;; - FTSMUL
4601 ;; - FTSSEL
4602 ;; -------------------------------------------------------------------------
4603
4604 ;; Unpredicated floating-point binary operations that take an integer as
4605 ;; their second operand.
4606 (define_insn "@aarch64_sve_<optab><mode>"
4607 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4608 (unspec:SVE_FULL_F
4609 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4610 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4611 SVE_FP_BINARY_INT))]
4612 "TARGET_SVE"
4613 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4614 )
4615
4616 ;; Predicated floating-point binary operations that take an integer
4617 ;; as their second operand.
4618 (define_insn "@aarch64_pred_<optab><mode>"
4619 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4620 (unspec:SVE_FULL_F
4621 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4622 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4623 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4624 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4625 SVE_COND_FP_BINARY_INT))]
4626 "TARGET_SVE"
4627 "@
4628 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4629 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4630 [(set_attr "movprfx" "*,yes")]
4631 )
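
;; For example (a sketch, assuming the usual ACLE mapping): FSCALE
;; computes x * 2^n with n taken from the integer operand, so
;;
;;   svfloat32_t f (svbool_t pg, svfloat32_t x, svint32_t n)
;;   {
;;     return svscale_f32_m (pg, x, n);
;;   }
;;
;; should match this pattern as "fscale z0.s, p0/m, z0.s, z1.s".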
4632
4633 ;; Predicated floating-point binary operations with merging, taking an
4634 ;; integer as their second operand.
4635 (define_expand "@cond_<optab><mode>"
4636 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4637 (unspec:SVE_FULL_F
4638 [(match_operand:<VPRED> 1 "register_operand")
4639 (unspec:SVE_FULL_F
4640 [(match_dup 1)
4641 (const_int SVE_STRICT_GP)
4642 (match_operand:SVE_FULL_F 2 "register_operand")
4643 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4644 SVE_COND_FP_BINARY_INT)
4645 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4646 UNSPEC_SEL))]
4647 "TARGET_SVE"
4648 )
4649
4650 ;; Predicated floating-point binary operations that take an integer as their
4651 ;; second operand, with inactive lanes coming from the first operand.
4652 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4653 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4654 (unspec:SVE_FULL_F
4655 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4656 (unspec:SVE_FULL_F
4657 [(match_operand 4)
4658 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4659 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4660 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4661 SVE_COND_FP_BINARY_INT)
4662 (match_dup 2)]
4663 UNSPEC_SEL))]
4664 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4665 "@
4666 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4667 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4668 "&& !rtx_equal_p (operands[1], operands[4])"
4669 {
4670 operands[4] = copy_rtx (operands[1]);
4671 }
4672 [(set_attr "movprfx" "*,yes")]
4673 )
4674
4675 ;; Predicated floating-point binary operations that take an integer as
4676 ;; their second operand, with the values of inactive lanes being distinct
4677 ;; from the other inputs.
4678 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4679 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4680 (unspec:SVE_FULL_F
4681 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4682 (unspec:SVE_FULL_F
4683 [(match_operand 5)
4684 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4685 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4686 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4687 SVE_COND_FP_BINARY_INT)
4688 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4689 UNSPEC_SEL))]
4690 "TARGET_SVE
4691 && !rtx_equal_p (operands[2], operands[4])
4692 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4693 "@
4694 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4695 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4696 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4697 #"
4698 "&& 1"
4699 {
4700 if (reload_completed
4701 && register_operand (operands[4], <MODE>mode)
4702 && !rtx_equal_p (operands[0], operands[4]))
4703 {
4704 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4705 operands[4], operands[1]));
4706 operands[4] = operands[2] = operands[0];
4707 }
4708 else if (!rtx_equal_p (operands[1], operands[5]))
4709 operands[5] = copy_rtx (operands[1]);
4710 else
4711 FAIL;
4712 }
4713 [(set_attr "movprfx" "yes")]
4714 )
4715
4716 ;; -------------------------------------------------------------------------
4717 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
4718 ;; -------------------------------------------------------------------------
4719 ;; Includes post-RA forms of:
4720 ;; - FADD
4721 ;; - FMUL
4722 ;; - FSUB
4723 ;; -------------------------------------------------------------------------
4724
4725 ;; Unpredicated floating-point binary operations (post-RA only).
4726 ;; These are generated by splitting a predicated instruction whose
4727 ;; predicate is unused.
4728 (define_insn "*post_ra_<sve_fp_op><mode>3"
4729 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4730 (SVE_UNPRED_FP_BINARY:SVE_FULL_F
4731 (match_operand:SVE_FULL_F 1 "register_operand" "w")
4732 (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
4733 "TARGET_SVE && reload_completed"
4734 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
4735
4736 ;; -------------------------------------------------------------------------
4737 ;; ---- [FP] General binary arithmetic corresponding to unspecs
4738 ;; -------------------------------------------------------------------------
4739 ;; Includes merging forms of:
4740 ;; - FADD (constant forms handled in the "Addition" section)
4741 ;; - FDIV
4742 ;; - FDIVR
4743 ;; - FMAX
4744 ;; - FMAXNM (including #0.0 and #1.0)
4745 ;; - FMIN
4746 ;; - FMINNM (including #0.0 and #1.0)
4747 ;; - FMUL (including #0.5 and #2.0)
4748 ;; - FMULX
4749 ;; - FRECPS
4750 ;; - FRSQRTS
4751 ;; - FSUB (constant forms handled in the "Addition" section)
4752 ;; - FSUBR (constant forms handled in the "Subtraction" section)
4753 ;; -------------------------------------------------------------------------
4754
4755 ;; Unpredicated floating-point binary operations.
4756 (define_insn "@aarch64_sve_<optab><mode>"
4757 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4758 (unspec:SVE_FULL_F
4759 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4760 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
4761 SVE_FP_BINARY))]
4762 "TARGET_SVE"
4763 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4764 )
4765
4766 ;; Unpredicated floating-point binary operations that need to be predicated
4767 ;; for SVE.
4768 (define_expand "<optab><mode>3"
4769 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4770 (unspec:SVE_FULL_F
4771 [(match_dup 3)
4772 (const_int SVE_RELAXED_GP)
4773 (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
4774 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
4775 SVE_COND_FP_BINARY_OPTAB))]
4776 "TARGET_SVE"
4777 {
4778 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4779 }
4780 )
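
;; A hedged example of why the PTRUE is needed: "z0 = z1 / z2" on floats
;; has no unpredicated SVE encoding, so the expander above wraps FDIV in
;; an all-true predicate, giving something like:
;;
;;   ptrue  p0.b
;;   fdiv   z0.s, p0/m, z0.s, z1.s   // z0 /= z1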
4781
4782 ;; Predicated floating-point binary operations that have no immediate forms.
4783 (define_insn "@aarch64_pred_<optab><mode>"
4784 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
4785 (unspec:SVE_FULL_F
4786 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4787 (match_operand:SI 4 "aarch64_sve_gp_strictness")
4788 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
4789 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
4790 SVE_COND_FP_BINARY_REG))]
4791 "TARGET_SVE"
4792 "@
4793 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4794 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4795 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4796 [(set_attr "movprfx" "*,*,yes")]
4797 )
4798
4799 ;; Predicated floating-point operations with merging.
4800 (define_expand "@cond_<optab><mode>"
4801 [(set (match_operand:SVE_FULL_F 0 "register_operand")
4802 (unspec:SVE_FULL_F
4803 [(match_operand:<VPRED> 1 "register_operand")
4804 (unspec:SVE_FULL_F
4805 [(match_dup 1)
4806 (const_int SVE_STRICT_GP)
4807 (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
4808 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
4809 SVE_COND_FP_BINARY)
4810 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4811 UNSPEC_SEL))]
4812 "TARGET_SVE"
4813 )
4814
4815 ;; Predicated floating-point operations, merging with the first input.
4816 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4817 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4818 (unspec:SVE_FULL_F
4819 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4820 (unspec:SVE_FULL_F
4821 [(match_operand 4)
4822 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4823 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4824 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
4825 SVE_COND_FP_BINARY)
4826 (match_dup 2)]
4827 UNSPEC_SEL))]
4828 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4829 "@
4830 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4831 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4832 "&& !rtx_equal_p (operands[1], operands[4])"
4833 {
4834 operands[4] = copy_rtx (operands[1]);
4835 }
4836 [(set_attr "movprfx" "*,yes")]
4837 )
4838
4839 ;; Same for operations that take a 1-bit constant, i.e. one of two encodable immediates.
4840 (define_insn_and_rewrite "*cond_<optab><mode>_2_const"
4841 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
4842 (unspec:SVE_FULL_F
4843 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4844 (unspec:SVE_FULL_F
4845 [(match_operand 4)
4846 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4847 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4848 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4849 SVE_COND_FP_BINARY_I1)
4850 (match_dup 2)]
4851 UNSPEC_SEL))]
4852 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4853 "@
4854 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4855 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4856 "&& !rtx_equal_p (operands[1], operands[4])"
4857 {
4858 operands[4] = copy_rtx (operands[1]);
4859 }
4860 [(set_attr "movprfx" "*,yes")]
4861 )
4862
4863 ;; Predicated floating-point operations, merging with the second input.
4864 (define_insn_and_rewrite "*cond_<optab><mode>_3"
4865 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4866 (unspec:SVE_FULL_F
4867 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4868 (unspec:SVE_FULL_F
4869 [(match_operand 4)
4870 (match_operand:SI 5 "aarch64_sve_gp_strictness")
4871 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
4872 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
4873 SVE_COND_FP_BINARY)
4874 (match_dup 3)]
4875 UNSPEC_SEL))]
4876 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4877 "@
4878 <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4879 movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4880 "&& !rtx_equal_p (operands[1], operands[4])"
4881 {
4882 operands[4] = copy_rtx (operands[1]);
4883 }
4884 [(set_attr "movprfx" "*,yes")]
4885 )
4886
4887 ;; Predicated floating-point operations, merging with an independent value.
4888 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4889 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4890 (unspec:SVE_FULL_F
4891 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4892 (unspec:SVE_FULL_F
4893 [(match_operand 5)
4894 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4895 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
4896 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
4897 SVE_COND_FP_BINARY)
4898 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4899 UNSPEC_SEL))]
4900 "TARGET_SVE
4901 && !rtx_equal_p (operands[2], operands[4])
4902 && !rtx_equal_p (operands[3], operands[4])
4903 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4904 "@
4905 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4906 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4907 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4908 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4909 #"
4910 "&& 1"
4911 {
4912 if (reload_completed
4913 && register_operand (operands[4], <MODE>mode)
4914 && !rtx_equal_p (operands[0], operands[4]))
4915 {
4916 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4917 operands[4], operands[1]));
4918 operands[4] = operands[2] = operands[0];
4919 }
4920 else if (!rtx_equal_p (operands[1], operands[5]))
4921 operands[5] = copy_rtx (operands[1]);
4922 else
4923 FAIL;
4924 }
4925 [(set_attr "movprfx" "yes")]
4926 )
4927
4928 ;; Same for operations that take a 1-bit constant, i.e. one of two encodable immediates.
4929 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4930 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
4931 (unspec:SVE_FULL_F
4932 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4933 (unspec:SVE_FULL_F
4934 [(match_operand 5)
4935 (match_operand:SI 6 "aarch64_sve_gp_strictness")
4936 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
4937 (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4938 SVE_COND_FP_BINARY_I1)
4939 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4940 UNSPEC_SEL))]
4941 "TARGET_SVE
4942 && !rtx_equal_p (operands[2], operands[4])
4943 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4944 "@
4945 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4946 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4947 #"
4948 "&& 1"
4949 {
4950 if (reload_completed
4951 && register_operand (operands[4], <MODE>mode)
4952 && !rtx_equal_p (operands[0], operands[4]))
4953 {
4954 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4955 operands[4], operands[1]));
4956 operands[4] = operands[2] = operands[0];
4957 }
4958 else if (!rtx_equal_p (operands[1], operands[5]))
4959 operands[5] = copy_rtx (operands[1]);
4960 else
4961 FAIL;
4962 }
4963 [(set_attr "movprfx" "yes")]
4964 )
4965
4966 ;; -------------------------------------------------------------------------
4967 ;; ---- [FP] Addition
4968 ;; -------------------------------------------------------------------------
4969 ;; Includes:
4970 ;; - FADD
4971 ;; - FSUB
4972 ;; -------------------------------------------------------------------------
4973
4974 ;; Predicated floating-point addition.
4975 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4976 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
4977 (unspec:SVE_FULL_F
4978 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
4979 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
4980 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
4981 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
4982 SVE_COND_FP_ADD))]
4983 "TARGET_SVE"
4984 "@
4985 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4986 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4987 #
4988 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4989 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4990 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4991 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4992 ; Split the unpredicated form after reload, so that we don't have
4993 ; the unnecessary PTRUE.
4994 "&& reload_completed
4995 && register_operand (operands[3], <MODE>mode)
4996 && INTVAL (operands[4]) == SVE_RELAXED_GP"
4997 [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
4998 ""
4999 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
5000 )
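
;; Note on the immediate alternatives above: vsA accepts the only FADD
;; immediates (+0.5 and +1.0) and vsN their negations, which are emitted
;; as FSUB instead; e.g. adding -1.0 comes out as (illustrative)
;;
;;   fsub z0.s, p0/m, z0.s, #1.0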
5001
5002 ;; Predicated floating-point addition of a constant, merging with the
5003 ;; first input.
5004 (define_insn_and_rewrite "*cond_add<mode>_2_const"
5005 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5006 (unspec:SVE_FULL_F
5007 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5008 (unspec:SVE_FULL_F
5009 [(match_operand 4)
5010 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5011 (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5012 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5013 UNSPEC_COND_FADD)
5014 (match_dup 2)]
5015 UNSPEC_SEL))]
5016 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5017 "@
5018 fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5019 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5020 movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5021 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5022 "&& !rtx_equal_p (operands[1], operands[4])"
5023 {
5024 operands[4] = copy_rtx (operands[1]);
5025 }
5026 [(set_attr "movprfx" "*,*,yes,yes")]
5027 )
5028
5029 ;; Predicated floating-point addition of a constant, merging with an
5030 ;; independent value.
5031 (define_insn_and_rewrite "*cond_add<mode>_any_const"
5032 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5033 (unspec:SVE_FULL_F
5034 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5035 (unspec:SVE_FULL_F
5036 [(match_operand 5)
5037 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5038 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5039 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5040 UNSPEC_COND_FADD)
5041 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5042 UNSPEC_SEL))]
5043 "TARGET_SVE
5044 && !rtx_equal_p (operands[2], operands[4])
5045 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5046 "@
5047 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5048 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5049 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5050 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5051 #
5052 #"
5053 "&& 1"
5054 {
5055 if (reload_completed
5056 && register_operand (operands[4], <MODE>mode)
5057 && !rtx_equal_p (operands[0], operands[4]))
5058 {
5059 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5060 operands[4], operands[1]));
5061 operands[4] = operands[2] = operands[0];
5062 }
5063 else if (!rtx_equal_p (operands[1], operands[5]))
5064 operands[5] = copy_rtx (operands[1]);
5065 else
5066 FAIL;
5067 }
5068 [(set_attr "movprfx" "yes")]
5069 )
5070
5071 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5072
5073 ;; -------------------------------------------------------------------------
5074 ;; ---- [FP] Complex addition
5075 ;; -------------------------------------------------------------------------
5076 ;; Includes:
5077 ;; - FCADD
5078 ;; -------------------------------------------------------------------------
5079
5080 ;; Predicated FCADD.
5081 (define_insn "@aarch64_pred_<optab><mode>"
5082 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5083 (unspec:SVE_FULL_F
5084 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5085 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5086 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5087 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5088 SVE_COND_FCADD))]
5089 "TARGET_SVE"
5090 "@
5091 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5092 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5093 [(set_attr "movprfx" "*,yes")]
5094 )
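
;; A sketch of the FCADD semantics: even/odd element pairs are treated
;; as complex numbers (re, im).  Rotation #90 computes a + b*i, i.e.
;; (a.re - b.im, a.im + b.re), and #270 computes a - b*i, i.e.
;; (a.re + b.im, a.im - b.re).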
5095
5096 ;; Predicated FCADD with merging.
5097 (define_expand "@cond_<optab><mode>"
5098 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5099 (unspec:SVE_FULL_F
5100 [(match_operand:<VPRED> 1 "register_operand")
5101 (unspec:SVE_FULL_F
5102 [(match_dup 1)
5103 (const_int SVE_STRICT_GP)
5104 (match_operand:SVE_FULL_F 2 "register_operand")
5105 (match_operand:SVE_FULL_F 3 "register_operand")]
5106 SVE_COND_FCADD)
5107 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5108 UNSPEC_SEL))]
5109 "TARGET_SVE"
5110 )
5111
5112 ;; Predicated FCADD, merging with the first input.
5113 (define_insn_and_rewrite "*cond_<optab><mode>_2"
5114 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5115 (unspec:SVE_FULL_F
5116 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5117 (unspec:SVE_FULL_F
5118 [(match_operand 4)
5119 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5120 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5121 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5122 SVE_COND_FCADD)
5123 (match_dup 2)]
5124 UNSPEC_SEL))]
5125 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5126 "@
5127 fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5128 movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5129 "&& !rtx_equal_p (operands[1], operands[4])"
5130 {
5131 operands[4] = copy_rtx (operands[1]);
5132 }
5133 [(set_attr "movprfx" "*,yes")]
5134 )
5135
5136 ;; Predicated FCADD, merging with an independent value.
5137 (define_insn_and_rewrite "*cond_<optab><mode>_any"
5138 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5139 (unspec:SVE_FULL_F
5140 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5141 (unspec:SVE_FULL_F
5142 [(match_operand 5)
5143 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5144 (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5145 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5146 SVE_COND_FCADD)
5147 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5148 UNSPEC_SEL))]
5149 "TARGET_SVE
5150 && !rtx_equal_p (operands[2], operands[4])
5151 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5152 "@
5153 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5154 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5155 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5156 #"
5157 "&& 1"
5158 {
5159 if (reload_completed
5160 && register_operand (operands[4], <MODE>mode)
5161 && !rtx_equal_p (operands[0], operands[4]))
5162 {
5163 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5164 operands[4], operands[1]));
5165 operands[4] = operands[2] = operands[0];
5166 }
5167 else if (!rtx_equal_p (operands[1], operands[5]))
5168 operands[5] = copy_rtx (operands[1]);
5169 else
5170 FAIL;
5171 }
5172 [(set_attr "movprfx" "yes")]
5173 )
5174
5175 ;; -------------------------------------------------------------------------
5176 ;; ---- [FP] Subtraction
5177 ;; -------------------------------------------------------------------------
5178 ;; Includes:
5179 ;; - FSUB
5180 ;; - FSUBR
5181 ;; -------------------------------------------------------------------------
5182
5183 ;; Predicated floating-point subtraction.
5184 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5185 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5186 (unspec:SVE_FULL_F
5187 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5188 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5189 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5190 (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5191 SVE_COND_FP_SUB))]
5192 "TARGET_SVE"
5193 "@
5194 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5195 #
5196 fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5197 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5198 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5199 movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5200 ; Split the unpredicated form after reload, so that we don't have
5201 ; the unnecessary PTRUE.
5202 "&& reload_completed
5203 && register_operand (operands[2], <MODE>mode)
5204 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5205 [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5206 ""
5207 [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5208 )
5209
5210 ;; Predicated floating-point subtraction from a constant, merging with the
5211 ;; second input.
5212 (define_insn_and_rewrite "*cond_sub<mode>_3_const"
5213 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5214 (unspec:SVE_FULL_F
5215 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5216 (unspec:SVE_FULL_F
5217 [(match_operand 4)
5218 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5219 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5220 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5221 UNSPEC_COND_FSUB)
5222 (match_dup 3)]
5223 UNSPEC_SEL))]
5224 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5225 "@
5226 fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5227 movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5228 "&& !rtx_equal_p (operands[1], operands[4])"
5229 {
5230 operands[4] = copy_rtx (operands[1]);
5231 }
5232 [(set_attr "movprfx" "*,yes")]
5233 )
5234
5235 ;; Predicated floating-point subtraction from a constant, merging with an
5236 ;; independent value.
5237 (define_insn_and_rewrite "*cond_sub<mode>_any_const"
5238 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5239 (unspec:SVE_FULL_F
5240 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5241 (unspec:SVE_FULL_F
5242 [(match_operand 5)
5243 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5244 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5245 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5246 UNSPEC_COND_FSUB)
5247 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5248 UNSPEC_SEL))]
5249 "TARGET_SVE
5250 && !rtx_equal_p (operands[3], operands[4])
5251 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5252 "@
5253 movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5254 movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5255 #"
5256 "&& 1"
5257 {
5258 if (reload_completed
5259 && register_operand (operands[4], <MODE>mode)
5260 && !rtx_equal_p (operands[0], operands[4]))
5261 {
5262 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5263 operands[4], operands[1]));
5264 operands[4] = operands[3] = operands[0];
5265 }
5266 else if (!rtx_equal_p (operands[1], operands[5]))
5267 operands[5] = copy_rtx (operands[1]);
5268 else
5269 FAIL;
5270 }
5271 [(set_attr "movprfx" "yes")]
5272 )
5273
5274 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5275
5276 ;; -------------------------------------------------------------------------
5277 ;; ---- [FP] Absolute difference
5278 ;; -------------------------------------------------------------------------
5279 ;; Includes:
5280 ;; - FABD
5281 ;; -------------------------------------------------------------------------
5282
5283 ;; Predicated floating-point absolute difference.
5284 (define_expand "@aarch64_pred_abd<mode>"
5285 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5286 (unspec:SVE_FULL_F
5287 [(match_operand:<VPRED> 1 "register_operand")
5288 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5289 (unspec:SVE_FULL_F
5290 [(match_dup 1)
5291 (match_dup 4)
5292 (match_operand:SVE_FULL_F 2 "register_operand")
5293 (match_operand:SVE_FULL_F 3 "register_operand")]
5294 UNSPEC_COND_FSUB)]
5295 UNSPEC_COND_FABS))]
5296 "TARGET_SVE"
5297 )
5298
5299 ;; Predicated floating-point absolute difference.
5300 (define_insn_and_rewrite "*aarch64_pred_abd<mode>"
5301 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5302 (unspec:SVE_FULL_F
5303 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5304 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5305 (unspec:SVE_FULL_F
5306 [(match_operand 5)
5307 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5308 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5309 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5310 UNSPEC_COND_FSUB)]
5311 UNSPEC_COND_FABS))]
5312 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5313 "@
5314 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5315 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5316 "&& !rtx_equal_p (operands[1], operands[5])"
5317 {
5318 operands[5] = copy_rtx (operands[1]);
5319 }
5320 [(set_attr "movprfx" "*,yes")]
5321 )
5322
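;; Predicated floating-point absolute difference with merging.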
5323 (define_expand "@aarch64_cond_abd<mode>"
5324 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5325 (unspec:SVE_FULL_F
5326 [(match_operand:<VPRED> 1 "register_operand")
5327 (unspec:SVE_FULL_F
5328 [(match_dup 1)
5329 (const_int SVE_STRICT_GP)
5330 (unspec:SVE_FULL_F
5331 [(match_dup 1)
5332 (const_int SVE_STRICT_GP)
5333 (match_operand:SVE_FULL_F 2 "register_operand")
5334 (match_operand:SVE_FULL_F 3 "register_operand")]
5335 UNSPEC_COND_FSUB)]
5336 UNSPEC_COND_FABS)
5337 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5338 UNSPEC_SEL))]
5339 "TARGET_SVE"
5340 {
5341 if (rtx_equal_p (operands[3], operands[4]))
5342 std::swap (operands[2], operands[3]);
5343 })
5344
5345 ;; Predicated floating-point absolute difference, merging with the first
5346 ;; input.
5347 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
5348 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5349 (unspec:SVE_FULL_F
5350 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5351 (unspec:SVE_FULL_F
5352 [(match_operand 4)
5353 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5354 (unspec:SVE_FULL_F
5355 [(match_operand 6)
5356 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5357 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5358 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5359 UNSPEC_COND_FSUB)]
5360 UNSPEC_COND_FABS)
5361 (match_dup 2)]
5362 UNSPEC_SEL))]
5363 "TARGET_SVE
5364 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5365 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5366 "@
5367 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5368 movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5369 "&& (!rtx_equal_p (operands[1], operands[4])
5370 || !rtx_equal_p (operands[1], operands[6]))"
5371 {
5372 operands[4] = copy_rtx (operands[1]);
5373 operands[6] = copy_rtx (operands[1]);
5374 }
5375 [(set_attr "movprfx" "*,yes")]
5376 )
5377
5378 ;; Predicated floating-point absolute difference, merging with the second
5379 ;; input.
5380 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
5381 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5382 (unspec:SVE_FULL_F
5383 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5384 (unspec:SVE_FULL_F
5385 [(match_operand 4)
5386 (match_operand:SI 5 "aarch64_sve_gp_strictness")
5387 (unspec:SVE_FULL_F
5388 [(match_operand 6)
5389 (match_operand:SI 7 "aarch64_sve_gp_strictness")
5390 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5391 (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5392 UNSPEC_COND_FSUB)]
5393 UNSPEC_COND_FABS)
5394 (match_dup 3)]
5395 UNSPEC_SEL))]
5396 "TARGET_SVE
5397 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5398 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5399 "@
5400 fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5401 movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5402 "&& (!rtx_equal_p (operands[1], operands[4])
5403 || !rtx_equal_p (operands[1], operands[6]))"
5404 {
5405 operands[4] = copy_rtx (operands[1]);
5406 operands[6] = copy_rtx (operands[1]);
5407 }
5408 [(set_attr "movprfx" "*,yes")]
5409 )
5410
5411 ;; Predicated floating-point absolute difference, merging with an
5412 ;; independent value.
5413 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
5414 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5415 (unspec:SVE_FULL_F
5416 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5417 (unspec:SVE_FULL_F
5418 [(match_operand 5)
5419 (match_operand:SI 6 "aarch64_sve_gp_strictness")
5420 (unspec:SVE_FULL_F
5421 [(match_operand 7)
5422 (match_operand:SI 8 "aarch64_sve_gp_strictness")
5423 (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5424 (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5425 UNSPEC_COND_FSUB)]
5426 UNSPEC_COND_FABS)
5427 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5428 UNSPEC_SEL))]
5429 "TARGET_SVE
5430 && !rtx_equal_p (operands[2], operands[4])
5431 && !rtx_equal_p (operands[3], operands[4])
5432 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
5433 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
5434 "@
5435 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5436 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5437 movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5438 movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5439 #"
5440 "&& 1"
5441 {
5442 if (reload_completed
5443 && register_operand (operands[4], <MODE>mode)
5444 && !rtx_equal_p (operands[0], operands[4]))
5445 {
5446 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5447 operands[4], operands[1]));
5448 operands[4] = operands[3] = operands[0];
5449 }
5450 else if (!rtx_equal_p (operands[1], operands[5])
5451 || !rtx_equal_p (operands[1], operands[7]))
5452 {
5453 operands[5] = copy_rtx (operands[1]);
5454 operands[7] = copy_rtx (operands[1]);
5455 }
5456 else
5457 FAIL;
5458 }
5459 [(set_attr "movprfx" "yes")]
5460 )
5461
5462 ;; -------------------------------------------------------------------------
5463 ;; ---- [FP] Multiplication
5464 ;; -------------------------------------------------------------------------
5465 ;; Includes:
5466 ;; - FMUL
5467 ;; -------------------------------------------------------------------------
5468
5469 ;; Predicated floating-point multiplication.
5470 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5471 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
5472 (unspec:SVE_FULL_F
5473 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5474 (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
5475 (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
5476 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
5477 SVE_COND_FP_MUL))]
5478 "TARGET_SVE"
5479 "@
5480 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5481 #
5482 fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5483 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5484 movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5485 ; Split the unpredicated form after reload, so that we don't have
5486 ; the unnecessary PTRUE.
5487 "&& reload_completed
5488 && register_operand (operands[3], <MODE>mode)
5489 && INTVAL (operands[4]) == SVE_RELAXED_GP"
5490 [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5491 ""
5492 [(set_attr "movprfx" "*,*,*,yes,yes")]
5493 )
5494
5495 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5496 ;; SVE_COND_FP_BINARY_I1.
5497
5498 ;; Unpredicated multiplication by selected lanes.
5499 (define_insn "@aarch64_mul_lane_<mode>"
5500 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5501 (mult:SVE_FULL_F
5502 (unspec:SVE_FULL_F
5503 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
5504 (match_operand:SI 3 "const_int_operand")]
5505 UNSPEC_SVE_LANE_SELECT)
5506 (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
5507 "TARGET_SVE"
5508 "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
5509 )
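
;; Illustrative use (assuming the usual ACLE mapping):
;; svmul_lane_f32 (x, y, 3) multiplies every element of x by element 3
;; of y, matching this pattern as "fmul z0.s, z1.s, z2.s[3]".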
5510
5511 ;; -------------------------------------------------------------------------
5512 ;; ---- [FP] Division
5513 ;; -------------------------------------------------------------------------
5514 ;; The patterns in this section are synthetic.
5515 ;; -------------------------------------------------------------------------
5516
5517 (define_expand "div<mode>3"
5518 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5519 (unspec:SVE_FULL_F
5520 [(match_dup 3)
5521 (const_int SVE_RELAXED_GP)
5522 (match_operand:SVE_FULL_F 1 "nonmemory_operand")
5523 (match_operand:SVE_FULL_F 2 "register_operand")]
5524 UNSPEC_COND_FDIV))]
5525 "TARGET_SVE"
5526 {
5527 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
5528 DONE;
5529
5530 operands[1] = force_reg (<MODE>mode, operands[1]);
5531 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5532 }
5533 )
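
;; When approximate division is allowed, aarch64_emit_approx_div instead
;; builds a Newton-Raphson sequence from FRECPE/FRECPS.  Roughly, since
;; FRECPS computes 2.0 - a * b, each step refines a reciprocal estimate as
;;
;;   x_{n+1} = x_n * FRECPS (d, x_n)
;;
;; before a final multiply by the dividend (a sketch of the idea, not
;; the exact emitted sequence).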
5534
5535 (define_expand "@aarch64_frecpe<mode>"
5536 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5537 (unspec:SVE_FULL_F
5538 [(match_operand:SVE_FULL_F 1 "register_operand")]
5539 UNSPEC_FRECPE))]
5540 "TARGET_SVE"
5541 )
5542
5543 (define_expand "@aarch64_frecps<mode>"
5544 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5545 (unspec:SVE_FULL_F
5546 [(match_operand:SVE_FULL_F 1 "register_operand")
5547 (match_operand:SVE_FULL_F 2 "register_operand")]
5548 UNSPEC_FRECPS))]
5549 "TARGET_SVE"
5550 )
5551
5552 ;; -------------------------------------------------------------------------
5553 ;; ---- [FP] Binary logical operations
5554 ;; -------------------------------------------------------------------------
4555 ;; Includes:
5556 ;; - AND
5557 ;; - EOR
5558 ;; - ORR
5559 ;; -------------------------------------------------------------------------
5560
5561 ;; Binary logical operations on floating-point modes. We avoid subregs
5562 ;; by providing this, but we need to use UNSPECs since rtx logical ops
5563 ;; aren't defined for floating-point modes.
5564 (define_insn "*<optab><mode>3"
5565 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5566 (unspec:SVE_FULL_F
5567 [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5568 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5569 LOGICALF))]
5570 "TARGET_SVE"
5571 "<logicalf_op>\t%0.d, %1.d, %2.d"
5572 )
5573
5574 ;; -------------------------------------------------------------------------
5575 ;; ---- [FP] Sign copying
5576 ;; -------------------------------------------------------------------------
5577 ;; The patterns in this section are synthetic.
5578 ;; -------------------------------------------------------------------------
5579
5580 (define_expand "copysign<mode>3"
5581 [(match_operand:SVE_FULL_F 0 "register_operand")
5582 (match_operand:SVE_FULL_F 1 "register_operand")
5583 (match_operand:SVE_FULL_F 2 "register_operand")]
5584 "TARGET_SVE"
5585 {
5586 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5587 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
5588 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5589 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5590
5591 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5592 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5593
5594 emit_insn (gen_and<v_int_equiv>3
5595 (sign, arg2,
5596 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5597 HOST_WIDE_INT_M1U
5598 << bits)));
5599 emit_insn (gen_and<v_int_equiv>3
5600 (mant, arg1,
5601 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5602 ~(HOST_WIDE_INT_M1U
5603 << bits))));
5604 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
5605 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5606 DONE;
5607 }
5608 )
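
;; A scalar C sketch of the same bit manipulation (illustrative only;
;; the helper name is made up):
;;
;;   #include <stdint.h>
;;   #include <string.h>
;;   float copysignf_bits (float x, float y)
;;   {
;;     uint32_t ix, iy;
;;     memcpy (&ix, &x, sizeof ix);
;;     memcpy (&iy, &y, sizeof iy);
;;     ix = (iy & 0x80000000u) | (ix & 0x7fffffffu);  /* sign | magnitude */
;;     memcpy (&x, &ix, sizeof x);
;;     return x;
;;   }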
5609
5610 (define_expand "xorsign<mode>3"
5611 [(match_operand:SVE_FULL_F 0 "register_operand")
5612 (match_operand:SVE_FULL_F 1 "register_operand")
5613 (match_operand:SVE_FULL_F 2 "register_operand")]
5614 "TARGET_SVE"
5615 {
5616 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5617 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5618 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5619
5620 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5621 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5622
5623 emit_insn (gen_and<v_int_equiv>3
5624 (sign, arg2,
5625 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5626 HOST_WIDE_INT_M1U
5627 << bits)));
5628 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
5629 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5630 DONE;
5631 }
5632 )
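
;; I.e. xorsign (x, y) = x ^ (y & signmask): the sign of x is flipped
;; when y is negative, implementing x * copysign (1.0, y) without the
;; second AND that copysign needs.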
5633
5634 ;; -------------------------------------------------------------------------
5635 ;; ---- [FP] Maximum and minimum
5636 ;; -------------------------------------------------------------------------
5637 ;; Includes:
5638 ;; - FMAX
5639 ;; - FMAXNM
5640 ;; - FMIN
5641 ;; - FMINNM
5642 ;; -------------------------------------------------------------------------
5643
5644 ;; Unpredicated fmax/fmin (the libm functions). The optabs for the
5645 ;; smin/smax rtx codes are handled in the generic section above.
5646 (define_expand "<maxmin_uns><mode>3"
5647 [(set (match_operand:SVE_FULL_F 0 "register_operand")
5648 (unspec:SVE_FULL_F
5649 [(match_dup 3)
5650 (const_int SVE_RELAXED_GP)
5651 (match_operand:SVE_FULL_F 1 "register_operand")
5652 (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
5653 SVE_COND_FP_MAXMIN_PUBLIC))]
5654 "TARGET_SVE"
5655 {
5656 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5657 }
5658 )
5659
5660 ;; Predicated floating-point maximum/minimum.
5661 (define_insn "@aarch64_pred_<optab><mode>"
5662 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
5663 (unspec:SVE_FULL_F
5664 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5665 (match_operand:SI 4 "aarch64_sve_gp_strictness")
5666 (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
5667 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
5668 SVE_COND_FP_MAXMIN))]
5669 "TARGET_SVE"
5670 "@
5671 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5672 <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5673 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5674 movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5675 [(set_attr "movprfx" "*,*,yes,yes")]
5676 )
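
;; Note on vsB above: the FMAX/FMIN/FMAXNM/FMINNM immediate forms only
;; accept #0.0 and #1.0, e.g. "fmaxnm z0.s, p0/m, z0.s, #0.0"
;; (illustrative), which is what the first alternative relies on.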
5677
5678 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5679 ;; SVE_COND_FP_BINARY_I1.
5680
5681 ;; -------------------------------------------------------------------------
5682 ;; ---- [PRED] Binary logical operations
5683 ;; -------------------------------------------------------------------------
5684 ;; Includes:
5685 ;; - AND
5686 ;; - ANDS
5687 ;; - EOR
5688 ;; - EORS
5689 ;; - ORR
5690 ;; - ORRS
5691 ;; -------------------------------------------------------------------------
5692
5693 ;; Predicate AND. We can reuse one of the inputs as the GP.
5694 ;; Doubling the second operand is the preferred implementation
5695 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
5696 (define_insn "and<mode>3"
5697 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5698 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
5699 (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
5700 "TARGET_SVE"
5701 "and\t%0.b, %1/z, %2.b, %2.b"
5702 )
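
;; For example, "and p0.b, p1/z, p2.b, p2.b" computes p1 & p2 (p2 ANDed
;; with itself under GP p1), and disassemblers show it under its
;; preferred alias "mov p0.b, p1/z, p2.b".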
5703
5704 ;; Unpredicated predicate EOR and ORR.
5705 (define_expand "<optab><mode>3"
5706 [(set (match_operand:PRED_ALL 0 "register_operand")
5707 (and:PRED_ALL
5708 (LOGICAL_OR:PRED_ALL
5709 (match_operand:PRED_ALL 1 "register_operand")
5710 (match_operand:PRED_ALL 2 "register_operand"))
5711 (match_dup 3)))]
5712 "TARGET_SVE"
5713 {
5714 operands[3] = aarch64_ptrue_reg (<MODE>mode);
5715 }
5716 )
5717
5718 ;; Predicated predicate AND, EOR and ORR.
5719 (define_insn "@aarch64_pred_<optab><mode>_z"
5720 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5721 (and:PRED_ALL
5722 (LOGICAL:PRED_ALL
5723 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5724 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5725 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5726 "TARGET_SVE"
5727 "<logical>\t%0.b, %1/z, %2.b, %3.b"
5728 )
5729
5730 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
5731 ;; the GP. Store the result in operand 0 and set the flags in the same
5732 ;; way as for PTEST.
5733 (define_insn "*<optab><mode>3_cc"
5734 [(set (reg:CC_NZC CC_REGNUM)
5735 (unspec:CC_NZC
5736 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5737 (match_operand 4)
5738 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5739 (and:PRED_ALL
5740 (LOGICAL:PRED_ALL
5741 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5742 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5743 (match_dup 4))]
5744 UNSPEC_PTEST))
5745 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5746 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
5747 (match_dup 4)))]
5748 "TARGET_SVE"
5749 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5750 )
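
;; E.g. "ands p0.b, p1/z, p2.b, p3.b" writes p0 and also sets the
;; condition flags as a subsequent "ptest p1, p0.b" would, allowing the
;; separate PTEST to be removed (a sketch of the combined effect).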
5751
5752 ;; Same with just the flags result.
5753 (define_insn "*<optab><mode>3_ptest"
5754 [(set (reg:CC_NZC CC_REGNUM)
5755 (unspec:CC_NZC
5756 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5757 (match_operand 4)
5758 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5759 (and:PRED_ALL
5760 (LOGICAL:PRED_ALL
5761 (match_operand:PRED_ALL 2 "register_operand" "Upa")
5762 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5763 (match_dup 4))]
5764 UNSPEC_PTEST))
5765 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5766 "TARGET_SVE"
5767 "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5768 )
5769
5770 ;; -------------------------------------------------------------------------
5771 ;; ---- [PRED] Binary logical operations (inverted second input)
5772 ;; -------------------------------------------------------------------------
5773 ;; Includes:
5774 ;; - BIC
5775 ;; - ORN
5776 ;; -------------------------------------------------------------------------
5777
5778 ;; Predicated predicate BIC and ORN.
5779 (define_insn "aarch64_pred_<nlogical><mode>_z"
5780 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5781 (and:PRED_ALL
5782 (NLOGICAL:PRED_ALL
5783 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5784 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5785 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5786 "TARGET_SVE"
5787 "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
5788 )
5789
5790 ;; Same, but set the flags as a side-effect.
5791 (define_insn "*<nlogical><mode>3_cc"
5792 [(set (reg:CC_NZC CC_REGNUM)
5793 (unspec:CC_NZC
5794 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5795 (match_operand 4)
5796 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5797 (and:PRED_ALL
5798 (NLOGICAL:PRED_ALL
5799 (not:PRED_ALL
5800 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5801 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5802 (match_dup 4))]
5803 UNSPEC_PTEST))
5804 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5805 (and:PRED_ALL (NLOGICAL:PRED_ALL
5806 (not:PRED_ALL (match_dup 3))
5807 (match_dup 2))
5808 (match_dup 4)))]
5809 "TARGET_SVE"
5810 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5811 )
5812
5813 ;; Same with just the flags result.
5814 (define_insn "*<nlogical><mode>3_ptest"
5815 [(set (reg:CC_NZC CC_REGNUM)
5816 (unspec:CC_NZC
5817 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5818 (match_operand 4)
5819 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5820 (and:PRED_ALL
5821 (NLOGICAL:PRED_ALL
5822 (not:PRED_ALL
5823 (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5824 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5825 (match_dup 4))]
5826 UNSPEC_PTEST))
5827 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5828 "TARGET_SVE"
5829 "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5830 )
5831
5832 ;; -------------------------------------------------------------------------
5833 ;; ---- [PRED] Binary logical operations (inverted result)
5834 ;; -------------------------------------------------------------------------
5835 ;; Includes:
5836 ;; - NAND
5837 ;; - NOR
5838 ;; -------------------------------------------------------------------------
5839
5840 ;; Predicated predicate NAND and NOR.
5841 (define_insn "aarch64_pred_<logical_nn><mode>_z"
5842 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5843 (and:PRED_ALL
5844 (NLOGICAL:PRED_ALL
5845 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5846 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5847 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5848 "TARGET_SVE"
5849 "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
5850 )
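;; Likewise, the RTL above computes:
;;
;;   NAND: operand 0 = operand 1 & (~operand 2 | ~operand 3)
;;   NOR:  operand 0 = operand 1 & (~operand 2 & ~operand 3)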
5851
5852 ;; Same, but set the flags as a side-effect.
5853 (define_insn "*<logical_nn><mode>3_cc"
5854 [(set (reg:CC_NZC CC_REGNUM)
5855 (unspec:CC_NZC
5856 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5857 (match_operand 4)
5858 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5859 (and:PRED_ALL
5860 (NLOGICAL:PRED_ALL
5861 (not:PRED_ALL
5862 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5863 (not:PRED_ALL
5864 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5865 (match_dup 4))]
5866 UNSPEC_PTEST))
5867 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5868 (and:PRED_ALL (NLOGICAL:PRED_ALL
5869 (not:PRED_ALL (match_dup 2))
5870 (not:PRED_ALL (match_dup 3)))
5871 (match_dup 4)))]
5872 "TARGET_SVE"
5873 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5874 )
5875
5876 ;; Same with just the flags result.
5877 (define_insn "*<logical_nn><mode>3_ptest"
5878 [(set (reg:CC_NZC CC_REGNUM)
5879 (unspec:CC_NZC
5880 [(match_operand:VNx16BI 1 "register_operand" "Upa")
5881 (match_operand 4)
5882 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5883 (and:PRED_ALL
5884 (NLOGICAL:PRED_ALL
5885 (not:PRED_ALL
5886 (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5887 (not:PRED_ALL
5888 (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5889 (match_dup 4))]
5890 UNSPEC_PTEST))
5891 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5892 "TARGET_SVE"
5893 "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5894 )
5895
5896 ;; =========================================================================
5897 ;; == Ternary arithmetic
5898 ;; =========================================================================
5899
5900 ;; -------------------------------------------------------------------------
5901 ;; ---- [INT] MLA and MAD
5902 ;; -------------------------------------------------------------------------
5903 ;; Includes:
5904 ;; - MAD
5905 ;; - MLA
5906 ;; -------------------------------------------------------------------------
5907
5908 ;; Unpredicated integer addition of product.
5909 (define_expand "fma<mode>4"
5910 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5911 (plus:SVE_FULL_I
5912 (unspec:SVE_FULL_I
5913 [(match_dup 4)
5914 (mult:SVE_FULL_I
5915 (match_operand:SVE_FULL_I 1 "register_operand")
5916 (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
5917 UNSPEC_PRED_X)
5918 (match_operand:SVE_FULL_I 3 "register_operand")))]
5919 "TARGET_SVE"
5920 {
5921 if (aarch64_prepare_sve_int_fma (operands, PLUS))
5922 DONE;
5923 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5924 }
5925 )
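;; As an illustration (C source, not part of this file), a loop such as:
;;
;;   for (int i = 0; i < n; ++i)
;;     acc[i] += a[i] * b[i];
;;
;; is a typical source of this pattern.  Operand 2 is allowed to be a
;; constant; aarch64_prepare_sve_int_fma gets the first chance to
;; handle such cases, before the predicated form is generated.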
5926
5927 ;; Predicated integer addition of product.
5928 (define_insn "@aarch64_pred_fma<mode>"
5929 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5930 (plus:SVE_FULL_I
5931 (unspec:SVE_FULL_I
5932 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5933 (mult:SVE_FULL_I
5934 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5935 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5936 UNSPEC_PRED_X)
5937 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
5938 "TARGET_SVE"
5939 "@
5940 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5941 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5942 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5943 [(set_attr "movprfx" "*,*,yes")]
5944 )
5945
5946 ;; Predicated integer addition of product with merging.
5947 (define_expand "cond_fma<mode>"
5948 [(set (match_operand:SVE_FULL_I 0 "register_operand")
5949 (unspec:SVE_FULL_I
5950 [(match_operand:<VPRED> 1 "register_operand")
5951 (plus:SVE_FULL_I
5952 (mult:SVE_FULL_I
5953 (match_operand:SVE_FULL_I 2 "register_operand")
5954 (match_operand:SVE_FULL_I 3 "general_operand"))
5955 (match_operand:SVE_FULL_I 4 "register_operand"))
5956 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5957 UNSPEC_SEL))]
5958 "TARGET_SVE"
5959 {
5960 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
5961 DONE;
5962 /* Swap the multiplication operands if the fallback value is the
5963 second of the two. */
5964 if (rtx_equal_p (operands[3], operands[5]))
5965 std::swap (operands[2], operands[3]);
5966 }
5967 )
5968
5969 ;; Predicated integer addition of product, merging with the first input.
5970 (define_insn "*cond_fma<mode>_2"
5971 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5972 (unspec:SVE_FULL_I
5973 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5974 (plus:SVE_FULL_I
5975 (mult:SVE_FULL_I
5976 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
5977 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5978 (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
5979 (match_dup 2)]
5980 UNSPEC_SEL))]
5981 "TARGET_SVE"
5982 "@
5983 mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5984 movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
5985 [(set_attr "movprfx" "*,yes")]
5986 )
5987
5988 ;; Predicated integer addition of product, merging with the third input.
5989 (define_insn "*cond_fma<mode>_4"
5990 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5991 (unspec:SVE_FULL_I
5992 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5993 (plus:SVE_FULL_I
5994 (mult:SVE_FULL_I
5995 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
5996 (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5997 (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
5998 (match_dup 4)]
5999 UNSPEC_SEL))]
6000 "TARGET_SVE"
6001 "@
6002 mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6003 movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6004 [(set_attr "movprfx" "*,yes")]
6005 )
6006
6007 ;; Predicated integer addition of product, merging with an independent value.
6008 (define_insn_and_rewrite "*cond_fma<mode>_any"
6009 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6010 (unspec:SVE_FULL_I
6011 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6012 (plus:SVE_FULL_I
6013 (mult:SVE_FULL_I
6014 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6015 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
6016 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
6017 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6018 UNSPEC_SEL))]
6019 "TARGET_SVE
6020 && !rtx_equal_p (operands[2], operands[5])
6021 && !rtx_equal_p (operands[3], operands[5])
6022 && !rtx_equal_p (operands[4], operands[5])"
6023 "@
6024 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6025 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6026 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6027 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6028 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6029 #"
6030 "&& reload_completed
6031 && register_operand (operands[5], <MODE>mode)
6032 && !rtx_equal_p (operands[0], operands[5])"
6033 {
6034 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6035 operands[5], operands[1]));
6036 operands[5] = operands[4] = operands[0];
6037 }
6038 [(set_attr "movprfx" "yes")]
6039 )
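;; The final alternative above has no single-instruction form ("#").
;; The rewrite first emits a vcond_mask (SEL) that merges operand 4
;; with the independent value, after which the result register itself
;; serves as the accumulator for the MLA.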
6040
6041 ;; -------------------------------------------------------------------------
6042 ;; ---- [INT] MLS and MSB
6043 ;; -------------------------------------------------------------------------
6044 ;; Includes:
6045 ;; - MLS
6046 ;; - MSB
6047 ;; -------------------------------------------------------------------------
6048
6049 ;; Unpredicated integer subtraction of product.
6050 (define_expand "fnma<mode>4"
6051 [(set (match_operand:SVE_FULL_I 0 "register_operand")
6052 (minus:SVE_FULL_I
6053 (match_operand:SVE_FULL_I 3 "register_operand")
6054 (unspec:SVE_FULL_I
6055 [(match_dup 4)
6056 (mult:SVE_FULL_I
6057 (match_operand:SVE_FULL_I 1 "register_operand")
6058 (match_operand:SVE_FULL_I 2 "general_operand"))]
6059 UNSPEC_PRED_X)))]
6060 "TARGET_SVE"
6061 {
6062 if (aarch64_prepare_sve_int_fma (operands, MINUS))
6063 DONE;
6064 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6065 }
6066 )
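;; The C-level analogue (illustrative only) is:
;;
;;   for (int i = 0; i < n; ++i)
;;     acc[i] -= a[i] * b[i];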
6067
6068 ;; Predicated integer subtraction of product.
6069 (define_insn "@aarch64_pred_fnma<mode>"
6070 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
6071 (minus:SVE_FULL_I
6072 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
6073 (unspec:SVE_FULL_I
6074 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6075 (mult:SVE_FULL_I
6076 (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
6077 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
6078 UNSPEC_PRED_X)))]
6079 "TARGET_SVE"
6080 "@
6081 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6082 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6083 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6084 [(set_attr "movprfx" "*,*,yes")]
6085 )
6086
6087 ;; Predicated integer subtraction of product with merging.
6088 (define_expand "cond_fnma<mode>"
6089 [(set (match_operand:SVE_FULL_I 0 "register_operand")
6090 (unspec:SVE_FULL_I
6091 [(match_operand:<VPRED> 1 "register_operand")
6092 (minus:SVE_FULL_I
6093 (match_operand:SVE_FULL_I 4 "register_operand")
6094 (mult:SVE_FULL_I
6095 (match_operand:SVE_FULL_I 2 "register_operand")
6096 (match_operand:SVE_FULL_I 3 "general_operand")))
6097 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
6098 UNSPEC_SEL))]
6099 "TARGET_SVE"
6100 {
6101 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6102 DONE;
6103 /* Swap the multiplication operands if the fallback value is the
6104 second of the two. */
6105 if (rtx_equal_p (operands[3], operands[5]))
6106 std::swap (operands[2], operands[3]);
6107 }
6108 )
6109
6110 ;; Predicated integer subtraction of product, merging with the first input.
6111 (define_insn "*cond_fnma<mode>_2"
6112 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6113 (unspec:SVE_FULL_I
6114 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6115 (minus:SVE_FULL_I
6116 (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
6117 (mult:SVE_FULL_I
6118 (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
6119 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6120 (match_dup 2)]
6121 UNSPEC_SEL))]
6122 "TARGET_SVE"
6123 "@
6124 msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6125 movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6126 [(set_attr "movprfx" "*,yes")]
6127 )
6128
6129 ;; Predicated integer subtraction of product, merging with the third input.
6130 (define_insn "*cond_fnma<mode>_4"
6131 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6132 (unspec:SVE_FULL_I
6133 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6134 (minus:SVE_FULL_I
6135 (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
6136 (mult:SVE_FULL_I
6137 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6138 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6139 (match_dup 4)]
6140 UNSPEC_SEL))]
6141 "TARGET_SVE"
6142 "@
6143 mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6144 movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6145 [(set_attr "movprfx" "*,yes")]
6146 )
6147
6148 ;; Predicated integer subtraction of product, merging with an
6149 ;; independent value.
6150 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6151 [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6152 (unspec:SVE_FULL_I
6153 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6154 (minus:SVE_FULL_I
6155 (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
6156 (mult:SVE_FULL_I
6157 (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6158 (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
6159 (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6160 UNSPEC_SEL))]
6161 "TARGET_SVE
6162 && !rtx_equal_p (operands[2], operands[5])
6163 && !rtx_equal_p (operands[3], operands[5])
6164 && !rtx_equal_p (operands[4], operands[5])"
6165 "@
6166 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6167 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6168 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6169 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6170 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6171 #"
6172 "&& reload_completed
6173 && register_operand (operands[5], <MODE>mode)
6174 && !rtx_equal_p (operands[0], operands[5])"
6175 {
6176 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6177 operands[5], operands[1]));
6178 operands[5] = operands[4] = operands[0];
6179 }
6180 [(set_attr "movprfx" "yes")]
6181 )
6182
6183 ;; -------------------------------------------------------------------------
6184 ;; ---- [INT] Dot product
6185 ;; -------------------------------------------------------------------------
6186 ;; Includes:
6187 ;; - SDOT
6188 ;; - SUDOT (I8MM)
6189 ;; - UDOT
6190 ;; - USDOT (I8MM)
6191 ;; -------------------------------------------------------------------------
6192
6193 ;; Four-element integer dot-product with accumulation.
6194 (define_insn "<sur>dot_prod<vsi2qi>"
6195 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6196 (plus:SVE_FULL_SDI
6197 (unspec:SVE_FULL_SDI
6198 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6199 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6200 DOTPROD)
6201 (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6202 "TARGET_SVE"
6203 "@
6204 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6205 movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6206 [(set_attr "movprfx" "*,yes")]
6207 )
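;; For the .S variant, for example (illustrative C, with a and b
;; pointing to int8_t):
;;
;;   for (int i = 0; i < n; ++i)
;;     acc += (int32_t) a[i] * (int32_t) b[i];
;;
;; vectorizes to SDOT, with each 32-bit lane accumulating the products
;; of four adjacent byte elements; the .D variant does the same with
;; halfword inputs.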
6208
6209 ;; Four-element integer dot-product by selected lanes with accumulation.
6210 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6211 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6212 (plus:SVE_FULL_SDI
6213 (unspec:SVE_FULL_SDI
6214 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6215 (unspec:<VSI2QI>
6216 [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6217 (match_operand:SI 3 "const_int_operand")]
6218 UNSPEC_SVE_LANE_SELECT)]
6219 DOTPROD)
6220 (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6221 "TARGET_SVE"
6222 "@
6223 <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6224 movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6225 [(set_attr "movprfx" "*,yes")]
6226 )
6227
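;; Four-element mixed-sign (USDOT) integer dot-product with
;; accumulation.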
6228 (define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
6229 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6230 (plus:VNx4SI_ONLY
6231 (unspec:VNx4SI_ONLY
6232 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6233 (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6234 DOTPROD_US_ONLY)
6235 (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
6236 "TARGET_SVE_I8MM"
6237 "@
6238 <sur>dot\\t%0.s, %1.b, %2.b
6239 movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
6240 [(set_attr "movprfx" "*,yes")]
6241 )
6242
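;; Four-element mixed-sign integer dot-product by selected lanes with
;; accumulation.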
6243 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6244 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6245 (plus:VNx4SI_ONLY
6246 (unspec:VNx4SI_ONLY
6247 [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6248 (unspec:<VSI2QI>
6249 [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
6250 (match_operand:SI 3 "const_int_operand")]
6251 UNSPEC_SVE_LANE_SELECT)]
6252 DOTPROD_I8MM)
6253 (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
6254 "TARGET_SVE_I8MM"
6255 "@
6256 <sur>dot\\t%0.s, %1.b, %2.b[%3]
6257 movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
6258 [(set_attr "movprfx" "*,yes")]
6259 )
6260
6261 ;; -------------------------------------------------------------------------
6262 ;; ---- [INT] Sum of absolute differences
6263 ;; -------------------------------------------------------------------------
6264 ;; The patterns in this section are synthetic.
6265 ;; -------------------------------------------------------------------------
6266
6267 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6268 ;; operands 1 and 2. The sequence also has to perform a widening sum of
6269 ;; the absolute differences, accumulating it into operand 3 before
6270 ;; copying that into the result, operand 0.
6271 ;; Perform that with a sequence of:
6272 ;; MOV ones.b, #1
6273 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
6274 ;; MOVPRFX op0, op3 // If necessary
6275 ;; UDOT op0.s, diff.b, ones.b
6276 (define_expand "<sur>sad<vsi2qi>"
6277 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6278 (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6279 (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6280 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6281 "TARGET_SVE"
6282 {
6283 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6284 rtx diff = gen_reg_rtx (<VSI2QI>mode);
6285 emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6286 emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6287 DONE;
6288 }
6289 )
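;; An illustrative C source loop for this expander (not part of this
;; file) would be:
;;
;;   for (int i = 0; i < n; ++i)
;;     sum += __builtin_abs (a[i] - b[i]);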
6290
6291 ;; -------------------------------------------------------------------------
6292 ;; ---- [INT] Matrix multiply-accumulate
6293 ;; -------------------------------------------------------------------------
6294 ;; Includes:
6295 ;; - SMMLA (I8MM)
6296 ;; - UMMLA (I8MM)
6297 ;; - USMMLA (I8MM)
6298 ;; -------------------------------------------------------------------------
6299
6300 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
6301 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6302 (plus:VNx4SI_ONLY
6303 (unspec:VNx4SI_ONLY
6304 [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
6305 (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
6306 MATMUL)
6307 (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
6308 "TARGET_SVE_I8MM"
6309 "@
6310 <sur>mmla\\t%0.s, %2.b, %3.b
6311 movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
6312 [(set_attr "movprfx" "*,yes")]
6313 )
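;; Each 128-bit segment of the result is treated as a 2x2 matrix of
;; 32-bit accumulators, updated with the product of a 2x8 and an 8x2
;; matrix of byte elements taken from operands 2 and 3.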
6314
6315 ;; -------------------------------------------------------------------------
6316 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6317 ;; -------------------------------------------------------------------------
6318 ;; Includes merging patterns for:
6319 ;; - FMAD
6320 ;; - FMLA
6321 ;; - FMLS
6322 ;; - FMSB
6323 ;; - FNMAD
6324 ;; - FNMLA
6325 ;; - FNMLS
6326 ;; - FNMSB
6327 ;; -------------------------------------------------------------------------
6328
6329 ;; Unpredicated floating-point ternary operations.
6330 (define_expand "<optab><mode>4"
6331 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6332 (unspec:SVE_FULL_F
6333 [(match_dup 4)
6334 (const_int SVE_RELAXED_GP)
6335 (match_operand:SVE_FULL_F 1 "register_operand")
6336 (match_operand:SVE_FULL_F 2 "register_operand")
6337 (match_operand:SVE_FULL_F 3 "register_operand")]
6338 SVE_COND_FP_TERNARY))]
6339 "TARGET_SVE"
6340 {
6341 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6342 }
6343 )
6344
6345 ;; Predicated floating-point ternary operations.
6346 (define_insn "@aarch64_pred_<optab><mode>"
6347 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
6348 (unspec:SVE_FULL_F
6349 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6350 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6351 (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
6352 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
6353 (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
6354 SVE_COND_FP_TERNARY))]
6355 "TARGET_SVE"
6356 "@
6357 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6358 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6359 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6360 [(set_attr "movprfx" "*,*,yes")]
6361 )
6362
6363 ;; Predicated floating-point ternary operations with merging.
6364 (define_expand "@cond_<optab><mode>"
6365 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6366 (unspec:SVE_FULL_F
6367 [(match_operand:<VPRED> 1 "register_operand")
6368 (unspec:SVE_FULL_F
6369 [(match_dup 1)
6370 (const_int SVE_STRICT_GP)
6371 (match_operand:SVE_FULL_F 2 "register_operand")
6372 (match_operand:SVE_FULL_F 3 "register_operand")
6373 (match_operand:SVE_FULL_F 4 "register_operand")]
6374 SVE_COND_FP_TERNARY)
6375 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6376 UNSPEC_SEL))]
6377 "TARGET_SVE"
6378 {
6379 /* Swap the multiplication operands if the fallback value is the
6380 second of the two. */
6381 if (rtx_equal_p (operands[3], operands[5]))
6382 std::swap (operands[2], operands[3]);
6383 })
6384
6385 ;; Predicated floating-point ternary operations, merging with the
6386 ;; first input.
6387 (define_insn_and_rewrite "*cond_<optab><mode>_2"
6388 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6389 (unspec:SVE_FULL_F
6390 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6391 (unspec:SVE_FULL_F
6392 [(match_operand 5)
6393 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6394 (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6395 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6396 (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6397 SVE_COND_FP_TERNARY)
6398 (match_dup 2)]
6399 UNSPEC_SEL))]
6400 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6401 "@
6402 <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6403 movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6404 "&& !rtx_equal_p (operands[1], operands[5])"
6405 {
6406 operands[5] = copy_rtx (operands[1]);
6407 }
6408 [(set_attr "movprfx" "*,yes")]
6409 )
6410
6411 ;; Predicated floating-point ternary operations, merging with the
6412 ;; third input.
6413 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6414 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6415 (unspec:SVE_FULL_F
6416 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6417 (unspec:SVE_FULL_F
6418 [(match_operand 5)
6419 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6420 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6421 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6422 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6423 SVE_COND_FP_TERNARY)
6424 (match_dup 4)]
6425 UNSPEC_SEL))]
6426 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6427 "@
6428 <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6429 movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6430 "&& !rtx_equal_p (operands[1], operands[5])"
6431 {
6432 operands[5] = copy_rtx (operands[1]);
6433 }
6434 [(set_attr "movprfx" "*,yes")]
6435 )
6436
6437 ;; Predicated floating-point ternary operations, merging with an
6438 ;; independent value.
6439 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6440 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6441 (unspec:SVE_FULL_F
6442 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6443 (unspec:SVE_FULL_F
6444 [(match_operand 6)
6445 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6446 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
6447 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
6448 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
6449 SVE_COND_FP_TERNARY)
6450 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6451 UNSPEC_SEL))]
6452 "TARGET_SVE
6453 && !rtx_equal_p (operands[2], operands[5])
6454 && !rtx_equal_p (operands[3], operands[5])
6455 && !rtx_equal_p (operands[4], operands[5])
6456 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6457 "@
6458 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6459 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6460 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6461 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6462 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6463 #"
6464 "&& 1"
6465 {
6466 if (reload_completed
6467 && register_operand (operands[5], <MODE>mode)
6468 && !rtx_equal_p (operands[0], operands[5]))
6469 {
6470 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6471 operands[5], operands[1]));
6472 operands[5] = operands[4] = operands[0];
6473 }
6474 else if (!rtx_equal_p (operands[1], operands[6]))
6475 operands[6] = copy_rtx (operands[1]);
6476 else
6477 FAIL;
6478 }
6479 [(set_attr "movprfx" "yes")]
6480 )
6481
6482 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
6483 ;; (fma ...) since target-independent code won't understand the indexing.
6484 (define_insn "@aarch64_<optab>_lane_<mode>"
6485 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6486 (unspec:SVE_FULL_F
6487 [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
6488 (unspec:SVE_FULL_F
6489 [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6490 (match_operand:SI 3 "const_int_operand")]
6491 UNSPEC_SVE_LANE_SELECT)
6492 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6493 SVE_FP_TERNARY_LANE))]
6494 "TARGET_SVE"
6495 "@
6496 <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
6497 movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6498 [(set_attr "movprfx" "*,yes")]
6499 )
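;; For these lane forms, the immediate selects one element from each
;; 128-bit segment of operand 2, rather than a single element of the
;; vector as a whole.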
6500
6501 ;; -------------------------------------------------------------------------
6502 ;; ---- [FP] Complex multiply-add
6503 ;; -------------------------------------------------------------------------
6504 ;; Includes merging patterns for:
6505 ;; - FCMLA
6506 ;; -------------------------------------------------------------------------
6507
6508 ;; Predicated FCMLA.
6509 (define_insn "@aarch64_pred_<optab><mode>"
6510 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6511 (unspec:SVE_FULL_F
6512 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6513 (match_operand:SI 5 "aarch64_sve_gp_strictness")
6514 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6515 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6516 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6517 SVE_COND_FCMLA))]
6518 "TARGET_SVE"
6519 "@
6520 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6521 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6522 [(set_attr "movprfx" "*,yes")]
6523 )
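;; A full complex multiply-add is normally synthesized from two FCMLAs,
;; e.g. one with rotation #0 and one with rotation #90, each of which
;; contributes half of the partial products.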
6524
6525 ;; Predicated FCMLA with merging.
6526 (define_expand "@cond_<optab><mode>"
6527 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6528 (unspec:SVE_FULL_F
6529 [(match_operand:<VPRED> 1 "register_operand")
6530 (unspec:SVE_FULL_F
6531 [(match_dup 1)
6532 (const_int SVE_STRICT_GP)
6533 (match_operand:SVE_FULL_F 2 "register_operand")
6534 (match_operand:SVE_FULL_F 3 "register_operand")
6535 (match_operand:SVE_FULL_F 4 "register_operand")]
6536 SVE_COND_FCMLA)
6537 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6538 UNSPEC_SEL))]
6539 "TARGET_SVE"
6540 )
6541
6542 ;; Predicated FCMLA, merging with the third input.
6543 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6544 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6545 (unspec:SVE_FULL_F
6546 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6547 (unspec:SVE_FULL_F
6548 [(match_operand 5)
6549 (match_operand:SI 6 "aarch64_sve_gp_strictness")
6550 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6551 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6552 (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6553 SVE_COND_FCMLA)
6554 (match_dup 4)]
6555 UNSPEC_SEL))]
6556 "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6557 "@
6558 fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6559 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6560 "&& !rtx_equal_p (operands[1], operands[5])"
6561 {
6562 operands[5] = copy_rtx (operands[1]);
6563 }
6564 [(set_attr "movprfx" "*,yes")]
6565 )
6566
6567 ;; Predicated FCMLA, merging with an independent value.
6568 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6569 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
6570 (unspec:SVE_FULL_F
6571 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6572 (unspec:SVE_FULL_F
6573 [(match_operand 6)
6574 (match_operand:SI 7 "aarch64_sve_gp_strictness")
6575 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
6576 (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
6577 (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
6578 SVE_COND_FCMLA)
6579 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
6580 UNSPEC_SEL))]
6581 "TARGET_SVE
6582 && !rtx_equal_p (operands[4], operands[5])
6583 && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6584 "@
6585 movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6586 movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6587 movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6588 #"
6589 "&& 1"
6590 {
6591 if (reload_completed
6592 && register_operand (operands[5], <MODE>mode)
6593 && !rtx_equal_p (operands[0], operands[5]))
6594 {
6595 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6596 operands[5], operands[1]));
6597 operands[5] = operands[4] = operands[0];
6598 }
6599 else if (!rtx_equal_p (operands[1], operands[6]))
6600 operands[6] = copy_rtx (operands[1]);
6601 else
6602 FAIL;
6603 }
6604 [(set_attr "movprfx" "yes")]
6605 )
6606
6607 ;; Unpredicated FCMLA with indexing.
6608 (define_insn "@aarch64_<optab>_lane_<mode>"
6609 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
6610 (unspec:SVE_FULL_HSF
6611 [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
6612 (unspec:SVE_FULL_HSF
6613 [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
6614 (match_operand:SI 3 "const_int_operand")]
6615 UNSPEC_SVE_LANE_SELECT)
6616 (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
6617 FCMLA))]
6618 "TARGET_SVE"
6619 "@
6620 fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
6621 movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
6622 [(set_attr "movprfx" "*,yes")]
6623 )
6624
6625 ;; -------------------------------------------------------------------------
6626 ;; ---- [FP] Trigonometric multiply-add
6627 ;; -------------------------------------------------------------------------
6628 ;; Includes:
6629 ;; - FTMAD
6630 ;; -------------------------------------------------------------------------
6631
6632 (define_insn "@aarch64_sve_tmad<mode>"
6633 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6634 (unspec:SVE_FULL_F
6635 [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
6636 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6637 (match_operand:DI 3 "const_int_operand")]
6638 UNSPEC_FTMAD))]
6639 "TARGET_SVE"
6640 "@
6641 ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
6642 movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
6643 [(set_attr "movprfx" "*,yes")]
6644 )
6645
6646 ;; -------------------------------------------------------------------------
6647 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
6648 ;; -------------------------------------------------------------------------
6649 ;; Includes:
6650 ;; - BFDOT (BF16)
6651 ;; - BFMLALB (BF16)
6652 ;; - BFMLALT (BF16)
6653 ;; - BFMMLA (BF16)
6654 ;; -------------------------------------------------------------------------
6655
6656 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
6657 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
6658 (unspec:VNx4SF
6659 [(match_operand:VNx4SF 1 "register_operand" "0, w")
6660 (match_operand:VNx8BF 2 "register_operand" "w, w")
6661 (match_operand:VNx8BF 3 "register_operand" "w, w")]
6662 SVE_BFLOAT_TERNARY_LONG))]
6663 "TARGET_SVE_BF16"
6664 "@
6665 <sve_fp_op>\t%0.s, %2.h, %3.h
6666 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
6667 [(set_attr "movprfx" "*,yes")]
6668 )
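;; Operand 1 is the single-precision accumulator.  BFDOT sums pairs of
;; adjacent BF16 products into each .S lane, while BFMLALB and BFMLALT
;; widen the even- and odd-numbered .H elements respectively.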
6669
6670 ;; The immediate range is enforced before generating the instruction.
6671 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
6672 [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
6673 (unspec:VNx4SF
6674 [(match_operand:VNx4SF 1 "register_operand" "0, w")
6675 (match_operand:VNx8BF 2 "register_operand" "w, w")
6676 (match_operand:VNx8BF 3 "register_operand" "y, y")
6677 (match_operand:SI 4 "const_int_operand")]
6678 SVE_BFLOAT_TERNARY_LONG_LANE))]
6679 "TARGET_SVE_BF16"
6680 "@
6681 <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
6682 movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
6683 [(set_attr "movprfx" "*,yes")]
6684 )
6685
6686 ;; -------------------------------------------------------------------------
6687 ;; ---- [FP] Matrix multiply-accumulate
6688 ;; -------------------------------------------------------------------------
6689 ;; Includes:
6690 ;; - FMMLA (F32MM,F64MM)
6691 ;; -------------------------------------------------------------------------
6692
6693 ;; The mode iterator enforces the target requirements.
6694 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
6695 [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
6696 (unspec:SVE_MATMULF
6697 [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
6698 (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
6699 (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
6700 FMMLA))]
6701 "TARGET_SVE"
6702 "@
6703 <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
6704 movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
6705 [(set_attr "movprfx" "*,yes")]
6706 )
6707
6708 ;; =========================================================================
6709 ;; == Comparisons and selects
6710 ;; =========================================================================
6711
6712 ;; -------------------------------------------------------------------------
6713 ;; ---- [INT,FP] Select based on predicates
6714 ;; -------------------------------------------------------------------------
6715 ;; Includes merging patterns for:
6716 ;; - FMOV
6717 ;; - MOV
6718 ;; - SEL
6719 ;; -------------------------------------------------------------------------
6720
6721 ;; vcond_mask operand order: true, false, mask
6722 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
6723 ;; SEL operand order: mask, true, false
6724 (define_expand "@vcond_mask_<mode><vpred>"
6725 [(set (match_operand:SVE_FULL 0 "register_operand")
6726 (unspec:SVE_FULL
6727 [(match_operand:<VPRED> 3 "register_operand")
6728 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
6729 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
6730 UNSPEC_SEL))]
6731 "TARGET_SVE"
6732 {
6733 if (register_operand (operands[1], <MODE>mode))
6734 operands[2] = force_reg (<MODE>mode, operands[2]);
6735 }
6736 )
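;; In C-like terms (illustrative only), each lane computes:
;;
;;   op0[i] = mask[i] ? op1[i] : op2[i];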
6737
6738 ;; Selects between:
6739 ;; - two registers
6740 ;; - a duplicated immediate and a register
6741 ;; - a duplicated immediate and zero
6742 (define_insn "*vcond_mask_<mode><vpred>"
6743 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
6744 (unspec:SVE_FULL
6745 [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
6746 (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
6747 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
6748 UNSPEC_SEL))]
6749 "TARGET_SVE
6750 && (!register_operand (operands[1], <MODE>mode)
6751 || register_operand (operands[2], <MODE>mode))"
6752 "@
6753 sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
6754 mov\t%0.<Vetype>, %3/m, #%I1
6755 mov\t%0.<Vetype>, %3/z, #%I1
6756 fmov\t%0.<Vetype>, %3/m, #%1
6757 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
6758 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
6759 movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
6760 [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
6761 )
6762
6763 ;; Optimize selects between a duplicated scalar variable and another vector,
6764 ;; the latter of which can be a zero constant or a variable. Treat duplicates
6765 ;; of GPRs as being more expensive than duplicates of FPRs, since they
6766 ;; involve a move between the general and FP/SIMD register files.
6767 (define_insn "@aarch64_sel_dup<mode>"
6768 [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
6769 (unspec:SVE_FULL
6770 [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6771 (vec_duplicate:SVE_FULL
6772 (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
6773 (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
6774 UNSPEC_SEL))]
6775 "TARGET_SVE"
6776 "@
6777 mov\t%0.<Vetype>, %3/m, %<vwcore>1
6778 mov\t%0.<Vetype>, %3/m, %<Vetype>1
6779 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6780 movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
6781 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6782 movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
6783 [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
6784 )
6785
6786 ;; -------------------------------------------------------------------------
6787 ;; ---- [INT,FP] Compare and select
6788 ;; -------------------------------------------------------------------------
6789 ;; The patterns in this section are synthetic.
6790 ;; -------------------------------------------------------------------------
6791
6792 ;; Integer (signed) vcond. Don't enforce an immediate range here, since it
6793 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
6794 (define_expand "vcond<mode><v_int_equiv>"
6795 [(set (match_operand:SVE_FULL 0 "register_operand")
6796 (if_then_else:SVE_FULL
6797 (match_operator 3 "comparison_operator"
6798 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6799 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6800 (match_operand:SVE_FULL 1 "nonmemory_operand")
6801 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6802 "TARGET_SVE"
6803 {
6804 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6805 DONE;
6806 }
6807 )
6808
6809 ;; Integer vcondu. Don't enforce an immediate range here, since it
6810 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
6811 (define_expand "vcondu<mode><v_int_equiv>"
6812 [(set (match_operand:SVE_FULL 0 "register_operand")
6813 (if_then_else:SVE_FULL
6814 (match_operator 3 "comparison_operator"
6815 [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6816 (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6817 (match_operand:SVE_FULL 1 "nonmemory_operand")
6818 (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6819 "TARGET_SVE"
6820 {
6821 aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6822 DONE;
6823 }
6824 )
6825
6826 ;; Floating-point vcond. All comparisons except FCMUO allow a zero operand;
6827 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
6828 (define_expand "vcond<mode><v_fp_equiv>"
6829 [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
6830 (if_then_else:SVE_FULL_HSD
6831 (match_operator 3 "comparison_operator"
6832 [(match_operand:<V_FP_EQUIV> 4 "register_operand")
6833 (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
6834 (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
6835 (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
6836 "TARGET_SVE"
6837 {
6838 aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
6839 DONE;
6840 }
6841 )
6842
6843 ;; -------------------------------------------------------------------------
6844 ;; ---- [INT] Comparisons
6845 ;; -------------------------------------------------------------------------
6846 ;; Includes:
6847 ;; - CMPEQ
6848 ;; - CMPGE
6849 ;; - CMPGT
6850 ;; - CMPHI
6851 ;; - CMPHS
6852 ;; - CMPLE
6853 ;; - CMPLO
6854 ;; - CMPLS
6855 ;; - CMPLT
6856 ;; - CMPNE
6857 ;; -------------------------------------------------------------------------
6858
6859 ;; Signed integer comparisons. Don't enforce an immediate range here, since
6860 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
6861 ;; instead.
6862 (define_expand "vec_cmp<mode><vpred>"
6863 [(parallel
6864 [(set (match_operand:<VPRED> 0 "register_operand")
6865 (match_operator:<VPRED> 1 "comparison_operator"
6866 [(match_operand:SVE_FULL_I 2 "register_operand")
6867 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6868 (clobber (reg:CC_NZC CC_REGNUM))])]
6869 "TARGET_SVE"
6870 {
6871 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6872 operands[2], operands[3]);
6873 DONE;
6874 }
6875 )
6876
6877 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
6878 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
6879 ;; instead.
6880 (define_expand "vec_cmpu<mode><vpred>"
6881 [(parallel
6882 [(set (match_operand:<VPRED> 0 "register_operand")
6883 (match_operator:<VPRED> 1 "comparison_operator"
6884 [(match_operand:SVE_FULL_I 2 "register_operand")
6885 (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6886 (clobber (reg:CC_NZC CC_REGNUM))])]
6887 "TARGET_SVE"
6888 {
6889 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6890 operands[2], operands[3]);
6891 DONE;
6892 }
6893 )
6894
6895 ;; Predicated integer comparisons.
6896 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
6897 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6898 (unspec:<VPRED>
6899 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6900 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6901 (SVE_INT_CMP:<VPRED>
6902 (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
6903 (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6904 UNSPEC_PRED_Z))
6905 (clobber (reg:CC_NZC CC_REGNUM))]
6906 "TARGET_SVE"
6907 "@
6908 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
6909 cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6910 )
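;; For example, "cmpgt p0.s, p1/z, z2.s, #10" sets each lane of p0
;; that is active in p1 and whose element of z2 is (signed) greater
;; than 10, and clears all other lanes.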
6911
6912 ;; Predicated integer comparisons in which both the flag and predicate
6913 ;; results are interesting.
6914 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
6915 [(set (reg:CC_NZC CC_REGNUM)
6916 (unspec:CC_NZC
6917 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6918 (match_operand 4)
6919 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6920 (unspec:<VPRED>
6921 [(match_operand 6)
6922 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6923 (SVE_INT_CMP:<VPRED>
6924 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6925 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6926 UNSPEC_PRED_Z)]
6927 UNSPEC_PTEST))
6928 (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6929 (unspec:<VPRED>
6930 [(match_dup 6)
6931 (match_dup 7)
6932 (SVE_INT_CMP:<VPRED>
6933 (match_dup 2)
6934 (match_dup 3))]
6935 UNSPEC_PRED_Z))]
6936 "TARGET_SVE
6937 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6938 "@
6939 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6940 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6941 "&& !rtx_equal_p (operands[4], operands[6])"
6942 {
6943 operands[6] = copy_rtx (operands[4]);
6944 operands[7] = operands[5];
6945 }
6946 )
6947
6948 ;; Predicated integer comparisons in which only the flags result is
6949 ;; interesting.
6950 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
6951 [(set (reg:CC_NZC CC_REGNUM)
6952 (unspec:CC_NZC
6953 [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6954 (match_operand 4)
6955 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6956 (unspec:<VPRED>
6957 [(match_operand 6)
6958 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6959 (SVE_INT_CMP:<VPRED>
6960 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6961 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6962 UNSPEC_PRED_Z)]
6963 UNSPEC_PTEST))
6964 (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
6965 "TARGET_SVE
6966 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6967 "@
6968 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6969 cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6970 "&& !rtx_equal_p (operands[4], operands[6])"
6971 {
6972 operands[6] = copy_rtx (operands[4]);
6973 operands[7] = operands[5];
6974 }
6975 )
6976
6977 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
6978 ;; comparison with an AND. Split the instruction into its preferred form
6979 ;; at the earliest opportunity, in order to get rid of the redundant
6980 ;; operand 4.
6981 (define_insn_and_split "*cmp<cmp_op><mode>_and"
6982 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6983 (and:<VPRED>
6984 (unspec:<VPRED>
6985 [(match_operand 4)
6986 (const_int SVE_KNOWN_PTRUE)
6987 (SVE_INT_CMP:<VPRED>
6988 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6989 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6990 UNSPEC_PRED_Z)
6991 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
6992 (clobber (reg:CC_NZC CC_REGNUM))]
6993 "TARGET_SVE"
6994 "#"
6995 "&& 1"
6996 [(parallel
6997 [(set (match_dup 0)
6998 (unspec:<VPRED>
6999 [(match_dup 1)
7000 (const_int SVE_MAYBE_NOT_PTRUE)
7001 (SVE_INT_CMP:<VPRED>
7002 (match_dup 2)
7003 (match_dup 3))]
7004 UNSPEC_PRED_Z))
7005 (clobber (reg:CC_NZC CC_REGNUM))])]
7006 )
7007
7008 ;; Predicated integer wide comparisons.
7009 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
7010 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7011 (unspec:<VPRED>
7012 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7013 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7014 (unspec:<VPRED>
7015 [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
7016 (match_operand:VNx2DI 4 "register_operand" "w")]
7017 SVE_COND_INT_CMP_WIDE)]
7018 UNSPEC_PRED_Z))
7019 (clobber (reg:CC_NZC CC_REGNUM))]
7020 "TARGET_SVE"
7021 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
7022 )
7023
7024 ;; Predicated integer wide comparisons in which both the flag and
7025 ;; predicate results are interesting.
7026 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
7027 [(set (reg:CC_NZC CC_REGNUM)
7028 (unspec:CC_NZC
7029 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7030 (match_operand 4)
7031 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7032 (unspec:<VPRED>
7033 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7034 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7035 (unspec:<VPRED>
7036 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7037 (match_operand:VNx2DI 3 "register_operand" "w")]
7038 SVE_COND_INT_CMP_WIDE)]
7039 UNSPEC_PRED_Z)]
7040 UNSPEC_PTEST))
7041 (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7042 (unspec:<VPRED>
7043 [(match_dup 6)
7044 (match_dup 7)
7045 (unspec:<VPRED>
7046 [(match_dup 2)
7047 (match_dup 3)]
7048 SVE_COND_INT_CMP_WIDE)]
7049 UNSPEC_PRED_Z))]
7050 "TARGET_SVE
7051 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7052 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7053 )
7054
7055 ;; Predicated integer wide comparisons in which only the flags result
7056 ;; is interesting.
7057 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
7058 [(set (reg:CC_NZC CC_REGNUM)
7059 (unspec:CC_NZC
7060 [(match_operand:VNx16BI 1 "register_operand" "Upl")
7061 (match_operand 4)
7062 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7063 (unspec:<VPRED>
7064 [(match_operand:VNx16BI 6 "register_operand" "Upl")
7065 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7066 (unspec:<VPRED>
7067 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7068 (match_operand:VNx2DI 3 "register_operand" "w")]
7069 SVE_COND_INT_CMP_WIDE)]
7070 UNSPEC_PRED_Z)]
7071 UNSPEC_PTEST))
7072 (clobber (match_scratch:<VPRED> 0 "=Upa"))]
7073 "TARGET_SVE
7074 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7075 "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7076 )
7077
7078 ;; -------------------------------------------------------------------------
7079 ;; ---- [INT] While tests
7080 ;; -------------------------------------------------------------------------
7081 ;; Includes:
7082 ;; - WHILEGE (SVE2)
7083 ;; - WHILEGT (SVE2)
7084 ;; - WHILEHI (SVE2)
7085 ;; - WHILEHS (SVE2)
7086 ;; - WHILELE
7087 ;; - WHILELO
7088 ;; - WHILELS
7089 ;; - WHILELT
7090 ;; - WHILERW (SVE2)
7091 ;; - WHILEWR (SVE2)
7092 ;; -------------------------------------------------------------------------
7093
7094 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
7095 ;; true for all J in [0, I].
7096 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
7097 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7098 (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7099 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7100 SVE_WHILE))
7101 (clobber (reg:CC_NZC CC_REGNUM))]
7102 "TARGET_SVE"
7103 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7104 )
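;; For example, "whilelo p0.s, x1, x2" with x1 = 5 and x2 = 8 sets the
;; first three elements of p0 (5, 6 and 7 are all below 8) and clears
;; the rest.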
7105
7106 ;; The WHILE instructions set the flags in the same way as a PTEST with
7107 ;; a PTRUE GP. Handle the case in which both results are useful. The GP
7108 ;; operands to the PTEST aren't needed, so we allow them to be anything.
7109 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
7110 [(set (reg:CC_NZC CC_REGNUM)
7111 (unspec:CC_NZC
7112 [(match_operand 3)
7113 (match_operand 4)
7114 (const_int SVE_KNOWN_PTRUE)
7115 (unspec:PRED_ALL
7116 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7117 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7118 SVE_WHILE)]
7119 UNSPEC_PTEST))
7120 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7121 (unspec:PRED_ALL [(match_dup 1)
7122 (match_dup 2)]
7123 SVE_WHILE))]
7124 "TARGET_SVE"
7125 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7126 ;; Force the compiler to drop the unused predicate operand, so that we
7127 ;; don't have an unnecessary PTRUE.
7128 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
7129 {
7130 operands[3] = CONSTM1_RTX (VNx16BImode);
7131 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
7132 }
7133 )
7134
7135 ;; Same, but handle the case in which only the flags result is useful.
7136 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
7137 [(set (reg:CC_NZC CC_REGNUM)
7138 (unspec:CC_NZC
7139 [(match_operand 3)
7140 (match_operand 4)
7141 (const_int SVE_KNOWN_PTRUE)
7142 (unspec:PRED_ALL
7143 [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7144 (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7145 SVE_WHILE)]
7146 UNSPEC_PTEST))
7147 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
7148 "TARGET_SVE"
7149 "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7150 ;; Force the compiler to drop the unused predicate operand, so that we
7151 ;; don't have an unnecessary PTRUE.
7152 "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
7153 {
7154 operands[3] = CONSTM1_RTX (VNx16BImode);
7155 operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
7156 }
7157 )
7158
7159 ;; -------------------------------------------------------------------------
7160 ;; ---- [FP] Direct comparisons
7161 ;; -------------------------------------------------------------------------
7162 ;; Includes:
7163 ;; - FCMEQ
7164 ;; - FCMGE
7165 ;; - FCMGT
7166 ;; - FCMLE
7167 ;; - FCMLT
7168 ;; - FCMNE
7169 ;; - FCMUO
7170 ;; -------------------------------------------------------------------------
7171
7172 ;; Floating-point comparisons. All comparisons except FCMUO allow a zero
7173 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
7174 ;; with zero.
7175 (define_expand "vec_cmp<mode><vpred>"
7176 [(set (match_operand:<VPRED> 0 "register_operand")
7177 (match_operator:<VPRED> 1 "comparison_operator"
7178 [(match_operand:SVE_FULL_F 2 "register_operand")
7179 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
7180 "TARGET_SVE"
7181 {
7182 aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
7183 operands[2], operands[3], false);
7184 DONE;
7185 }
7186 )
7187
7188 ;; Predicated floating-point comparisons.
7189 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
7190 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7191 (unspec:<VPRED>
7192 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7193 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7194 (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7195 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
7196 SVE_COND_FP_CMP_I0))]
7197 "TARGET_SVE"
7198 "@
7199 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
7200 fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7201 )
7202
7203 ;; Same for unordered comparisons.
7204 (define_insn "@aarch64_pred_fcmuo<mode>"
7205 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7206 (unspec:<VPRED>
7207 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7208 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7209 (match_operand:SVE_FULL_F 3 "register_operand" "w")
7210 (match_operand:SVE_FULL_F 4 "register_operand" "w")]
7211 UNSPEC_COND_FCMUO))]
7212 "TARGET_SVE"
7213 "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7214 )
7215
7216 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
7217 ;; with another predicate P. This does not have the same trapping behavior
7218 ;; as predicating the comparison itself on P, but it's a legitimate fold,
7219 ;; since we can drop any potentially-trapping operations whose results
7220 ;; are not needed.
7221 ;;
7222 ;; Split the instruction into its preferred form (below) at the earliest
7223 ;; opportunity, in order to get rid of the redundant operand 1.
7224 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
7225 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7226 (and:<VPRED>
7227 (unspec:<VPRED>
7228 [(match_operand:<VPRED> 1)
7229 (const_int SVE_KNOWN_PTRUE)
7230 (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7231 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
7232 SVE_COND_FP_CMP_I0)
7233 (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
7234 "TARGET_SVE"
7235 "#"
7236 "&& 1"
7237 [(set (match_dup 0)
7238 (unspec:<VPRED>
7239 [(match_dup 4)
7240 (const_int SVE_MAYBE_NOT_PTRUE)
7241 (match_dup 2)
7242 (match_dup 3)]
7243 SVE_COND_FP_CMP_I0))]
7244 )
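
;; A hedged example of where the AND arises (ours, not from the sources):
;; in
;;
;;   void
;;   f (float *restrict r, float *restrict x, float *restrict y, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       if (x[i] > 1.0f && y[i] > 2.0f)
;;         r[i] = 0.0f;
;;   }
;;
;; the store mask is the AND of two comparison results, each initially
;; performed on a PTRUE.  The split above replaces the AND of the second
;; comparison with the first result by a comparison predicated directly
;; on that result.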
7245
7246 ;; Same for unordered comparisons.
7247 (define_insn_and_split "*fcmuo<mode>_and_combine"
7248 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7249 (and:<VPRED>
7250 (unspec:<VPRED>
7251 [(match_operand:<VPRED> 1)
7252 (const_int SVE_KNOWN_PTRUE)
7253 (match_operand:SVE_FULL_F 2 "register_operand" "w")
7254 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7255 UNSPEC_COND_FCMUO)
7256 (match_operand:<VPRED> 4 "register_operand" "Upl")))]
7257 "TARGET_SVE"
7258 "#"
7259 "&& 1"
7260 [(set (match_dup 0)
7261 (unspec:<VPRED>
7262 [(match_dup 4)
7263 (const_int SVE_MAYBE_NOT_PTRUE)
7264 (match_dup 2)
7265 (match_dup 3)]
7266 UNSPEC_COND_FCMUO))]
7267 )
7268
7269 ;; -------------------------------------------------------------------------
7270 ;; ---- [FP] Absolute comparisons
7271 ;; -------------------------------------------------------------------------
7272 ;; Includes:
7273 ;; - FACGE
7274 ;; - FACGT
7275 ;; - FACLE
7276 ;; - FACLT
7277 ;; -------------------------------------------------------------------------
7278
7279 ;; Predicated floating-point absolute comparisons.
7280 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
7281 [(set (match_operand:<VPRED> 0 "register_operand")
7282 (unspec:<VPRED>
7283 [(match_operand:<VPRED> 1 "register_operand")
7284 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7285 (unspec:SVE_FULL_F
7286 [(match_dup 1)
7287 (match_dup 2)
7288 (match_operand:SVE_FULL_F 3 "register_operand")]
7289 UNSPEC_COND_FABS)
7290 (unspec:SVE_FULL_F
7291 [(match_dup 1)
7292 (match_dup 2)
7293 (match_operand:SVE_FULL_F 4 "register_operand")]
7294 UNSPEC_COND_FABS)]
7295 SVE_COND_FP_ABS_CMP))]
7296 "TARGET_SVE"
7297 )
7298
7299 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
7300 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7301 (unspec:<VPRED>
7302 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7303 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
7304 (unspec:SVE_FULL_F
7305 [(match_operand 5)
7306 (match_operand:SI 6 "aarch64_sve_gp_strictness")
7307 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7308 UNSPEC_COND_FABS)
7309 (unspec:SVE_FULL_F
7310 [(match_operand 7)
7311 (match_operand:SI 8 "aarch64_sve_gp_strictness")
7312 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7313 UNSPEC_COND_FABS)]
7314 SVE_COND_FP_ABS_CMP))]
7315 "TARGET_SVE
7316 && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
7317 && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
7318 "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7319 "&& (!rtx_equal_p (operands[1], operands[5])
7320 || !rtx_equal_p (operands[1], operands[7]))"
7321 {
7322 operands[5] = copy_rtx (operands[1]);
7323 operands[7] = copy_rtx (operands[1]);
7324 }
7325 )
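
;; For example (an illustrative sketch only):
;;
;;   #include <math.h>
;;
;;   void
;;   f (int *restrict r, float *restrict x, float *restrict y, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] = fabsf (x[i]) > fabsf (y[i]);
;;   }
;;
;; supplies both FABS results to the comparison, matching the pattern
;; above so that the whole sequence can become a single FACGT.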
7326
7327 ;; -------------------------------------------------------------------------
7328 ;; ---- [PRED] Select
7329 ;; -------------------------------------------------------------------------
7330 ;; Includes:
7331 ;; - SEL
7332 ;; -------------------------------------------------------------------------
7333
7334 (define_insn "@vcond_mask_<mode><mode>"
7335 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7336 (ior:PRED_ALL
7337 (and:PRED_ALL
7338 (match_operand:PRED_ALL 3 "register_operand" "Upa")
7339 (match_operand:PRED_ALL 1 "register_operand" "Upa"))
7340 (and:PRED_ALL
7341 (not (match_dup 3))
7342 (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
7343 "TARGET_SVE"
7344 "sel\t%0.b, %3, %1.b, %2.b"
7345 )
7346
7347 ;; -------------------------------------------------------------------------
7348 ;; ---- [PRED] Test bits
7349 ;; -------------------------------------------------------------------------
7350 ;; Includes:
7351 ;; - PTEST
7352 ;; -------------------------------------------------------------------------
7353
7354 ;; Branch based on predicate equality or inequality.
7355 (define_expand "cbranch<mode>4"
7356 [(set (pc)
7357 (if_then_else
7358 (match_operator 0 "aarch64_equality_operator"
7359 [(match_operand:PRED_ALL 1 "register_operand")
7360 (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
7361 (label_ref (match_operand 3 ""))
7362 (pc)))]
7363 ""
7364 {
7365 rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
7366 rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
7367 rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
7368 rtx pred;
7369 if (operands[2] == CONST0_RTX (<MODE>mode))
7370 pred = operands[1];
7371 else
7372 {
7373 pred = gen_reg_rtx (<MODE>mode);
7374 emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
7375 operands[2]));
7376 }
7377 emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
7378 operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
7379 operands[2] = const0_rtx;
7380 }
7381 )
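
;; A source-level sketch of the same test, using the ACLE (our example):
;;
;;   #include <arm_sve.h>
;;
;;   int
;;   any_active (svbool_t pg, svbool_t p)
;;   {
;;     /* Branch on whether any element of P is active; comparing
;;        against a nonzero predicate instead goes through the EOR
;;        described above before the PTEST.  */
;;     return svptest_any (pg, p) ? 1 : 0;
;;   }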
7382
7383 ;; See "Description of UNSPEC_PTEST" above for details.
7384 (define_insn "aarch64_ptest<mode>"
7385 [(set (reg:CC_NZC CC_REGNUM)
7386 (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
7387 (match_operand 1)
7388 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7389 (match_operand:PRED_ALL 3 "register_operand" "Upa")]
7390 UNSPEC_PTEST))]
7391 "TARGET_SVE"
7392 "ptest\t%0, %3.b"
7393 )
7394
7395 ;; =========================================================================
7396 ;; == Reductions
7397 ;; =========================================================================
7398
7399 ;; -------------------------------------------------------------------------
7400 ;; ---- [INT,FP] Conditional reductions
7401 ;; -------------------------------------------------------------------------
7402 ;; Includes:
7403 ;; - CLASTA
7404 ;; - CLASTB
7405 ;; -------------------------------------------------------------------------
7406
7407 ;; Set operand 0 to the last active element in operand 3, or to tied
7408 ;; operand 1 if no elements are active.
7409 (define_insn "@fold_extract_<last_op>_<mode>"
7410 [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
7411 (unspec:<VEL>
7412 [(match_operand:<VEL> 1 "register_operand" "0, 0")
7413 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7414 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7415 CLAST))]
7416 "TARGET_SVE"
7417 "@
7418 clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
7419 clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
7420 )
7421
7422 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
7423 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7424 (unspec:SVE_FULL
7425 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7426 (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7427 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7428 CLAST))]
7429 "TARGET_SVE"
7430 "@
7431 clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
7432 movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
7433 )
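
;; Source-level sketch using the ACLE (ours, for illustration):
;;
;;   #include <arm_sve.h>
;;
;;   float
;;   last_active (svbool_t pg, float fallback, svfloat32_t data)
;;   {
;;     /* Returns the last active element of DATA, or FALLBACK if no
;;        elements are active -- the fold_extract semantics above.  */
;;     return svclastb (pg, fallback, data);
;;   }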
7434
7435 ;; -------------------------------------------------------------------------
7436 ;; ---- [INT] Tree reductions
7437 ;; -------------------------------------------------------------------------
7438 ;; Includes:
7439 ;; - ANDV
7440 ;; - EORV
7441 ;; - ORV
7442 ;; - SADDV
7443 ;; - SMAXV
7444 ;; - SMINV
7445 ;; - UADDV
7446 ;; - UMAXV
7447 ;; - UMINV
7448 ;; -------------------------------------------------------------------------
7449
7450 ;; Unpredicated integer add reduction.
7451 (define_expand "reduc_plus_scal_<mode>"
7452 [(match_operand:<VEL> 0 "register_operand")
7453 (match_operand:SVE_FULL_I 1 "register_operand")]
7454 "TARGET_SVE"
7455 {
7456 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
7457 rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
7458 emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
7459 if (tmp != operands[0])
7460 emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
7461 DONE;
7462 }
7463 )
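
;; For example (illustrative only), summing 32-bit elements:
;;
;;   int
;;   sum (int *restrict x, int n)
;;   {
;;     int s = 0;
;;     for (int i = 0; i < n; ++i)
;;       s += x[i];
;;     return s;
;;   }
;;
;; uses UADDV even though the elements are signed: the reduction is
;; performed in 64 bits and the low 32 bits of the D register are then
;; taken as the SImode result, so signedness does not matter.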
7464
7465 ;; Predicated integer add reduction. The result is always 64 bits.
7466 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7467 [(set (match_operand:DI 0 "register_operand" "=w")
7468 (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
7469 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7470 SVE_INT_ADDV))]
7471 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
7472 "<su>addv\t%d0, %1, %2.<Vetype>"
7473 )
7474
7475 ;; Unpredicated integer reductions.
7476 (define_expand "reduc_<optab>_scal_<mode>"
7477 [(set (match_operand:<VEL> 0 "register_operand")
7478 (unspec:<VEL> [(match_dup 2)
7479 (match_operand:SVE_FULL_I 1 "register_operand")]
7480 SVE_INT_REDUCTION))]
7481 "TARGET_SVE"
7482 {
7483 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7484 }
7485 )
7486
7487 ;; Predicated integer reductions.
7488 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7489 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7490 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7491 (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7492 SVE_INT_REDUCTION))]
7493 "TARGET_SVE"
7494 "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
7495 )
7496
7497 ;; -------------------------------------------------------------------------
7498 ;; ---- [FP] Tree reductions
7499 ;; -------------------------------------------------------------------------
7500 ;; Includes:
7501 ;; - FADDV
7502 ;; - FMAXNMV
7503 ;; - FMAXV
7504 ;; - FMINNMV
7505 ;; - FMINV
7506 ;; -------------------------------------------------------------------------
7507
7508 ;; Unpredicated floating-point tree reductions.
7509 (define_expand "reduc_<optab>_scal_<mode>"
7510 [(set (match_operand:<VEL> 0 "register_operand")
7511 (unspec:<VEL> [(match_dup 2)
7512 (match_operand:SVE_FULL_F 1 "register_operand")]
7513 SVE_FP_REDUCTION))]
7514 "TARGET_SVE"
7515 {
7516 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7517 }
7518 )
7519
7520 ;; Predicated floating-point tree reductions.
7521 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7522 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7523 (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7524 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7525 SVE_FP_REDUCTION))]
7526 "TARGET_SVE"
7527 "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
7528 )
7529
7530 ;; -------------------------------------------------------------------------
7531 ;; ---- [FP] Left-to-right reductions
7532 ;; -------------------------------------------------------------------------
7533 ;; Includes:
7534 ;; - FADDA
7535 ;; -------------------------------------------------------------------------
7536
7537 ;; Unpredicated in-order FP reductions.
7538 (define_expand "fold_left_plus_<mode>"
7539 [(set (match_operand:<VEL> 0 "register_operand")
7540 (unspec:<VEL> [(match_dup 3)
7541 (match_operand:<VEL> 1 "register_operand")
7542 (match_operand:SVE_FULL_F 2 "register_operand")]
7543 UNSPEC_FADDA))]
7544 "TARGET_SVE"
7545 {
7546 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
7547 }
7548 )
7549
7550 ;; Predicated in-order FP reductions.
7551 (define_insn "mask_fold_left_plus_<mode>"
7552 [(set (match_operand:<VEL> 0 "register_operand" "=w")
7553 (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
7554 (match_operand:<VEL> 1 "register_operand" "0")
7555 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7556 UNSPEC_FADDA))]
7557 "TARGET_SVE"
7558 "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
7559 )
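
;; Illustrative example (ours): without -ffast-math, the in-order
;; reduction below must add the elements strictly from left to right,
;; which is what FADDA provides:
;;
;;   double
;;   sum (double *restrict x, int n)
;;   {
;;     double s = 0.0;
;;     for (int i = 0; i < n; ++i)
;;       s += x[i];
;;     return s;
;;   }
;;
;; With -ffast-math, reassociation is permitted and the FADDV tree
;; reduction above can be used instead.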
7560
7561 ;; =========================================================================
7562 ;; == Permutes
7563 ;; =========================================================================
7564
7565 ;; -------------------------------------------------------------------------
7566 ;; ---- [INT,FP] General permutes
7567 ;; -------------------------------------------------------------------------
7568 ;; Includes:
7569 ;; - TBL
7570 ;; -------------------------------------------------------------------------
7571
7572 (define_expand "vec_perm<mode>"
7573 [(match_operand:SVE_FULL 0 "register_operand")
7574 (match_operand:SVE_FULL 1 "register_operand")
7575 (match_operand:SVE_FULL 2 "register_operand")
7576 (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
7577 "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
7578 {
7579 aarch64_expand_sve_vec_perm (operands[0], operands[1],
7580 operands[2], operands[3]);
7581 DONE;
7582 }
7583 )
7584
7585 (define_insn "@aarch64_sve_tbl<mode>"
7586 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7587 (unspec:SVE_FULL
7588 [(match_operand:SVE_FULL 1 "register_operand" "w")
7589 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
7590 UNSPEC_TBL))]
7591 "TARGET_SVE"
7592 "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7593 )
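
;; ACLE-level sketch (our example): a general permute selects each output
;; element by index, with out-of-range indices yielding zero:
;;
;;   #include <arm_sve.h>
;;
;;   svfloat32_t
;;   permute (svfloat32_t data, svuint32_t indices)
;;   {
;;     return svtbl (data, indices);
;;   }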
7594
7595 ;; -------------------------------------------------------------------------
7596 ;; ---- [INT,FP] Special-purpose unary permutes
7597 ;; -------------------------------------------------------------------------
7598 ;; Includes:
7599 ;; - COMPACT
7600 ;; - DUP
7601 ;; - REV
7602 ;; -------------------------------------------------------------------------
7603
7604 ;; Compact active elements and pad with zeros.
7605 (define_insn "@aarch64_sve_compact<mode>"
7606 [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
7607 (unspec:SVE_FULL_SD
7608 [(match_operand:<VPRED> 1 "register_operand" "Upl")
7609 (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
7610 UNSPEC_SVE_COMPACT))]
7611 "TARGET_SVE"
7612 "compact\t%0.<Vetype>, %1, %2.<Vetype>"
7613 )
7614
7615 ;; Duplicate one element of a vector.
7616 (define_insn "@aarch64_sve_dup_lane<mode>"
7617 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7618 (vec_duplicate:SVE_FULL
7619 (vec_select:<VEL>
7620 (match_operand:SVE_FULL 1 "register_operand" "w")
7621 (parallel [(match_operand:SI 2 "const_int_operand")]))))]
7622 "TARGET_SVE
7623 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7624 "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
7625 )
7626
7627 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
7628 ;;
7629 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
7630 ;; number op2 + N of op1. (We don't need to distinguish between memory
7631 ;; and architectural register lane numbering for op1 or op0, since the
7632 ;; two numbering schemes are the same for SVE.)
7633 ;;
7634 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
7635 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
7636 ;; of op0. We therefore get the correct result for both endiannesses.
7637 ;;
7638 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
7639 ;; is in the opposite order to architectural register lane numbering.
7640 ;; Thus if we were to do this operation via a V128 temporary register,
7641 ;; the vec_select and vec_duplicate would both involve a reverse operation
7642 ;; for big-endian targets. In this fused pattern the two reverses cancel
7643 ;; each other out.
7644 (define_insn "@aarch64_sve_dupq_lane<mode>"
7645 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7646 (vec_duplicate:SVE_FULL
7647 (vec_select:<V128>
7648 (match_operand:SVE_FULL 1 "register_operand" "w")
7649 (match_operand 2 "ascending_int_parallel"))))]
7650 "TARGET_SVE
7651 && (INTVAL (XVECEXP (operands[2], 0, 0))
7652 * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
7653 && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
7654 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7655 {
7656 unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
7657 * GET_MODE_SIZE (<VEL>mode));
7658 operands[2] = gen_int_mode (byte / 16, DImode);
7659 return "dup\t%0.q, %1.q[%2]";
7660 }
7661 )
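
;; Worked example of the index rewrite (ours): for VNx4SI (4-byte
;; elements), a parallel starting at element index 8 gives
;; byte = 8 * 4 = 32, which is a multiple of 16 and within [0, 63];
;; the emitted instruction then uses q-lane 32 / 16 = 2, i.e.
;; dup z0.q, z1.q[2].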
7662
7663 ;; Reverse the order of elements within a full vector.
7664 (define_insn "@aarch64_sve_rev<mode>"
7665 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7666 (unspec:SVE_FULL
7667 [(match_operand:SVE_FULL 1 "register_operand" "w")]
7668 UNSPEC_REV))]
7669 "TARGET_SVE"
7670 "rev\t%0.<Vetype>, %1.<Vetype>")
7671
7672 ;; -------------------------------------------------------------------------
7673 ;; ---- [INT,FP] Special-purpose binary permutes
7674 ;; -------------------------------------------------------------------------
7675 ;; Includes:
7676 ;; - SPLICE
7677 ;; - TRN1
7678 ;; - TRN2
7679 ;; - UZP1
7680 ;; - UZP2
7681 ;; - ZIP1
7682 ;; - ZIP2
7683 ;; -------------------------------------------------------------------------
7684
7685 ;; Like EXT, but start at the first active element.
7686 (define_insn "@aarch64_sve_splice<mode>"
7687 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7688 (unspec:SVE_FULL
7689 [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7690 (match_operand:SVE_FULL 2 "register_operand" "0, w")
7691 (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7692 UNSPEC_SVE_SPLICE))]
7693 "TARGET_SVE"
7694 "@
7695 splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
7696 movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
7697 [(set_attr "movprfx" "*, yes")]
7698 )
7699
7700 ;; Permutes that take half the elements from one vector and half the
7701 ;; elements from the other.
7702 (define_insn "@aarch64_sve_<perm_insn><mode>"
7703 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7704 (unspec:SVE_FULL
7705 [(match_operand:SVE_FULL 1 "register_operand" "w")
7706 (match_operand:SVE_FULL 2 "register_operand" "w")]
7707 PERMUTE))]
7708 "TARGET_SVE"
7709 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7710 )
7711
7712 ;; Apply PERMUTE to 128-bit sequences. The behavior of these patterns
7713 ;; doesn't depend on the mode.
7714 (define_insn "@aarch64_sve_<optab><mode>"
7715 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7716 (unspec:SVE_FULL
7717 [(match_operand:SVE_FULL 1 "register_operand" "w")
7718 (match_operand:SVE_FULL 2 "register_operand" "w")]
7719 PERMUTEQ))]
7720 "TARGET_SVE_F64MM"
7721 "<perm_insn>\t%0.q, %1.q, %2.q"
7722 )
7723
7724 ;; Concatenate two vectors and extract a subvector. Note that the
7725 ;; immediate (third) operand is the lane index, not the byte index.
7726 (define_insn "@aarch64_sve_ext<mode>"
7727 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7728 (unspec:SVE_FULL
7729 [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7730 (match_operand:SVE_FULL 2 "register_operand" "w, w")
7731 (match_operand:SI 3 "const_int_operand")]
7732 UNSPEC_EXT))]
7733 "TARGET_SVE
7734 && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
7735 {
7736 operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
7737 return (which_alternative == 0
7738 ? "ext\\t%0.b, %0.b, %2.b, #%3"
7739 : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
7740 }
7741 [(set_attr "movprfx" "*,yes")]
7742 )
7743
7744 ;; -------------------------------------------------------------------------
7745 ;; ---- [PRED] Special-purpose unary permutes
7746 ;; -------------------------------------------------------------------------
7747 ;; Includes:
7748 ;; - REV
7749 ;; -------------------------------------------------------------------------
7750
7751 (define_insn "@aarch64_sve_rev<mode>"
7752 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7753 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
7754 UNSPEC_REV))]
7755 "TARGET_SVE"
7756 "rev\t%0.<Vetype>, %1.<Vetype>")
7757
7758 ;; -------------------------------------------------------------------------
7759 ;; ---- [PRED] Special-purpose binary permutes
7760 ;; -------------------------------------------------------------------------
7761 ;; Includes:
7762 ;; - TRN1
7763 ;; - TRN2
7764 ;; - UZP1
7765 ;; - UZP2
7766 ;; - ZIP1
7767 ;; - ZIP2
7768 ;; -------------------------------------------------------------------------
7769
7770 ;; Permutes that take half the elements from one vector and half the
7771 ;; elements from the other.
7772 (define_insn "@aarch64_sve_<perm_insn><mode>"
7773 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7774 (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
7775 (match_operand:PRED_ALL 2 "register_operand" "Upa")]
7776 PERMUTE))]
7777 "TARGET_SVE"
7778 "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7779 )
7780
7781 ;; =========================================================================
7782 ;; == Conversions
7783 ;; =========================================================================
7784
7785 ;; -------------------------------------------------------------------------
7786 ;; ---- [INT<-INT] Packs
7787 ;; -------------------------------------------------------------------------
7788 ;; Includes:
7789 ;; - UZP1
7790 ;; -------------------------------------------------------------------------
7791
7792 ;; Integer pack. Use UZP1 on the narrower type, which discards
7793 ;; the high part of each wide element.
7794 (define_insn "vec_pack_trunc_<Vwide>"
7795 [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
7796 (unspec:SVE_FULL_BHSI
7797 [(match_operand:<VWIDE> 1 "register_operand" "w")
7798 (match_operand:<VWIDE> 2 "register_operand" "w")]
7799 UNSPEC_PACK))]
7800 "TARGET_SVE"
7801 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7802 )
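
;; For example (illustrative), a narrowing loop such as:
;;
;;   void
;;   narrow (short *restrict r, int *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] = (short) x[i];
;;   }
;;
;; packs each pair of .s vectors into one .h vector with a single UZP1,
;; since truncation simply keeps the low half of each wide element.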
7803
7804 ;; -------------------------------------------------------------------------
7805 ;; ---- [INT<-INT] Unpacks
7806 ;; -------------------------------------------------------------------------
7807 ;; Includes:
7808 ;; - SUNPKHI
7809 ;; - SUNPKLO
7810 ;; - UUNPKHI
7811 ;; - UUNPKLO
7812 ;; -------------------------------------------------------------------------
7813
7814 ;; Unpack the low or high half of a vector, where "high" refers to
7815 ;; the low-numbered lanes for big-endian and the high-numbered lanes
7816 ;; for little-endian.
7817 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
7818 [(match_operand:<VWIDE> 0 "register_operand")
7819 (unspec:<VWIDE>
7820 [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
7821 "TARGET_SVE"
7822 {
7823 emit_insn ((<hi_lanes_optab>
7824 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
7825 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
7826 (operands[0], operands[1]));
7827 DONE;
7828 }
7829 )
7830
7831 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
7832 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
7833 (unspec:<VWIDE>
7834 [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
7835 UNPACK))]
7836 "TARGET_SVE"
7837 "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
7838 )
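
;; Illustrative example (ours): a widening loop such as
;;
;;   void
;;   widen (long long *restrict r, int *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] += x[i];
;;   }
;;
;; consumes each .s vector as two .d vectors via SUNPKLO/SUNPKHI.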
7839
7840 ;; -------------------------------------------------------------------------
7841 ;; ---- [INT<-FP] Conversions
7842 ;; -------------------------------------------------------------------------
7843 ;; Includes:
7844 ;; - FCVTZS
7845 ;; - FCVTZU
7846 ;; -------------------------------------------------------------------------
7847
7848 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
7849 ;; SF to SI or DF to DI).
7850 (define_expand "<optab><mode><v_int_equiv>2"
7851 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7852 (unspec:<V_INT_EQUIV>
7853 [(match_dup 2)
7854 (const_int SVE_RELAXED_GP)
7855 (match_operand:SVE_FULL_F 1 "register_operand")]
7856 SVE_COND_FCVTI))]
7857 "TARGET_SVE"
7858 {
7859 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7860 }
7861 )
7862
7863 ;; Predicated float-to-integer conversion, either to the same width or wider.
7864 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7865 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
7866 (unspec:SVE_FULL_HSDI
7867 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7868 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7869 (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7870 SVE_COND_FCVTI))]
7871 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7872 "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7873 )
7874
7875 ;; Predicated narrowing float-to-integer conversion.
7876 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7877 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
7878 (unspec:VNx4SI_ONLY
7879 [(match_operand:VNx2BI 1 "register_operand" "Upl")
7880 (match_operand:SI 3 "aarch64_sve_gp_strictness")
7881 (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
7882 SVE_COND_FCVTI))]
7883 "TARGET_SVE"
7884 "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7885 )
7886
7887 ;; Predicated float-to-integer conversion with merging, either to the same
7888 ;; width or wider.
7889 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7890 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
7891 (unspec:SVE_FULL_HSDI
7892 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7893 (unspec:SVE_FULL_HSDI
7894 [(match_dup 1)
7895 (const_int SVE_STRICT_GP)
7896 (match_operand:SVE_FULL_F 2 "register_operand")]
7897 SVE_COND_FCVTI)
7898 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
7899 UNSPEC_SEL))]
7900 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7901 )
7902
7903 ;; The first alternative doesn't need the earlyclobber, but the only case
7904 ;; it would help is the uninteresting one in which operands 2 and 3 are
7905 ;; the same register (despite having different modes). Making all the
7906 ;; alternatives earlyclobber makes things more consistent for the
7907 ;; register allocator.
7908 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7909 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
7910 (unspec:SVE_FULL_HSDI
7911 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7912 (unspec:SVE_FULL_HSDI
7913 [(match_operand 4)
7914 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7915 (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
7916 SVE_COND_FCVTI)
7917 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7918 UNSPEC_SEL))]
7919 "TARGET_SVE
7920 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7921 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7922 "@
7923 fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7924 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7925 movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7926 "&& !rtx_equal_p (operands[1], operands[4])"
7927 {
7928 operands[4] = copy_rtx (operands[1]);
7929 }
7930 [(set_attr "movprfx" "*,yes,yes")]
7931 )
7932
7933 ;; Predicated narrowing float-to-integer conversion with merging.
7934 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7935 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7936 (unspec:VNx4SI_ONLY
7937 [(match_operand:VNx2BI 1 "register_operand")
7938 (unspec:VNx4SI_ONLY
7939 [(match_dup 1)
7940 (const_int SVE_STRICT_GP)
7941 (match_operand:VNx2DF_ONLY 2 "register_operand")]
7942 SVE_COND_FCVTI)
7943 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
7944 UNSPEC_SEL))]
7945 "TARGET_SVE"
7946 )
7947
7948 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7949 [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
7950 (unspec:VNx4SI_ONLY
7951 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7952 (unspec:VNx4SI_ONLY
7953 [(match_dup 1)
7954 (match_operand:SI 4 "aarch64_sve_gp_strictness")
7955 (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
7956 SVE_COND_FCVTI)
7957 (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7958 UNSPEC_SEL))]
7959 "TARGET_SVE"
7960 "@
7961 fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7962 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7963 movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7964 [(set_attr "movprfx" "*,yes,yes")]
7965 )
7966
7967 ;; -------------------------------------------------------------------------
7968 ;; ---- [INT<-FP] Packs
7969 ;; -------------------------------------------------------------------------
7970 ;; The patterns in this section are synthetic.
7971 ;; -------------------------------------------------------------------------
7972
7973 ;; Convert two vectors of DF to SI and pack the results into a single vector.
7974 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
7975 [(set (match_dup 4)
7976 (unspec:VNx4SI
7977 [(match_dup 3)
7978 (const_int SVE_RELAXED_GP)
7979 (match_operand:VNx2DF 1 "register_operand")]
7980 SVE_COND_FCVTI))
7981 (set (match_dup 5)
7982 (unspec:VNx4SI
7983 [(match_dup 3)
7984 (const_int SVE_RELAXED_GP)
7985 (match_operand:VNx2DF 2 "register_operand")]
7986 SVE_COND_FCVTI))
7987 (set (match_operand:VNx4SI 0 "register_operand")
7988 (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7989 "TARGET_SVE"
7990 {
7991 operands[3] = aarch64_ptrue_reg (VNx2BImode);
7992 operands[4] = gen_reg_rtx (VNx4SImode);
7993 operands[5] = gen_reg_rtx (VNx4SImode);
7994 }
7995 )
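
;; Illustrative example (ours): a conversion loop such as
;;
;;   void
;;   conv (int *restrict r, double *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       r[i] = (int) x[i];
;;   }
;;
;; converts two .d vectors with FCVTZS and packs the two .s results with
;; UZP1, exactly as in the expansion above.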
7996
7997 ;; -------------------------------------------------------------------------
7998 ;; ---- [INT<-FP] Unpacks
7999 ;; -------------------------------------------------------------------------
8000 ;; No patterns here yet!
8001 ;; -------------------------------------------------------------------------
8002
8003 ;; -------------------------------------------------------------------------
8004 ;; ---- [FP<-INT] Conversions
8005 ;; -------------------------------------------------------------------------
8006 ;; Includes:
8007 ;; - SCVTF
8008 ;; - UCVTF
8009 ;; -------------------------------------------------------------------------
8010
8011 ;; Unpredicated conversion of integers to floats of the same size
8012 ;; (HI to HF, SI to SF or DI to DF).
8013 (define_expand "<optab><v_int_equiv><mode>2"
8014 [(set (match_operand:SVE_FULL_F 0 "register_operand")
8015 (unspec:SVE_FULL_F
8016 [(match_dup 2)
8017 (const_int SVE_RELAXED_GP)
8018 (match_operand:<V_INT_EQUIV> 1 "register_operand")]
8019 SVE_COND_ICVTF))]
8020 "TARGET_SVE"
8021 {
8022 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8023 }
8024 )
8025
8026 ;; Predicated integer-to-float conversion, either to the same width or
8027 ;; narrower.
8028 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8029 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
8030 (unspec:SVE_FULL_F
8031 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
8032 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8033 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
8034 SVE_COND_ICVTF))]
8035 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8036 "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
8037 )
8038
8039 ;; Predicated widening integer-to-float conversion.
8040 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8041 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
8042 (unspec:VNx2DF_ONLY
8043 [(match_operand:VNx2BI 1 "register_operand" "Upl")
8044 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8045 (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
8046 SVE_COND_ICVTF))]
8047 "TARGET_SVE"
8048 "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
8049 )
8050
8051 ;; Predicated integer-to-float conversion with merging, either to the same
8052 ;; width or narrower.
8053 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8054 [(set (match_operand:SVE_FULL_F 0 "register_operand")
8055 (unspec:SVE_FULL_F
8056 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
8057 (unspec:SVE_FULL_F
8058 [(match_dup 1)
8059 (const_int SVE_STRICT_GP)
8060 (match_operand:SVE_FULL_HSDI 2 "register_operand")]
8061 SVE_COND_ICVTF)
8062 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
8063 UNSPEC_SEL))]
8064 "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8065 )
8066
8067 ;; The first alternative doesn't need the earlyclobber, but the only case
8068 ;; it would help is the uninteresting one in which operands 2 and 3 are
8069 ;; the same register (despite having different modes). Making all the
8070 ;; alternatives earlyclobber makes things more consistent for the
8071 ;; register allocator.
8072 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8073 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
8074 (unspec:SVE_FULL_F
8075 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8076 (unspec:SVE_FULL_F
8077 [(match_operand 4)
8078 (match_operand:SI 5 "aarch64_sve_gp_strictness")
8079 (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
8080 SVE_COND_ICVTF)
8081 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8082 UNSPEC_SEL))]
8083 "TARGET_SVE
8084 && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
8085 && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
8086 "@
8087 <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
8088 movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
8089 movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
8090 "&& !rtx_equal_p (operands[1], operands[4])"
8091 {
8092 operands[4] = copy_rtx (operands[1]);
8093 }
8094 [(set_attr "movprfx" "*,yes,yes")]
8095 )
8096
8097 ;; Predicated widening integer-to-float conversion with merging.
8098 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8099 [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
8100 (unspec:VNx2DF_ONLY
8101 [(match_operand:VNx2BI 1 "register_operand")
8102 (unspec:VNx2DF_ONLY
8103 [(match_dup 1)
8104 (const_int SVE_STRICT_GP)
8105 (match_operand:VNx4SI_ONLY 2 "register_operand")]
8106 SVE_COND_ICVTF)
8107 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
8108 UNSPEC_SEL))]
8109 "TARGET_SVE"
8110 )
8111
8112 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8113 [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
8114 (unspec:VNx2DF_ONLY
8115 [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
8116 (unspec:VNx2DF_ONLY
8117 [(match_dup 1)
8118 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8119 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
8120 SVE_COND_ICVTF)
8121 (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8122 UNSPEC_SEL))]
8123 "TARGET_SVE"
8124 "@
8125 <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
8126 movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
8127 movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
8128 [(set_attr "movprfx" "*,yes,yes")]
8129 )
8130
8131 ;; -------------------------------------------------------------------------
8132 ;; ---- [FP<-INT] Packs
8133 ;; -------------------------------------------------------------------------
8134 ;; No patterns here yet!
8135 ;; -------------------------------------------------------------------------
8136
8137 ;; -------------------------------------------------------------------------
8138 ;; ---- [FP<-INT] Unpacks
8139 ;; -------------------------------------------------------------------------
8140 ;; The patterns in this section are synthetic.
8141 ;; -------------------------------------------------------------------------
8142
8143 ;; Unpack one half of a VNx4SI to VNx2DF. First unpack from VNx4SI
8144 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
8145 ;; unpacked VNx4SI to VNx2DF.
8146 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
8147 [(match_operand:VNx2DF 0 "register_operand")
8148 (FLOATUORS:VNx2DF
8149 (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
8150 UNPACK_UNSIGNED))]
8151 "TARGET_SVE"
8152 {
8153 /* Use ZIP to do the unpack, since we don't care about the upper halves
8154 and since it has the nice property of not needing any subregs.
8155 If using UUNPK* turns out to be preferable, we could model it as
8156 a ZIP whose first operand is zero. */
8157 rtx temp = gen_reg_rtx (VNx4SImode);
8158 emit_insn ((<hi_lanes_optab>
8159 ? gen_aarch64_sve_zip2vnx4si
8160 : gen_aarch64_sve_zip1vnx4si)
8161 (temp, operands[1], operands[1]));
8162 rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
8163 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8164 emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
8165 (operands[0], ptrue, temp, strictness));
8166 DONE;
8167 }
8168 )
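
;; Illustrative note (ours): ZIP1 of a vector with itself yields lanes
;; { x0, x0, x1, x1, ... }, so each 64-bit container of the result holds
;; the wanted 32-bit value in the half that the widening SCVTF/UCVTF
;; reads; the duplicated copies in the other halves are simply ignored.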
8169
8170 ;; -------------------------------------------------------------------------
8171 ;; ---- [FP<-FP] Packs
8172 ;; -------------------------------------------------------------------------
8173 ;; Includes:
8174 ;; - FCVT
8175 ;; -------------------------------------------------------------------------
8176
8177 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
8178 ;; the results into a single vector.
8179 (define_expand "vec_pack_trunc_<Vwide>"
8180 [(set (match_dup 4)
8181 (unspec:SVE_FULL_HSF
8182 [(match_dup 3)
8183 (const_int SVE_RELAXED_GP)
8184 (match_operand:<VWIDE> 1 "register_operand")]
8185 UNSPEC_COND_FCVT))
8186 (set (match_dup 5)
8187 (unspec:SVE_FULL_HSF
8188 [(match_dup 3)
8189 (const_int SVE_RELAXED_GP)
8190 (match_operand:<VWIDE> 2 "register_operand")]
8191 UNSPEC_COND_FCVT))
8192 (set (match_operand:SVE_FULL_HSF 0 "register_operand")
8193 (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
8194 "TARGET_SVE"
8195 {
8196 operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8197 operands[4] = gen_reg_rtx (<MODE>mode);
8198 operands[5] = gen_reg_rtx (<MODE>mode);
8199 }
8200 )
8201
8202 ;; Predicated float-to-float truncation.
8203 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8204 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
8205 (unspec:SVE_FULL_HSF
8206 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8207 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8208 (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
8209 SVE_COND_FCVT))]
8210 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8211 "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8212 )
8213
8214 ;; Predicated float-to-float truncation with merging.
8215 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8216 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
8217 (unspec:SVE_FULL_HSF
8218 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8219 (unspec:SVE_FULL_HSF
8220 [(match_dup 1)
8221 (const_int SVE_STRICT_GP)
8222 (match_operand:SVE_FULL_SDF 2 "register_operand")]
8223 SVE_COND_FCVT)
8224 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
8225 UNSPEC_SEL))]
8226 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8227 )
8228
8229 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8230 [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
8231 (unspec:SVE_FULL_HSF
8232 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8233 (unspec:SVE_FULL_HSF
8234 [(match_dup 1)
8235 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8236 (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
8237 SVE_COND_FCVT)
8238 (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8239 UNSPEC_SEL))]
8240 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8241 "@
8242 fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8243 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8244 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8245 [(set_attr "movprfx" "*,yes,yes")]
8246 )
8247
8248 ;; -------------------------------------------------------------------------
8249 ;; ---- [FP<-FP] Packs (bfloat16)
8250 ;; -------------------------------------------------------------------------
8251 ;; Includes:
8252 ;; - BFCVT (BF16)
8253 ;; - BFCVTNT (BF16)
8254 ;; -------------------------------------------------------------------------
8255
8256 ;; Predicated BFCVT.
8257 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8258 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
8259 (unspec:VNx8BF_ONLY
8260 [(match_operand:VNx4BI 1 "register_operand" "Upl")
8261 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8262 (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
8263 SVE_COND_FCVT))]
8264 "TARGET_SVE_BF16"
8265 "bfcvt\t%0.h, %1/m, %2.s"
8266 )
8267
8268 ;; Predicated BFCVT with merging.
8269 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8270 [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
8271 (unspec:VNx8BF_ONLY
8272 [(match_operand:VNx4BI 1 "register_operand")
8273 (unspec:VNx8BF_ONLY
8274 [(match_dup 1)
8275 (const_int SVE_STRICT_GP)
8276 (match_operand:VNx4SF_ONLY 2 "register_operand")]
8277 SVE_COND_FCVT)
8278 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
8279 UNSPEC_SEL))]
8280 "TARGET_SVE_BF16"
8281 )
8282
8283 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8284 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
8285 (unspec:VNx8BF_ONLY
8286 [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
8287 (unspec:VNx8BF_ONLY
8288 [(match_dup 1)
8289 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8290 (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
8291 SVE_COND_FCVT)
8292 (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8293 UNSPEC_SEL))]
8294 "TARGET_SVE_BF16"
8295 "@
8296 bfcvt\t%0.h, %1/m, %2.s
8297 movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
8298 movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
8299 [(set_attr "movprfx" "*,yes,yes")]
8300 )
8301
8302 ;; Predicated BFCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
8303 ;; pair because the even elements always have to be supplied for active
8304 ;; elements, even if the inactive elements don't matter.
8305 ;;
8306 ;; This instruction does not take MOVPRFX.
8307 (define_insn "@aarch64_sve_cvtnt<mode>"
8308 [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
8309 (unspec:VNx8BF_ONLY
8310 [(match_operand:VNx4BI 2 "register_operand" "Upl")
8311 (const_int SVE_STRICT_GP)
8312 (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
8313 (match_operand:VNx4SF 3 "register_operand" "w")]
8314 UNSPEC_COND_FCVTNT))]
8315 "TARGET_SVE_BF16"
8316 "bfcvtnt\t%0.h, %2/m, %3.s"
8317 )
8318
8319 ;; -------------------------------------------------------------------------
8320 ;; ---- [FP<-FP] Unpacks
8321 ;; -------------------------------------------------------------------------
8322 ;; Includes:
8323 ;; - FCVT
8324 ;; -------------------------------------------------------------------------
8325
8326 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
8327 ;; First unpack the source without conversion, then float-convert the
8328 ;; unpacked source.
8329 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
8330 [(match_operand:<VWIDE> 0 "register_operand")
8331 (unspec:SVE_FULL_HSF
8332 [(match_operand:SVE_FULL_HSF 1 "register_operand")]
8333 UNPACK_UNSIGNED)]
8334 "TARGET_SVE"
8335 {
8336 /* Use ZIP to do the unpack, since we don't care about the upper halves
8337 and since it has the nice property of not needing any subregs.
8338 If using UUNPK* turns out to be preferable, we could model it as
8339 a ZIP whose first operand is zero. */
8340 rtx temp = gen_reg_rtx (<MODE>mode);
8341 emit_insn ((<hi_lanes_optab>
8342 ? gen_aarch64_sve_zip2<mode>
8343 : gen_aarch64_sve_zip1<mode>)
8344 (temp, operands[1], operands[1]));
8345 rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8346 rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8347 emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
8348 (operands[0], ptrue, temp, strictness));
8349 DONE;
8350 }
8351 )
8352
8353 ;; Predicated float-to-float extension.
8354 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8355 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
8356 (unspec:SVE_FULL_SDF
8357 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8358 (match_operand:SI 3 "aarch64_sve_gp_strictness")
8359 (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
8360 SVE_COND_FCVT))]
8361 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8362 "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8363 )
8364
8365 ;; Predicated float-to-float extension with merging.
8366 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8367 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
8368 (unspec:SVE_FULL_SDF
8369 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8370 (unspec:SVE_FULL_SDF
8371 [(match_dup 1)
8372 (const_int SVE_STRICT_GP)
8373 (match_operand:SVE_FULL_HSF 2 "register_operand")]
8374 SVE_COND_FCVT)
8375 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
8376 UNSPEC_SEL))]
8377 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8378 )
8379
8380 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8381 [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
8382 (unspec:SVE_FULL_SDF
8383 [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8384 (unspec:SVE_FULL_SDF
8385 [(match_dup 1)
8386 (match_operand:SI 4 "aarch64_sve_gp_strictness")
8387 (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
8388 SVE_COND_FCVT)
8389 (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8390 UNSPEC_SEL))]
8391 "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8392 "@
8393 fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8394 movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8395 movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8396 [(set_attr "movprfx" "*,yes,yes")]
8397 )
8398
8399 ;; -------------------------------------------------------------------------
8400 ;; ---- [PRED<-PRED] Packs
8401 ;; -------------------------------------------------------------------------
8402 ;; Includes:
8403 ;; - UZP1
8404 ;; -------------------------------------------------------------------------
8405
8406 ;; Predicate pack. Use UZP1 on the narrower type, which discards
8407 ;; the high part of each wide element.
8408 (define_insn "vec_pack_trunc_<Vwide>"
8409 [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
8410 (unspec:PRED_BHS
8411 [(match_operand:<VWIDE> 1 "register_operand" "Upa")
8412 (match_operand:<VWIDE> 2 "register_operand" "Upa")]
8413 UNSPEC_PACK))]
8414 "TARGET_SVE"
8415 "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8416 )
8417
8418 ;; -------------------------------------------------------------------------
8419 ;; ---- [PRED<-PRED] Unpacks
8420 ;; -------------------------------------------------------------------------
8421 ;; Includes:
8422 ;; - PUNPKHI
8423 ;; - PUNPKLO
8424 ;; -------------------------------------------------------------------------
8425
8426 ;; Unpack the low or high half of a predicate, where "high" refers to
8427 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8428 ;; for little-endian.
8429 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
8430 [(match_operand:<VWIDE> 0 "register_operand")
8431 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
8432 UNPACK)]
8433 "TARGET_SVE"
8434 {
8435 emit_insn ((<hi_lanes_optab>
8436 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
8437 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
8438 (operands[0], operands[1]));
8439 DONE;
8440 }
8441 )
8442
8443 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
8444 [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
8445 (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
8446 UNPACK_UNSIGNED))]
8447 "TARGET_SVE"
8448 "punpk<perm_hilo>\t%0.h, %1.b"
8449 )
8450
8451 ;; =========================================================================
8452 ;; == Vector partitioning
8453 ;; =========================================================================
8454
8455 ;; -------------------------------------------------------------------------
8456 ;; ---- [PRED] Unary partitioning
8457 ;; -------------------------------------------------------------------------
8458 ;; Includes:
8459 ;; - BRKA
8460 ;; - BRKAS
8461 ;; - BRKB
8462 ;; - BRKBS
8463 ;; -------------------------------------------------------------------------
8464
8465 ;; Note that unlike most other instructions that have both merging and
8466 ;; zeroing forms, these instructions don't operate elementwise and so
8467 ;; don't fit the IFN_COND model.
8468 (define_insn "@aarch64_brk<brk_op>"
8469 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8470 (unspec:VNx16BI
8471 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8472 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8473 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8474 SVE_BRK_UNARY))]
8475 "TARGET_SVE"
8476 "@
8477 brk<brk_op>\t%0.b, %1/z, %2.b
8478 brk<brk_op>\t%0.b, %1/m, %2.b"
8479 )
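
;; ACLE-level sketch (ours; BRKA shown): the result is true for elements
;; up to and including the first active true element of OP, and false
;; afterwards:
;;
;;   #include <arm_sve.h>
;;
;;   svbool_t
;;   up_to_first (svbool_t pg, svbool_t op)
;;   {
;;     return svbrka_z (pg, op);
;;   }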
8480
8481 ;; Same, but also producing a flags result.
8482 (define_insn "*aarch64_brk<brk_op>_cc"
8483 [(set (reg:CC_NZC CC_REGNUM)
8484 (unspec:CC_NZC
8485 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8486 (match_dup 1)
8487 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8488 (unspec:VNx16BI
8489 [(match_dup 1)
8490 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8491 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8492 SVE_BRK_UNARY)]
8493 UNSPEC_PTEST))
8494 (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8495 (unspec:VNx16BI
8496 [(match_dup 1)
8497 (match_dup 2)
8498 (match_dup 3)]
8499 SVE_BRK_UNARY))]
8500 "TARGET_SVE"
8501 "@
8502 brk<brk_op>s\t%0.b, %1/z, %2.b
8503 brk<brk_op>s\t%0.b, %1/m, %2.b"
8504 )
8505
8506 ;; Same, but with only the flags result being interesting.
8507 (define_insn "*aarch64_brk<brk_op>_ptest"
8508 [(set (reg:CC_NZC CC_REGNUM)
8509 (unspec:CC_NZC
8510 [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8511 (match_dup 1)
8512 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8513 (unspec:VNx16BI
8514 [(match_dup 1)
8515 (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8516 (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8517 SVE_BRK_UNARY)]
8518 UNSPEC_PTEST))
8519 (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
8520 "TARGET_SVE"
8521 "@
8522 brk<brk_op>s\t%0.b, %1/z, %2.b
8523 brk<brk_op>s\t%0.b, %1/m, %2.b"
8524 )
8525
8526 ;; -------------------------------------------------------------------------
8527 ;; ---- [PRED] Binary partitioning
8528 ;; -------------------------------------------------------------------------
8529 ;; Includes:
8530 ;; - BRKN
8531 ;; - BRKNS
8532 ;; - BRKPA
8533 ;; - BRKPAS
8534 ;; - BRKPB
8535 ;; - BRKPBS
8536 ;; -------------------------------------------------------------------------
8537
8538 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
8539 (define_insn "@aarch64_brk<brk_op>"
8540 [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8541 (unspec:VNx16BI
8542 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8543 (match_operand:VNx16BI 2 "register_operand" "Upa")
8544 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8545 SVE_BRK_BINARY))]
8546 "TARGET_SVE"
8547 "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8548 )
8549
8550 ;; Same, but also producing a flags result.
8551 (define_insn "*aarch64_brk<brk_op>_cc"
8552 [(set (reg:CC_NZC CC_REGNUM)
8553 (unspec:CC_NZC
8554 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8555 (match_dup 1)
8556 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8557 (unspec:VNx16BI
8558 [(match_dup 1)
8559 (match_operand:VNx16BI 2 "register_operand" "Upa")
8560 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8561 SVE_BRK_BINARY)]
8562 UNSPEC_PTEST))
8563 (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8564 (unspec:VNx16BI
8565 [(match_dup 1)
8566 (match_dup 2)
8567 (match_dup 3)]
8568 SVE_BRK_BINARY))]
8569 "TARGET_SVE"
8570 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8571 )
8572
8573 ;; Same, but with only the flags result being interesting.
8574 (define_insn "*aarch64_brk<brk_op>_ptest"
8575 [(set (reg:CC_NZC CC_REGNUM)
8576 (unspec:CC_NZC
8577 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8578 (match_dup 1)
8579 (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8580 (unspec:VNx16BI
8581 [(match_dup 1)
8582 (match_operand:VNx16BI 2 "register_operand" "Upa")
8583 (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8584 SVE_BRK_BINARY)]
8585 UNSPEC_PTEST))
8586 (clobber (match_scratch:VNx16BI 0 "=Upa"))]
8587 "TARGET_SVE"
8588 "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8589 )
8590
8591 ;; -------------------------------------------------------------------------
8592 ;; ---- [PRED] Scalarization
8593 ;; -------------------------------------------------------------------------
8594 ;; Includes:
8595 ;; - PFIRST
8596 ;; - PNEXT
8597 ;; -------------------------------------------------------------------------
8598
8599 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
8600 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8601 (unspec:PRED_ALL
8602 [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8603 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8604 (match_operand:PRED_ALL 3 "register_operand" "0")]
8605 SVE_PITER))
8606 (clobber (reg:CC_NZC CC_REGNUM))]
8607 "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8608 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8609 )
8610
8611 ;; Same, but also producing a flags result.
8612 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
8613 [(set (reg:CC_NZC CC_REGNUM)
8614 (unspec:CC_NZC
8615 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8616 (match_operand 2)
8617 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8618 (unspec:PRED_ALL
8619 [(match_operand 4)
8620 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8621 (match_operand:PRED_ALL 6 "register_operand" "0")]
8622 SVE_PITER)]
8623 UNSPEC_PTEST))
8624 (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8625 (unspec:PRED_ALL
8626 [(match_dup 4)
8627 (match_dup 5)
8628 (match_dup 6)]
8629 SVE_PITER))]
8630 "TARGET_SVE
8631 && <max_elem_bits> >= <elem_bits>
8632 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8633 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8634 "&& !rtx_equal_p (operands[2], operands[4])"
8635 {
8636 operands[4] = operands[2];
8637 operands[5] = operands[3];
8638 }
8639 )
8640
8641 ;; Same, but with only the flags result being interesting.
8642 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
8643 [(set (reg:CC_NZC CC_REGNUM)
8644 (unspec:CC_NZC
8645 [(match_operand:VNx16BI 1 "register_operand" "Upa")
8646 (match_operand 2)
8647 (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8648 (unspec:PRED_ALL
8649 [(match_operand 4)
8650 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8651 (match_operand:PRED_ALL 6 "register_operand" "0")]
8652 SVE_PITER)]
8653 UNSPEC_PTEST))
8654 (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8655 "TARGET_SVE
8656 && <max_elem_bits> >= <elem_bits>
8657 && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8658 "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8659 "&& !rtx_equal_p (operands[2], operands[4])"
8660 {
8661 operands[4] = operands[2];
8662 operands[5] = operands[3];
8663 }
8664 )

;; =========================================================================
;; == Counting elements
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTB
;; - CNTD
;; - CNTH
;; - CNTW
;; -------------------------------------------------------------------------

;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:DI 1 "const_int_operand")
		      (match_operand:DI 2 "const_int_operand")
		      (match_operand:DI 3 "const_int_operand")]
		     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
  }
)
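
;; For example (a sketch with assumed operand values, not compiler
;; output): operands (SV_VL4, 8, 2) would be printed by the helper as
;;   cnth x0, vl4, mul #2
;; i.e. twice the number of .h elements selected by pattern VL4.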

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI (zero_extend:DI
		       (unspec:SI [(match_operand:DI 2 "const_int_operand")
				   (match_operand:DI 3 "const_int_operand")
				   (match_operand:DI 4 "const_int_operand")]
				  UNSPEC_SVE_CNT_PAT))
		     (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
						 operands + 2);
  }
)
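
;; For example (sketch, assumed operand values): operands (SV_VL8, 4, 3)
;; would be printed as
;;   incw x0, vl8, mul #3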

;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
			     (match_operand:DI 3 "const_int_operand")
			     (match_operand:DI 4 "const_int_operand")]
			    UNSPEC_SVE_CNT_PAT)
		 (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
  }
)
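
;; Note that the template above prints the X view of operand 0: the
;; non-saturating INC only exists in an X-register form, and since the
;; SET only defines the low 32 bits, writing the full 64-bit register
;; is harmless here.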

;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
						 operands + 2);
  }
)
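
;; For example (sketch): the signed form names both register views of
;; operand 0 while the unsigned form names only the 32-bit view:
;;   sqinch x0, w0, vl8   (result sign-extended into x0)
;;   uqinch w0, vl8       (the w-register write zero-extends into x0)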

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
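
;; For example (sketch): when operand 0 is not tied to operand 1, the
;; second alternative emits a move prefix before the destructive
;; increment:
;;   movprfx z0, z1
;;   incd    z0.d, vl4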

;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)
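
;; An explanatory sketch of the RTL involved (not a new pattern): after
;; expansion, the (truncate:HI ...) in the expander above is represented
;; as a lowpart subreg of the SImode count, roughly
;;   (vec_duplicate:VNx8HI
;;     (subreg:HI (unspec:SI [...] UNSPEC_SVE_CNT_PAT) 0))
;; on little-endian targets, which is why the insn matches a
;; subreg_lowpart_operator rather than a truncate.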

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
		      (zero_extend:DI
			(unspec:SI [(match_operand:DI 2 "const_int_operand")
				    (match_operand:DI 3 "const_int_operand")
				    (match_operand:DI 4 "const_int_operand")]
				   UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
						 operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI (match_operand:SI 1 "register_operand" "0")
		  (unspec:SI [(match_operand:DI 2 "const_int_operand")
			      (match_operand:DI 3 "const_int_operand")
			      (match_operand:DI 4 "const_int_operand")]
			     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
						 operands + 2);
  }
)
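
;; For example (sketch), mirroring the increment case:
;;   sqdech x0, w0, vl8   (result sign-extended into x0)
;;   uqdech w0, vl8       (the w-register write zero-extends into x0)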

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI [(match_operand:DI 2 "const_int_operand")
			(match_operand:DI 3 "const_int_operand")
			(match_operand:DI 4 "const_int_operand")]
		       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI [(match_operand:DI 2 "const_int_operand")
			  (match_operand:DI 3 "const_int_operand")
			  (match_operand:DI 4 "const_int_operand")]
			 UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 5 "subreg_lowpart_operator"
	      [(unspec:SI [(match_operand:DI 2 "const_int_operand")
			   (match_operand:DI 3 "const_int_operand")
			   (match_operand:DI 4 "const_int_operand")]
			  UNSPEC_SVE_CNT_PAT)]))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
						 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTP
;; -------------------------------------------------------------------------

;; Count the number of set bits in a predicate.  Operand 1 is the
;; governing predicate and operand 3 is the predicate to count;
;; operand 2 is true if operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(zero_extend:DI
	  (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
		      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
		      (match_operand:PRED_ALL 3 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "cntp\t%x0, %1, %3.<Vetype>")

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
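
;; The expander above supplies an all-true governing predicate
;; (CONSTM1_RTX) together with SVE_KNOWN_PTRUE, so the count covers
;; every set bit of operand 2.  The matching insns below accept any
;; operand 3 and, in their rewrite step, canonicalize it back to the
;; constant so that equivalent forms compare equal.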

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_PLUS:DI
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))
	  (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
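
;; For example (sketch): advancing a scalar loop counter by the number
;; of active .s elements:
;;   incp x1, p0.s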

;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(plus:SI
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)
	  (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand"))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_PLUS:SI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP)
	    (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_PLUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
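
;; For example (sketch): as with the pattern-based forms, the signed
;; variant names both register views of operand 0:
;;   sqincp x0, p0.h, w0   (result sign-extended into x0)
;;   uqincp w0, p0.h       (the w-register write zero-extends into x0)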

;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx2DI
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)
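
;; For example (sketch): when operand 0 is not tied to operand 1:
;;   movprfx z0, z1
;;   incp    z0.d, p0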

;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx4SI
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))
	  (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_PLUS:VNx8HI
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand")
	  (zero_extend:DI
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(ANY_MINUS:DI
	  (match_operand:DI_ONLY 1 "register_operand" "0")
	  (zero_extend:DI
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
;; operands.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
	(minus:SI
	  (match_operand:SI 1 "register_operand" "0")
	  (unspec:SI [(match_operand 3)
		      (const_int SVE_KNOWN_PTRUE)
		      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
		     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand")
	    (unspec:SI [(match_dup 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
	(<paired_extend>:DI
	  (SAT_MINUS:SI
	    (match_operand:SI_ONLY 1 "register_operand" "0")
	    (unspec:SI [(match_operand 3)
			(const_int SVE_KNOWN_PTRUE)
			(match_operand:PRED_ALL 2 "register_operand" "Upa")]
		       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_MINUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx2DI
	  (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx2DI
	    (zero_extend:DI
	      (unspec:SI
		[(match_operand 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.d, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_dup 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx4SI
	  (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx4SI
	    (unspec:SI
	      [(match_operand 3)
	       (const_int SVE_KNOWN_PTRUE)
	       (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
	      UNSPEC_CNTP))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.s, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand")
	  (vec_duplicate:VNx8HI
	    (truncate:HI
	      (unspec:SI
		[(match_dup 3)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:<VPRED> 2 "register_operand")]
		UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
	(ANY_MINUS:VNx8HI
	  (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
	  (vec_duplicate:VNx8HI
	    (match_operator:HI 3 "subreg_lowpart_operator"
	      [(unspec:SI
		 [(match_operand 4)
		  (const_int SVE_KNOWN_PTRUE)
		  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
		 UNSPEC_CNTP)]))))]
  "TARGET_SVE"
  "@
   <inc_dec>p\t%0.h, %2
   movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "*,yes")]
)