gcc/config/aarch64/aarch64-sve.md

   1 ;; Machine description for AArch64 SVE.
   2 ;; Copyright (C) 2009-2020 Free Software Foundation, Inc.
   3 ;; Contributed by ARM Ltd.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21 ;; The file is organised into the following sections (search for the full
  22 ;; line):
  23 ;;
  24 ;; == General notes
  25 ;; ---- Note on the handling of big-endian SVE
  26 ;; ---- Description of UNSPEC_PTEST
  27 ;; ---- Description of UNSPEC_PRED_Z
  28 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
  29 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
  30 ;; ---- Note on FFR handling
  31 ;;
  32 ;; == Moves
  33 ;; ---- Moves of single vectors
  34 ;; ---- Moves of multiple vectors
  35 ;; ---- Moves of predicates
  36 ;; ---- Moves relating to the FFR
  37 ;;
  38 ;; == Loads
  39 ;; ---- Normal contiguous loads
  40 ;; ---- Extending contiguous loads
  41 ;; ---- First-faulting contiguous loads
  42 ;; ---- First-faulting extending contiguous loads
  43 ;; ---- Non-temporal contiguous loads
  44 ;; ---- Normal gather loads
  45 ;; ---- Extending gather loads
  46 ;; ---- First-faulting gather loads
  47 ;; ---- First-faulting extending gather loads
  48 ;;
  49 ;; == Prefetches
  50 ;; ---- Contiguous prefetches
  51 ;; ---- Gather prefetches
  52 ;;
  53 ;; == Stores
  54 ;; ---- Normal contiguous stores
  55 ;; ---- Truncating contiguous stores
  56 ;; ---- Non-temporal contiguous stores
  57 ;; ---- Normal scatter stores
  58 ;; ---- Truncating scatter stores
  59 ;;
  60 ;; == Vector creation
  61 ;; ---- [INT,FP] Duplicate element
  62 ;; ---- [INT,FP] Initialize from individual elements
  63 ;; ---- [INT] Linear series
  64 ;; ---- [PRED] Duplicate element
  65 ;;
  66 ;; == Vector decomposition
  67 ;; ---- [INT,FP] Extract index
  68 ;; ---- [INT,FP] Extract active element
  69 ;; ---- [PRED] Extract index
  70 ;;
  71 ;; == Unary arithmetic
  72 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
  73 ;; ---- [INT] General unary arithmetic corresponding to unspecs
  74 ;; ---- [INT] Sign and zero extension
  75 ;; ---- [INT] Truncation
  76 ;; ---- [INT] Logical inverse
  77 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
  78 ;; ---- [FP] General unary arithmetic corresponding to unspecs
  79 ;; ---- [FP] Square root
  80 ;; ---- [FP] Reciprocal square root
  81 ;; ---- [PRED] Inverse
  82
  83 ;; == Binary arithmetic
  84 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
  85 ;; ---- [INT] Addition
  86 ;; ---- [INT] Subtraction
  87 ;; ---- [INT] Take address
  88 ;; ---- [INT] Absolute difference
  89 ;; ---- [INT] Saturating addition and subtraction
  90 ;; ---- [INT] Highpart multiplication
  91 ;; ---- [INT] Division
  92 ;; ---- [INT] Binary logical operations
  93 ;; ---- [INT] Binary logical operations (inverted second input)
  94 ;; ---- [INT] Shifts (rounding towards -Inf)
  95 ;; ---- [INT] Shifts (rounding towards 0)
  96 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
  97 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
  98 ;; ---- [FP] General binary arithmetic corresponding to unspecs
  99 ;; ---- [FP] Addition
 100 ;; ---- [FP] Complex addition
 101 ;; ---- [FP] Subtraction
 102 ;; ---- [FP] Absolute difference
 103 ;; ---- [FP] Multiplication
 104 ;; ---- [FP] Division
 105 ;; ---- [FP] Binary logical operations
 106 ;; ---- [FP] Sign copying
 107 ;; ---- [FP] Maximum and minimum
 108 ;; ---- [PRED] Binary logical operations
 109 ;; ---- [PRED] Binary logical operations (inverted second input)
 110 ;; ---- [PRED] Binary logical operations (inverted result)
 111 ;;
 112 ;; == Ternary arithmetic
 113 ;; ---- [INT] MLA and MAD
 114 ;; ---- [INT] MLS and MSB
 115 ;; ---- [INT] Dot product
 116 ;; ---- [INT] Sum of absolute differences
 117 ;; ---- [INT] Matrix multiply-accumulate
 118 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
 119 ;; ---- [FP] Complex multiply-add
 120 ;; ---- [FP] Trigonometric multiply-add
 121 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
 122 ;; ---- [FP] Matrix multiply-accumulate
 123 ;;
 124 ;; == Comparisons and selects
 125 ;; ---- [INT,FP] Select based on predicates
 126 ;; ---- [INT,FP] Compare and select
 127 ;; ---- [INT] Comparisons
 128 ;; ---- [INT] While tests
 129 ;; ---- [FP] Direct comparisons
 130 ;; ---- [FP] Absolute comparisons
 131 ;; ---- [PRED] Select
 132 ;; ---- [PRED] Test bits
 133 ;;
 134 ;; == Reductions
 135 ;; ---- [INT,FP] Conditional reductions
 136 ;; ---- [INT] Tree reductions
 137 ;; ---- [FP] Tree reductions
 138 ;; ---- [FP] Left-to-right reductions
 139 ;;
 140 ;; == Permutes
 141 ;; ---- [INT,FP] General permutes
 142 ;; ---- [INT,FP] Special-purpose unary permutes
 143 ;; ---- [INT,FP] Special-purpose binary permutes
 144 ;; ---- [PRED] Special-purpose unary permutes
 145 ;; ---- [PRED] Special-purpose binary permutes
 146 ;;
 147 ;; == Conversions
 148 ;; ---- [INT<-INT] Packs
 149 ;; ---- [INT<-INT] Unpacks
 150 ;; ---- [INT<-FP] Conversions
 151 ;; ---- [INT<-FP] Packs
 152 ;; ---- [INT<-FP] Unpacks
 153 ;; ---- [FP<-INT] Conversions
 154 ;; ---- [FP<-INT] Packs
 155 ;; ---- [FP<-INT] Unpacks
 156 ;; ---- [FP<-FP] Packs
 157 ;; ---- [FP<-FP] Packs (bfloat16)
 158 ;; ---- [FP<-FP] Unpacks
 159 ;; ---- [PRED<-PRED] Packs
 160 ;; ---- [PRED<-PRED] Unpacks
 161 ;;
 162 ;; == Vector partitioning
 163 ;; ---- [PRED] Unary partitioning
 164 ;; ---- [PRED] Binary partitioning
 165 ;; ---- [PRED] Scalarization
 166 ;;
 167 ;; == Counting elements
 168 ;; ---- [INT] Count elements in a pattern (scalar)
 169 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
 170 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
 171 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
 172 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
 173 ;; ---- [INT] Count elements in a predicate (scalar)
 174 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
 175 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
 176 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
 177 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
 178
 179 ;; =========================================================================
 180 ;; == General notes
 181 ;; =========================================================================
 182 ;;
 183 ;; -------------------------------------------------------------------------
 184 ;; ---- Note on the handling of big-endian SVE
 185 ;; -------------------------------------------------------------------------
 186 ;;
 187 ;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
 188 ;; same way as movdi or movti would: the first byte of memory goes
 189 ;; into the most significant byte of the register and the last byte
 190 ;; of memory goes into the least significant byte of the register.
 191 ;; This is the most natural ordering for Advanced SIMD and matches
 192 ;; the ABI layout for 64-bit and 128-bit vector types.
 193 ;;
 194 ;; As a result, the order of bytes within the register is what GCC
 195 ;; expects for a big-endian target, and subreg offsets therefore work
 196 ;; as expected, with the first element in memory having subreg offset 0
 197 ;; and the last element in memory having the subreg offset associated
 198 ;; with a big-endian lowpart.  However, this ordering also means that
 199 ;; GCC's lane numbering does not match the architecture's numbering:
 200 ;; GCC always treats the element at the lowest address in memory
 201 ;; (subreg offset 0) as element 0, while the architecture treats
 202 ;; the least significant end of the register as element 0.
 203 ;;
 204 ;; The situation for SVE is different.  We want the layout of the
 205 ;; SVE register to be same for mov<mode> as it is for maskload<mode>:
 206 ;; logically, a mov<mode> load must be indistinguishable from a
 207 ;; maskload<mode> whose mask is all true.  We therefore need the
 208 ;; register layout to match LD1 rather than LDR.  The ABI layout of
 209 ;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
 210 ;;
 211 ;; As a result, the architecture lane numbering matches GCC's lane
 212 ;; numbering, with element 0 always being the first in memory.
 213 ;; However:
 214 ;;
 215 ;; - Applying a subreg offset to a register does not give the element
 216 ;;   that GCC expects: the first element in memory has the subreg offset
 217 ;;   associated with a big-endian lowpart while the last element in memory
 218 ;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
 219 ;;
 220 ;; - We cannot use LDR and STR for spill slots that might be accessed
 221 ;;   via subregs, since although the elements have the order GCC expects,
 222 ;;   the order of the bytes within the elements is different.  We instead
 223 ;;   access spill slots via LD1 and ST1, using secondary reloads to
 224 ;;   reserve a predicate register.
 225 ;;
 226 ;; -------------------------------------------------------------------------
 227 ;; ---- Description of UNSPEC_PTEST
 228 ;; -------------------------------------------------------------------------
 229 ;;
 230 ;; SVE provides a PTEST instruction for testing the active lanes of a
 231 ;; predicate and setting the flags based on the result.  The associated
 232 ;; condition code tests are:
 233 ;;
 234 ;; - any   (= ne): at least one active bit is set
 235 ;; - none  (= eq): all active bits are clear (*)
 236 ;; - first (= mi): the first active bit is set
 237 ;; - nfrst (= pl): the first active bit is clear (*)
 238 ;; - last  (= cc): the last active bit is set
 239 ;; - nlast (= cs): the last active bit is clear (*)
 240 ;;
 241 ;; where the conditions marked (*) are also true when there are no active
 242 ;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
 243 ;; of a PTEST use the condition code mode CC_NZC.
 244 ;;
 245 ;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
 246 ;; This means that for other predicate modes, we need a governing predicate
 247 ;; in which all bits are defined.
 248 ;;
 249 ;; For example, most predicated .H operations ignore the odd bits of the
 250 ;; governing predicate, so that an active lane is represented by the
 251 ;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
 252 ;; any value.  To test a .H predicate, we instead need "10" and "00"
 253 ;; respectively, so that the condition only tests the even bits of the
 254 ;; predicate.
 255 ;;
 256 ;; Several instructions set the flags as a side-effect, in the same way
 257 ;; that a separate PTEST would.  It's important for code quality that we
 258 ;; use these flags results as often as possible, particularly in the case
 259 ;; of WHILE* and RDFFR.
 260 ;;
 261 ;; Also, some of the instructions that set the flags are unpredicated
 262 ;; and instead implicitly test all .B, .H, .S or .D elements, as though
 263 ;; they were predicated on a PTRUE of that size.  For example, a .S
 264 ;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
 265 ;; would.
 266 ;;
 267 ;; We therefore need to represent PTEST operations in a way that
 268 ;; makes it easy to combine them with both predicated and unpredicated
 269 ;; operations, while using a VNx16BI governing predicate for all
 270 ;; predicate modes.  We do this using:
 271 ;;
 272 ;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
 273 ;;
 274 ;; where:
 275 ;;
 276 ;; - GP is the real VNx16BI governing predicate
 277 ;;
 278 ;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
 279 ;;   GP to CAST_GP are guaranteed to be clear in GP.
 280 ;;
 281 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
 282 ;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
 283 ;;   SVE_MAYBE_NOT_PTRUE otherwise.
 284 ;;
 285 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
 286 ;;
 287 ;; -------------------------------------------------------------------------
 288 ;; ---- Description of UNSPEC_PRED_Z
 289 ;; -------------------------------------------------------------------------
 290 ;;
 291 ;; SVE integer comparisons are predicated and return zero for inactive
 292 ;; lanes.  Sometimes we use them with predicates that are all-true and
 293 ;; sometimes we use them with general predicates.
 294 ;;
 295 ;; The integer comparisons also set the flags and so build-in the effect
 296 ;; of a PTEST.  We therefore want to be able to combine integer comparison
 297 ;; patterns with PTESTs of the result.  One difficulty with doing this is
 298 ;; that (as noted above) the PTEST is always a .B operation and so can place
 299 ;; stronger requirements on the governing predicate than the comparison does.
 300 ;;
 301 ;; For example, when applying a separate PTEST to the result of a full-vector
 302 ;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
 303 ;; .B PTRUE.  In constrast, the comparison might be predicated on either
 304 ;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
 305 ;; bits don't matter for .H operations.
 306 ;;
 307 ;; We therefore can't rely on a full-vector comparison using the same
 308 ;; predicate register as a following PTEST.  We instead need to remember
 309 ;; whether a comparison is known to be a full-vector comparison and use
 310 ;; this information in addition to a check for equal predicate registers.
 311 ;; At the same time, it's useful to have a common representation for all
 312 ;; integer comparisons, so that they can be handled by a single set of
 313 ;; patterns.
 314 ;;
 315 ;; We therefore take a similar approach to UNSPEC_PTEST above and use:
 316 ;;
 317 ;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
 318 ;;
 319 ;; where:
 320 ;;
 321 ;; - GP is the governing predicate, of mode <M:VPRED>
 322 ;;
 323 ;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
 324 ;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
 325 ;;   otherwise
 326 ;;
 327 ;; - CODE is the comparison code
 328 ;;
 329 ;; - OP0 and OP1 are the values being compared, of mode M
 330 ;;
 331 ;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
 332 ;;
 333 ;; -------------------------------------------------------------------------
 334 ;; ---- Note on predicated integer arithemtic and UNSPEC_PRED_X
 335 ;; -------------------------------------------------------------------------
 336 ;;
 337 ;; Many SVE integer operations are predicated.  We can generate them
 338 ;; from four sources:
 339 ;;
 340 ;; (1) Using normal unpredicated optabs.  In this case we need to create
 341 ;;     an all-true predicate register to act as the governing predicate
 342 ;;     for the SVE instruction.  There are no inactive lanes, and thus
 343 ;;     the values of inactive lanes don't matter.
 344 ;;
 345 ;; (2) Using _x ACLE functions.  In this case the function provides a
 346 ;;     specific predicate and some lanes might be inactive.  However,
 347 ;;     as for (1), the values of the inactive lanes don't matter.
 348 ;;     We can make extra lanes active without changing the behavior
 349 ;;     (although for code-quality reasons we should avoid doing so
 350 ;;     needlessly).
 351 ;;
 352 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
 353 ;;     These optabs have a predicate operand that specifies which lanes are
 354 ;;     active and another operand that provides the values of inactive lanes.
 355 ;;
 356 ;; (4) Using _m and _z ACLE functions.  These functions map to the same
 357 ;;     patterns as (3), with the _z functions setting inactive lanes to zero
 358 ;;     and the _m functions setting the inactive lanes to one of the function
 359 ;;     arguments.
 360 ;;
 361 ;; For (1) and (2) we need a way of attaching the predicate to a normal
 362 ;; unpredicated integer operation.  We do this using:
 363 ;;
 364 ;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
 365 ;;
 366 ;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
 367 ;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
 368 ;; it always is for (1), but might not be for (2).
 369 ;;
 370 ;; The unspec as a whole has the same value as (code:M ...) when PRED is
 371 ;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
 372 ;; but as noted above, we should only do so if there's a specific benefit.
 373 ;;
 374 ;; (The "_X" in the unspec is named after the ACLE functions in (2).)
 375 ;;
 376 ;; For (3) and (4) we can simply use the SVE port's normal representation
 377 ;; of a predicate-based select:
 378 ;;
 379 ;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
 380 ;;
 381 ;; where INACTIVE specifies the values of inactive lanes.
 382 ;;
 383 ;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
 384 ;; than inserting the integer operation directly.  This is mostly useful
 385 ;; if we want the combine pass to merge an integer operation with an explicit
 386 ;; vcond_mask (in other words, with a following SEL instruction).  However,
 387 ;; it's generally better to merge such operations at the gimple level
 388 ;; using (3).
 389 ;;
 390 ;; -------------------------------------------------------------------------
 391 ;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 392 ;; -------------------------------------------------------------------------
 393 ;;
 394 ;; Most SVE floating-point operations are predicated.  We can generate
 395 ;; them from four sources:
 396 ;;
 397 ;; (1) Using normal unpredicated optabs.  In this case we need to create
 398 ;;     an all-true predicate register to act as the governing predicate
 399 ;;     for the SVE instruction.  There are no inactive lanes, and thus
 400 ;;     the values of inactive lanes don't matter.
 401 ;;
 402 ;; (2) Using _x ACLE functions.  In this case the function provides a
 403 ;;     specific predicate and some lanes might be inactive.  However,
 404 ;;     as for (1), the values of the inactive lanes don't matter.
 405 ;;
 406 ;;     The instruction must have the same exception behavior as the
 407 ;;     function call unless things like command-line flags specifically
 408 ;;     allow otherwise.  For example, with -ffast-math, it is OK to
 409 ;;     raise exceptions for inactive lanes, but normally it isn't.
 410 ;;
 411 ;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
 412 ;;     These optabs have a predicate operand that specifies which lanes are
 413 ;;     active and another operand that provides the values of inactive lanes.
 414 ;;
 415 ;; (4) Using _m and _z ACLE functions.  These functions map to the same
 416 ;;     patterns as (3), with the _z functions setting inactive lanes to zero
 417 ;;     and the _m functions setting the inactive lanes to one of the function
 418 ;;     arguments.
 419 ;;
 420 ;; So:
 421 ;;
 422 ;; - In (1), the predicate is known to be all true and the pattern can use
 423 ;;   unpredicated operations where available.
 424 ;;
 425 ;; - In (2), the predicate might or might not be all true.  The pattern can
 426 ;;   use unpredicated instructions if the predicate is all-true or if things
 427 ;;   like command-line flags allow exceptions for inactive lanes.
 428 ;;
 429 ;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
 430 ;;   might be inactive and inactive lanes of the result must have specific
 431 ;;   values.  There is no scope for using unpredicated instructions (and no
 432 ;;   reason to want to), so the question about command-line flags doesn't
 433 ;;   arise.
 434 ;;
 435 ;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
 436 ;; in combination with a separate predicate operand, e.g.
 437 ;;
 438 ;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
 439 ;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
 440 ;;           ....)
 441 ;;
 442 ;; because (sqrt ...) can raise an exception for any lane, including
 443 ;; inactive ones.  We therefore need to use an unspec instead.
 444 ;;
 445 ;; Also, (2) requires some way of distinguishing the case in which the
 446 ;; predicate might have inactive lanes and cannot be changed from the
 447 ;; case in which the predicate has no inactive lanes or can be changed.
 448 ;; This information is also useful when matching combined FP patterns
 449 ;; in which the predicates might not be equal.
 450 ;;
 451 ;; We therefore model FP operations as an unspec of the form:
 452 ;;
 453 ;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
 454 ;;
 455 ;; where:
 456 ;;
 457 ;; - PRED is the governing predicate.
 458 ;;
 459 ;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
 460 ;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
 461 ;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
 462 ;;
 463 ;; - OP0 OP1 ... are the normal input operands to the operation.
 464 ;;
 465 ;; - MNEMONIC is the mnemonic of the associated SVE instruction.
 466 ;;
 467 ;; -------------------------------------------------------------------------
 468 ;; ---- Note on FFR handling
 469 ;; -------------------------------------------------------------------------
 470 ;;
 471 ;; Logically we want to divide FFR-related instructions into regions
 472 ;; that contain exactly one of:
 473 ;;
 474 ;; - a single write to the FFR
 475 ;; - any number of reads from the FFR (but only one read is likely)
 476 ;; - any number of LDFF1 and LDNF1 instructions
 477 ;;
 478 ;; However, LDFF1 and LDNF1 instructions should otherwise behave like
 479 ;; normal loads as far as possible.  This means that they should be
 480 ;; schedulable within a region in the same way that LD1 would be,
 481 ;; and they should be deleted as dead if the result is unused.  The loads
 482 ;; should therefore not write to the FFR, since that would both serialize
 483 ;; the loads with respect to each other and keep the loads live for any
 484 ;; later RDFFR.
 485 ;;
 486 ;; We get around this by using a fake "FFR token" (FFRT) to help describe
 487 ;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
 488 ;; while using the FFRT keeps the instruction within its region.
 489 ;; Specifically:
 490 ;;
 491 ;; - Writes start a new FFRT region as well as setting the FFR:
 492 ;;
 493 ;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
 494 ;;
 495 ;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
 496 ;;   loads stay within the same FFRT region:
 497 ;;
 498 ;;       L1: load data while using the FFRT
 499 ;;
 500 ;;   In addition, any FFRT region that includes a load also has at least one
 501 ;;   instance of:
 502 ;;
 503 ;;       L2: FFR = update(FFR, FFRT)  [type == no_insn]
 504 ;;
 505 ;;   to make it clear that the region both reads from and writes to the FFR.
 506 ;;
 507 ;; - Reads do the following:
 508 ;;
 509 ;;       R1: FFRT = FFR               [type == no_insn]
 510 ;;       R2: read from the FFRT
 511 ;;       R3: FFRT = update(FFRT)      [type == no_insn]
 512 ;;
 513 ;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
 514 ;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
 515 ;;   cannot move backwards across R3.
 516 ;;
 517 ;; This way, writes are only kept alive by later loads or reads,
 518 ;; and write/read pairs fold normally.  For two consecutive reads,
 519 ;; the first R3 is made dead by the second R1, which in turn becomes
 520 ;; redundant with the first R1.  We then have:
 521 ;;
 522 ;;     first R1: FFRT = FFR
 523 ;;     first read from the FFRT
 524 ;;     second read from the FFRT
 525 ;;     second R3: FFRT = update(FFRT)
 526 ;;
 527 ;; i.e. the two FFRT regions collapse into a single one with two
 528 ;; independent reads.
 529 ;;
 530 ;; The model still prevents some valid optimizations though.  For example,
 531 ;; if all loads in an FFRT region are deleted as dead, nothing would remove
 532 ;; the L2 instructions.
 533
 534 ;; =========================================================================
 535 ;; == Moves
 536 ;; =========================================================================
 537
 538 ;; -------------------------------------------------------------------------
 539 ;; ---- Moves of single vectors
 540 ;; -------------------------------------------------------------------------
 541 ;; Includes:
 542 ;; - MOV  (including aliases)
 543 ;; - LD1B (contiguous form)
 544 ;; - LD1D (    "    "     )
 545 ;; - LD1H (    "    "     )
 546 ;; - LD1W (    "    "     )
 547 ;; - LDR
 548 ;; - ST1B (contiguous form)
 549 ;; - ST1D (    "    "     )
 550 ;; - ST1H (    "    "     )
 551 ;; - ST1W (    "    "     )
 552 ;; - STR
 553 ;; -------------------------------------------------------------------------
 554
 555 (define_expand "mov<mode>"
 556   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 557         (match_operand:SVE_ALL 1 "general_operand"))]
 558   "TARGET_SVE"
 559   {
 560     /* Use the predicated load and store patterns where possible.
 561        This is required for big-endian targets (see the comment at the
 562        head of the file) and increases the addressing choices for
 563        little-endian.  */
 564     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 565         && can_create_pseudo_p ())
 566       {
 567         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 568         DONE;
 569       }
 570
 571     if (CONSTANT_P (operands[1]))
 572       {
 573         aarch64_expand_mov_immediate (operands[0], operands[1]);
 574         DONE;
 575       }
 576
 577     /* Optimize subregs on big-endian targets: we can use REV[BHW]
 578        instead of going through memory.  */
 579     if (BYTES_BIG_ENDIAN
 580         && aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
 581       DONE;
 582   }
 583 )
 584
 585 (define_expand "movmisalign<mode>"
 586   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
 587         (match_operand:SVE_ALL 1 "general_operand"))]
 588   "TARGET_SVE"
 589   {
 590     /* Equivalent to a normal move for our purpooses.  */
 591     emit_move_insn (operands[0], operands[1]);
 592     DONE;
 593   }
 594 )
 595
 596 ;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
 597 ;; little-endian ordering is acceptable.  Only allow memory operations during
 598 ;; and after RA; before RA we want the predicated load and store patterns to
 599 ;; be used instead.
 600 (define_insn "*aarch64_sve_mov<mode>_ldr_str"
 601   [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 602         (match_operand:SVE_FULL 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 603   "TARGET_SVE
 604    && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
 605    && ((lra_in_progress || reload_completed)
 606        || (register_operand (operands[0], <MODE>mode)
 607            && nonmemory_operand (operands[1], <MODE>mode)))"
 608   "@
 609    ldr\t%0, %1
 610    str\t%1, %0
 611    mov\t%0.d, %1.d
 612    * return aarch64_output_sve_mov_immediate (operands[1]);"
 613 )
 614
 615 ;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
 616 ;; or vectors for which little-endian ordering isn't acceptable.  Memory
 617 ;; accesses require secondary reloads.
 618 (define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
 619   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w")
 620         (match_operand:SVE_ALL 1 "aarch64_nonmemory_operand" "w, Dn"))]
 621   "TARGET_SVE
 622    && <MODE>mode != VNx16QImode
 623    && (BYTES_BIG_ENDIAN
 624        || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
 625   "@
 626    mov\t%0.d, %1.d
 627    * return aarch64_output_sve_mov_immediate (operands[1]);"
 628 )
 629
 630 ;; Handle memory reloads for modes that can't use LDR and STR.  We use
 631 ;; byte PTRUE for all modes to try to encourage reuse.  This pattern
 632 ;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
 633 (define_expand "aarch64_sve_reload_mem"
 634   [(parallel
 635      [(set (match_operand 0)
 636            (match_operand 1))
 637       (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
 638   "TARGET_SVE"
 639   {
 640     /* Create a PTRUE.  */
 641     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
 642
 643     /* Refer to the PTRUE in the appropriate mode for this move.  */
 644     machine_mode mode = GET_MODE (operands[0]);
 645     rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);
 646
 647     /* Emit a predicated load or store.  */
 648     aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
 649     DONE;
 650   }
 651 )
 652
 653 ;; A predicated move in which the predicate is known to be all-true.
 654 ;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
 655 ;; so changes to this pattern will need changes there as well.
 656 (define_insn_and_split "@aarch64_pred_mov<mode>"
 657   [(set (match_operand:SVE_ALL 0 "nonimmediate_operand" "=w, w, m")
 658         (unspec:SVE_ALL
 659           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 660            (match_operand:SVE_ALL 2 "nonimmediate_operand" "w, m, w")]
 661           UNSPEC_PRED_X))]
 662   "TARGET_SVE
 663    && (register_operand (operands[0], <MODE>mode)
 664        || register_operand (operands[2], <MODE>mode))"
 665   "@
 666    #
 667    ld1<Vesize>\t%0.<Vctype>, %1/z, %2
 668    st1<Vesize>\t%2.<Vctype>, %1, %0"
 669   "&& register_operand (operands[0], <MODE>mode)
 670    && register_operand (operands[2], <MODE>mode)"
 671   [(set (match_dup 0) (match_dup 2))]
 672 )
 673
 674 ;; A pattern for optimizing SUBREGs that have a reinterpreting effect
 675 ;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
 676 ;; for details.  We use a special predicate for operand 2 to reduce
 677 ;; the number of patterns.
 678 (define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
 679   [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
 680         (unspec:SVE_ALL
 681           [(match_operand:VNx16BI 1 "register_operand" "Upl")
 682            (match_operand 2 "aarch64_any_register_operand" "w")]
 683           UNSPEC_REV_SUBREG))]
 684   "TARGET_SVE && BYTES_BIG_ENDIAN"
 685   "#"
 686   "&& reload_completed"
 687   [(const_int 0)]
 688   {
 689     aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
 690     DONE;
 691   }
 692 )
 693
 694 ;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
 695 ;; This is equivalent to a subreg on little-endian targets but not for
 696 ;; big-endian; see the comment at the head of the file for details.
 697 (define_expand "@aarch64_sve_reinterpret<mode>"
 698   [(set (match_operand:SVE_ALL 0 "register_operand")
 699         (unspec:SVE_ALL
 700           [(match_operand 1 "aarch64_any_register_operand")]
 701           UNSPEC_REINTERPRET))]
 702   "TARGET_SVE"
 703   {
 704     machine_mode src_mode = GET_MODE (operands[1]);
 705     if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
 706       {
 707         emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
 708         DONE;
 709       }
 710   }
 711 )
 712
 713 ;; A pattern for handling type punning on big-endian targets.  We use a
 714 ;; special predicate for operand 1 to reduce the number of patterns.
 715 (define_insn_and_split "*aarch64_sve_reinterpret<mode>"
 716   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
 717         (unspec:SVE_ALL
 718           [(match_operand 1 "aarch64_any_register_operand" "w")]
 719           UNSPEC_REINTERPRET))]
 720   "TARGET_SVE"
 721   "#"
 722   "&& reload_completed"
 723   [(set (match_dup 0) (match_dup 1))]
 724   {
 725     operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
 726   }
 727 )
 728
 729 ;; -------------------------------------------------------------------------
 730 ;; ---- Moves of multiple vectors
 731 ;; -------------------------------------------------------------------------
 732 ;; All patterns in this section are synthetic and split to real
 733 ;; instructions after reload.
 734 ;; -------------------------------------------------------------------------
 735
 736 (define_expand "mov<mode>"
 737   [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
 738         (match_operand:SVE_STRUCT 1 "general_operand"))]
 739   "TARGET_SVE"
 740   {
 741     /* Big-endian loads and stores need to be done via LD1 and ST1;
 742        see the comment at the head of the file for details.  */
 743     if ((MEM_P (operands[0]) || MEM_P (operands[1]))
 744         && BYTES_BIG_ENDIAN)
 745       {
 746         gcc_assert (can_create_pseudo_p ());
 747         aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
 748         DONE;
 749       }
 750
 751     if (CONSTANT_P (operands[1]))
 752       {
 753         aarch64_expand_mov_immediate (operands[0], operands[1]);
 754         DONE;
 755       }
 756   }
 757 )
 758
 759 ;; Unpredicated structure moves (little-endian).
 760 (define_insn "*aarch64_sve_mov<mode>_le"
 761   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
 762         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
 763   "TARGET_SVE && !BYTES_BIG_ENDIAN"
 764   "#"
 765   [(set_attr "length" "<insn_length>")]
 766 )
 767
 768 ;; Unpredicated structure moves (big-endian).  Memory accesses require
 769 ;; secondary reloads.
 770 (define_insn "*aarch64_sve_mov<mode>_be"
 771   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
 772         (match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
 773   "TARGET_SVE && BYTES_BIG_ENDIAN"
 774   "#"
 775   [(set_attr "length" "<insn_length>")]
 776 )
 777
 778 ;; Split unpredicated structure moves into pieces.  This is the same
 779 ;; for both big-endian and little-endian code, although it only needs
 780 ;; to handle memory operands for little-endian code.
 781 (define_split
 782   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
 783         (match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
 784   "TARGET_SVE && reload_completed"
 785   [(const_int 0)]
 786   {
 787     rtx dest = operands[0];
 788     rtx src = operands[1];
 789     if (REG_P (dest) && REG_P (src))
 790       aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
 791     else
 792       for (unsigned int i = 0; i < <vector_count>; ++i)
 793         {
 794           rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
 795                                              i * BYTES_PER_SVE_VECTOR);
 796           rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
 797                                             i * BYTES_PER_SVE_VECTOR);
 798           emit_insn (gen_rtx_SET (subdest, subsrc));
 799         }
 800     DONE;
 801   }
 802 )
 803
 804 ;; Predicated structure moves.  This works for both endiannesses but in
 805 ;; practice is only useful for big-endian.
 806 (define_insn_and_split "@aarch64_pred_mov<mode>"
 807   [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
 808         (unspec:SVE_STRUCT
 809           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 810            (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
 811           UNSPEC_PRED_X))]
 812   "TARGET_SVE
 813    && (register_operand (operands[0], <MODE>mode)
 814        || register_operand (operands[2], <MODE>mode))"
 815   "#"
 816   "&& reload_completed"
 817   [(const_int 0)]
 818   {
 819     for (unsigned int i = 0; i < <vector_count>; ++i)
 820       {
 821         rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
 822                                            <MODE>mode,
 823                                            i * BYTES_PER_SVE_VECTOR);
 824         rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
 825                                           <MODE>mode,
 826                                           i * BYTES_PER_SVE_VECTOR);
 827         aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
 828       }
 829     DONE;
 830   }
 831   [(set_attr "length" "<insn_length>")]
 832 )
 833
 834 ;; -------------------------------------------------------------------------
 835 ;; ---- Moves of predicates
 836 ;; -------------------------------------------------------------------------
 837 ;; Includes:
 838 ;; - MOV
 839 ;; - LDR
 840 ;; - PFALSE
 841 ;; - PTRUE
 842 ;; - PTRUES
 843 ;; - STR
 844 ;; -------------------------------------------------------------------------
 845
 846 (define_expand "mov<mode>"
 847   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
 848         (match_operand:PRED_ALL 1 "general_operand"))]
 849   "TARGET_SVE"
 850   {
 851     if (GET_CODE (operands[0]) == MEM)
 852       operands[1] = force_reg (<MODE>mode, operands[1]);
 853
 854     if (CONSTANT_P (operands[1]))
 855       {
 856         aarch64_expand_mov_immediate (operands[0], operands[1]);
 857         DONE;
 858       }
 859   }
 860 )
 861
 862 (define_insn "*aarch64_sve_mov<mode>"
 863   [(set (match_operand:PRED_ALL 0 "nonimmediate_operand" "=Upa, m, Upa, Upa")
 864         (match_operand:PRED_ALL 1 "aarch64_mov_operand" "Upa, Upa, m, Dn"))]
 865   "TARGET_SVE
 866    && (register_operand (operands[0], <MODE>mode)
 867        || register_operand (operands[1], <MODE>mode))"
 868   "@
 869    mov\t%0.b, %1.b
 870    str\t%1, %0
 871    ldr\t%0, %1
 872    * return aarch64_output_sve_mov_immediate (operands[1]);"
 873 )
 874
 875 ;; Match PTRUES Pn.B when both the predicate and flags are useful.
 876 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
 877   [(set (reg:CC_NZC CC_REGNUM)
 878         (unspec:CC_NZC
 879           [(match_operand 2)
 880            (match_operand 3)
 881            (const_int SVE_KNOWN_PTRUE)
 882            (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 883              [(unspec:VNx16BI
 884                 [(match_operand:SI 4 "const_int_operand")
 885                  (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
 886                 UNSPEC_PTRUE)])]
 887           UNSPEC_PTEST))
 888    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 889         (match_dup 1))]
 890   "TARGET_SVE"
 891   {
 892     return aarch64_output_sve_ptrues (operands[1]);
 893   }
 894   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
 895   {
 896     operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
 897   }
 898 )
 899
 900 ;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
 901 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
 902   [(set (reg:CC_NZC CC_REGNUM)
 903         (unspec:CC_NZC
 904           [(match_operand 2)
 905            (match_operand 3)
 906            (const_int SVE_KNOWN_PTRUE)
 907            (subreg:PRED_HSD
 908              (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 909                [(unspec:VNx16BI
 910                   [(match_operand:SI 4 "const_int_operand")
 911                    (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
 912                   UNSPEC_PTRUE)]) 0)]
 913           UNSPEC_PTEST))
 914    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
 915         (match_dup 1))]
 916   "TARGET_SVE"
 917   {
 918     return aarch64_output_sve_ptrues (operands[1]);
 919   }
 920   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
 921   {
 922     operands[2] = CONSTM1_RTX (VNx16BImode);
 923     operands[3] = CONSTM1_RTX (<MODE>mode);
 924   }
 925 )
 926
 927 ;; Match PTRUES Pn.B when only the flags result is useful (which is
 928 ;; a way of testing VL).
 929 (define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
 930   [(set (reg:CC_NZC CC_REGNUM)
 931         (unspec:CC_NZC
 932           [(match_operand 2)
 933            (match_operand 3)
 934            (const_int SVE_KNOWN_PTRUE)
 935            (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 936              [(unspec:VNx16BI
 937                 [(match_operand:SI 4 "const_int_operand")
 938                  (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
 939                 UNSPEC_PTRUE)])]
 940           UNSPEC_PTEST))
 941    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
 942   "TARGET_SVE"
 943   {
 944     return aarch64_output_sve_ptrues (operands[1]);
 945   }
 946   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
 947   {
 948     operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
 949   }
 950 )
 951
 952 ;; Match PTRUES Pn.[HWD] when only the flags result is useful (which is
 953 ;; a way of testing VL).
 954 (define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
 955   [(set (reg:CC_NZC CC_REGNUM)
 956         (unspec:CC_NZC
 957           [(match_operand 2)
 958            (match_operand 3)
 959            (const_int SVE_KNOWN_PTRUE)
 960            (subreg:PRED_HSD
 961              (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
 962                [(unspec:VNx16BI
 963                   [(match_operand:SI 4 "const_int_operand")
 964                    (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
 965                   UNSPEC_PTRUE)]) 0)]
 966           UNSPEC_PTEST))
 967    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
 968   "TARGET_SVE"
 969   {
 970     return aarch64_output_sve_ptrues (operands[1]);
 971   }
 972   "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
 973   {
 974     operands[2] = CONSTM1_RTX (VNx16BImode);
 975     operands[3] = CONSTM1_RTX (<MODE>mode);
 976   }
 977 )
 978
 979 ;; -------------------------------------------------------------------------
 980 ;; ---- Moves relating to the FFR
 981 ;; -------------------------------------------------------------------------
 982 ;; RDFFR
 983 ;; RDFFRS
 984 ;; SETFFR
 985 ;; WRFFR
 986 ;; -------------------------------------------------------------------------
 987
 988 ;; [W1 in the block comment above about FFR handling]
 989 ;;
 990 ;; Write to the FFR and start a new FFRT scheduling region.
 991 (define_insn "aarch64_wrffr"
 992   [(set (reg:VNx16BI FFR_REGNUM)
 993         (match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one" "Dm, Upa"))
 994    (set (reg:VNx16BI FFRT_REGNUM)
 995         (unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
 996   "TARGET_SVE"
 997   "@
 998    setffr
 999    wrffr\t%0.b"
1000 )
1001
1002 ;; [L2 in the block comment above about FFR handling]
1003 ;;
1004 ;; Introduce a read from and write to the FFR in the current FFRT region,
1005 ;; so that the FFR value is live on entry to the region and so that the FFR
1006 ;; value visibly changes within the region.  This is used (possibly multiple
1007 ;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
1008 (define_insn "aarch64_update_ffr_for_load"
1009   [(set (reg:VNx16BI FFR_REGNUM)
1010         (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
1011                          (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
1012   "TARGET_SVE"
1013   ""
1014   [(set_attr "type" "no_insn")]
1015 )
1016
1017 ;; [R1 in the block comment above about FFR handling]
1018 ;;
1019 ;; Notionally copy the FFR to the FFRT, so that the current FFR value
1020 ;; can be read from there by the RDFFR instructions below.  This acts
1021 ;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
1022 ;; creates a natural dependency with earlier writes.
1023 (define_insn "aarch64_copy_ffr_to_ffrt"
1024   [(set (reg:VNx16BI FFRT_REGNUM)
1025         (reg:VNx16BI FFR_REGNUM))]
1026   "TARGET_SVE"
1027   ""
1028   [(set_attr "type" "no_insn")]
1029 )
1030
1031 ;; [R2 in the block comment above about FFR handling]
1032 ;;
1033 ;; Read the FFR via the FFRT.
1034 (define_insn "aarch64_rdffr"
1035   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1036         (reg:VNx16BI FFRT_REGNUM))]
1037   "TARGET_SVE"
1038   "rdffr\t%0.b"
1039 )
1040
1041 ;; Likewise with zero predication.
1042 (define_insn "aarch64_rdffr_z"
1043   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1044         (and:VNx16BI
1045           (reg:VNx16BI FFRT_REGNUM)
1046           (match_operand:VNx16BI 1 "register_operand" "Upa")))]
1047   "TARGET_SVE"
1048   "rdffr\t%0.b, %1/z"
1049 )
1050
1051 ;; Read the FFR to test for a fault, without using the predicate result.
1052 (define_insn "*aarch64_rdffr_z_ptest"
1053   [(set (reg:CC_NZC CC_REGNUM)
1054         (unspec:CC_NZC
1055           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1056            (match_dup 1)
1057            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1058            (and:VNx16BI
1059              (reg:VNx16BI FFRT_REGNUM)
1060              (match_dup 1))]
1061           UNSPEC_PTEST))
1062    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1063   "TARGET_SVE"
1064   "rdffrs\t%0.b, %1/z"
1065 )
1066
1067 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1068 (define_insn "*aarch64_rdffr_ptest"
1069   [(set (reg:CC_NZC CC_REGNUM)
1070         (unspec:CC_NZC
1071           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1072            (match_dup 1)
1073            (const_int SVE_KNOWN_PTRUE)
1074            (reg:VNx16BI FFRT_REGNUM)]
1075           UNSPEC_PTEST))
1076    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
1077   "TARGET_SVE"
1078   "rdffrs\t%0.b, %1/z"
1079 )
1080
1081 ;; Read the FFR with zero predication and test the result.
1082 (define_insn "*aarch64_rdffr_z_cc"
1083   [(set (reg:CC_NZC CC_REGNUM)
1084         (unspec:CC_NZC
1085           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1086            (match_dup 1)
1087            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
1088            (and:VNx16BI
1089              (reg:VNx16BI FFRT_REGNUM)
1090              (match_dup 1))]
1091           UNSPEC_PTEST))
1092    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1093         (and:VNx16BI
1094           (reg:VNx16BI FFRT_REGNUM)
1095           (match_dup 1)))]
1096   "TARGET_SVE"
1097   "rdffrs\t%0.b, %1/z"
1098 )
1099
1100 ;; Same for unpredicated RDFFR when tested with a known PTRUE.
1101 (define_insn "*aarch64_rdffr_cc"
1102   [(set (reg:CC_NZC CC_REGNUM)
1103         (unspec:CC_NZC
1104           [(match_operand:VNx16BI 1 "register_operand" "Upa")
1105            (match_dup 1)
1106            (const_int SVE_KNOWN_PTRUE)
1107            (reg:VNx16BI FFRT_REGNUM)]
1108           UNSPEC_PTEST))
1109    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
1110         (reg:VNx16BI FFRT_REGNUM))]
1111   "TARGET_SVE"
1112   "rdffrs\t%0.b, %1/z"
1113 )
1114
1115 ;; [R3 in the block comment above about FFR handling]
1116 ;;
1117 ;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
1118 ;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
1119 (define_insn "aarch64_update_ffrt"
1120   [(set (reg:VNx16BI FFRT_REGNUM)
1121         (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
1122   "TARGET_SVE"
1123   ""
1124   [(set_attr "type" "no_insn")]
1125 )
1126
1127 ;; =========================================================================
1128 ;; == Loads
1129 ;; =========================================================================
1130
1131 ;; -------------------------------------------------------------------------
1132 ;; ---- Normal contiguous loads
1133 ;; -------------------------------------------------------------------------
1134 ;; Includes contiguous forms of:
1135 ;; - LD1B
1136 ;; - LD1D
1137 ;; - LD1H
1138 ;; - LD1W
1139 ;; - LD2B
1140 ;; - LD2D
1141 ;; - LD2H
1142 ;; - LD2W
1143 ;; - LD3B
1144 ;; - LD3D
1145 ;; - LD3H
1146 ;; - LD3W
1147 ;; - LD4B
1148 ;; - LD4D
1149 ;; - LD4H
1150 ;; - LD4W
1151 ;; -------------------------------------------------------------------------
1152
1153 ;; Predicated LD1.
1154 (define_insn "maskload<mode><vpred>"
1155   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
1156         (unspec:SVE_ALL
1157           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1158            (match_operand:SVE_ALL 1 "memory_operand" "m")]
1159           UNSPEC_LD1_SVE))]
1160   "TARGET_SVE"
1161   "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
1162 )
1163
1164 ;; Unpredicated LD[234].
1165 (define_expand "vec_load_lanes<mode><vsingle>"
1166   [(set (match_operand:SVE_STRUCT 0 "register_operand")
1167         (unspec:SVE_STRUCT
1168           [(match_dup 2)
1169            (match_operand:SVE_STRUCT 1 "memory_operand")]
1170           UNSPEC_LDN))]
1171   "TARGET_SVE"
1172   {
1173     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
1174   }
1175 )
1176
1177 ;; Predicated LD[234].
1178 (define_insn "vec_mask_load_lanes<mode><vsingle>"
1179   [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
1180         (unspec:SVE_STRUCT
1181           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1182            (match_operand:SVE_STRUCT 1 "memory_operand" "m")]
1183           UNSPEC_LDN))]
1184   "TARGET_SVE"
1185   "ld<vector_count><Vesize>\t%0, %2/z, %1"
1186 )
1187
1188 ;; -------------------------------------------------------------------------
1189 ;; ---- Extending contiguous loads
1190 ;; -------------------------------------------------------------------------
1191 ;; Includes contiguous forms of:
1192 ;; LD1B
1193 ;; LD1H
1194 ;; LD1SB
1195 ;; LD1SH
1196 ;; LD1SW
1197 ;; LD1W
1198 ;; -------------------------------------------------------------------------
1199
1200 ;; Predicated load and extend, with 8 elements per 128-bit block.
1201 (define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1202   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1203         (unspec:SVE_HSDI
1204           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1205            (ANY_EXTEND:SVE_HSDI
1206              (unspec:SVE_PARTIAL_I
1207                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1208                 (match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
1209                UNSPEC_LD1_SVE))]
1210           UNSPEC_PRED_X))]
1211   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1212   "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1213   "&& !CONSTANT_P (operands[3])"
1214   {
1215     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1216   }
1217 )
1218
1219 ;; -------------------------------------------------------------------------
1220 ;; ---- First-faulting contiguous loads
1221 ;; -------------------------------------------------------------------------
1222 ;; Includes contiguous forms of:
1223 ;; - LDFF1B
1224 ;; - LDFF1D
1225 ;; - LDFF1H
1226 ;; - LDFF1W
1227 ;; - LDNF1B
1228 ;; - LDNF1D
1229 ;; - LDNF1H
1230 ;; - LDNF1W
1231 ;; -------------------------------------------------------------------------
1232
1233 ;; Contiguous non-extending first-faulting or non-faulting loads.
1234 (define_insn "@aarch64_ld<fn>f1<mode>"
1235   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1236         (unspec:SVE_FULL
1237           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1238            (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1239            (reg:VNx16BI FFRT_REGNUM)]
1240           SVE_LDFF1_LDNF1))]
1241   "TARGET_SVE"
1242   "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
1243 )
1244
1245 ;; -------------------------------------------------------------------------
1246 ;; ---- First-faulting extending contiguous loads
1247 ;; -------------------------------------------------------------------------
1248 ;; Includes contiguous forms of:
1249 ;; - LDFF1B
1250 ;; - LDFF1H
1251 ;; - LDFF1SB
1252 ;; - LDFF1SH
1253 ;; - LDFF1SW
1254 ;; - LDFF1W
1255 ;; - LDNF1B
1256 ;; - LDNF1H
1257 ;; - LDNF1SB
1258 ;; - LDNF1SH
1259 ;; - LDNF1SW
1260 ;; - LDNF1W
1261 ;; -------------------------------------------------------------------------
1262
1263 ;; Predicated first-faulting or non-faulting load and extend.
1264 (define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
1265   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
1266         (unspec:SVE_HSDI
1267           [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
1268            (ANY_EXTEND:SVE_HSDI
1269              (unspec:SVE_PARTIAL_I
1270                [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
1271                 (match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
1272                 (reg:VNx16BI FFRT_REGNUM)]
1273                SVE_LDFF1_LDNF1))]
1274           UNSPEC_PRED_X))]
1275   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
1276   "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
1277   "&& !CONSTANT_P (operands[3])"
1278   {
1279     operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
1280   }
1281 )
1282
1283 ;; -------------------------------------------------------------------------
1284 ;; ---- Non-temporal contiguous loads
1285 ;; -------------------------------------------------------------------------
1286 ;; Includes:
1287 ;; - LDNT1B
1288 ;; - LDNT1D
1289 ;; - LDNT1H
1290 ;; - LDNT1W
1291 ;; -------------------------------------------------------------------------
1292
1293 ;; Predicated contiguous non-temporal load.
1294 (define_insn "@aarch64_ldnt1<mode>"
1295   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
1296         (unspec:SVE_FULL
1297           [(match_operand:<VPRED> 2 "register_operand" "Upl")
1298            (match_operand:SVE_FULL 1 "memory_operand" "m")]
1299           UNSPEC_LDNT1_SVE))]
1300   "TARGET_SVE"
1301   "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
1302 )
1303
1304 ;; -------------------------------------------------------------------------
1305 ;; ---- Normal gather loads
1306 ;; -------------------------------------------------------------------------
1307 ;; Includes gather forms of:
1308 ;; - LD1D
1309 ;; - LD1W
1310 ;; -------------------------------------------------------------------------
1311
1312 ;; Unpredicated gather loads.
1313 (define_expand "gather_load<mode><v_int_container>"
1314   [(set (match_operand:SVE_24 0 "register_operand")
1315         (unspec:SVE_24
1316           [(match_dup 5)
1317            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
1318            (match_operand:<V_INT_CONTAINER> 2 "register_operand")
1319            (match_operand:DI 3 "const_int_operand")
1320            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1321            (mem:BLK (scratch))]
1322           UNSPEC_LD1_GATHER))]
1323   "TARGET_SVE"
1324   {
1325     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
1326   }
1327 )
1328
1329 ;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
1330 ;; unsigned extension and false for signed extension.
1331 (define_insn "mask_gather_load<mode><v_int_container>"
1332   [(set (match_operand:SVE_4 0 "register_operand" "=w, w, w, w, w, w")
1333         (unspec:SVE_4
1334           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1335            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
1336            (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1337            (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1338            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1339            (mem:BLK (scratch))]
1340           UNSPEC_LD1_GATHER))]
1341   "TARGET_SVE"
1342   "@
1343    ld1<Vesize>\t%0.s, %5/z, [%2.s]
1344    ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
1345    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1346    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1347    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1348    ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1349 )
1350
1351 ;; Predicated gather loads for 64-bit elements.  The value of operand 3
1352 ;; doesn't matter in this case.
1353 (define_insn "mask_gather_load<mode><v_int_container>"
1354   [(set (match_operand:SVE_2 0 "register_operand" "=w, w, w, w")
1355         (unspec:SVE_2
1356           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1357            (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
1358            (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1359            (match_operand:DI 3 "const_int_operand")
1360            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
1361            (mem:BLK (scratch))]
1362           UNSPEC_LD1_GATHER))]
1363   "TARGET_SVE"
1364   "@
1365    ld1<Vesize>\t%0.d, %5/z, [%2.d]
1366    ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
1367    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
1368    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1369 )
1370
1371 ;; Likewise, but with the offset being extended from 32 bits.
1372 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
1373   [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1374         (unspec:SVE_2
1375           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1376            (match_operand:DI 1 "register_operand" "rk, rk")
1377            (unspec:VNx2DI
1378              [(match_operand 6)
1379               (ANY_EXTEND:VNx2DI
1380                 (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1381              UNSPEC_PRED_X)
1382            (match_operand:DI 3 "const_int_operand")
1383            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1384            (mem:BLK (scratch))]
1385           UNSPEC_LD1_GATHER))]
1386   "TARGET_SVE"
1387   "@
1388    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
1389    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]"
1390   "&& !CONSTANT_P (operands[6])"
1391   {
1392     operands[6] = CONSTM1_RTX (VNx2BImode);
1393   }
1394 )
1395
1396 ;; Likewise, but with the offset being truncated to 32 bits and then
1397 ;; sign-extended.
1398 (define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
1399   [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1400         (unspec:SVE_2
1401           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1402            (match_operand:DI 1 "register_operand" "rk, rk")
1403            (unspec:VNx2DI
1404              [(match_operand 6)
1405               (sign_extend:VNx2DI
1406                 (truncate:VNx2SI
1407                   (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1408              UNSPEC_PRED_X)
1409            (match_operand:DI 3 "const_int_operand")
1410            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1411            (mem:BLK (scratch))]
1412           UNSPEC_LD1_GATHER))]
1413   "TARGET_SVE"
1414   "@
1415    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1416    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1417   "&& !CONSTANT_P (operands[6])"
1418   {
1419     operands[6] = CONSTM1_RTX (VNx2BImode);
1420   }
1421 )
1422
1423 ;; Likewise, but with the offset being truncated to 32 bits and then
1424 ;; zero-extended.
1425 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1426   [(set (match_operand:SVE_2 0 "register_operand" "=w, w")
1427         (unspec:SVE_2
1428           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1429            (match_operand:DI 1 "register_operand" "rk, rk")
1430            (and:VNx2DI
1431              (match_operand:VNx2DI 2 "register_operand" "w, w")
1432              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1433            (match_operand:DI 3 "const_int_operand")
1434            (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
1435            (mem:BLK (scratch))]
1436           UNSPEC_LD1_GATHER))]
1437   "TARGET_SVE"
1438   "@
1439    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1440    ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1441 )
1442
1443 ;; -------------------------------------------------------------------------
1444 ;; ---- Extending gather loads
1445 ;; -------------------------------------------------------------------------
1446 ;; Includes gather forms of:
1447 ;; - LD1B
1448 ;; - LD1H
1449 ;; - LD1SB
1450 ;; - LD1SH
1451 ;; - LD1SW
1452 ;; - LD1W
1453 ;; -------------------------------------------------------------------------
1454
1455 ;; Predicated extending gather loads for 32-bit elements.  Operand 3 is
1456 ;; true for unsigned extension and false for signed extension.
1457 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1458   [(set (match_operand:SVE_4HSI 0 "register_operand" "=w, w, w, w, w, w")
1459         (unspec:SVE_4HSI
1460           [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1461            (ANY_EXTEND:SVE_4HSI
1462              (unspec:SVE_4BHI
1463                [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1464                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>" "Z, vg<SVE_4BHI:Vesize>, rk, rk, rk, rk")
1465                 (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1466                 (match_operand:DI 3 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
1467                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1468                 (mem:BLK (scratch))]
1469                UNSPEC_LD1_GATHER))]
1470           UNSPEC_PRED_X))]
1471   "TARGET_SVE && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1472   "@
1473    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1474    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1475    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1476    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1477    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1478    ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1479   "&& !CONSTANT_P (operands[6])"
1480   {
1481     operands[6] = CONSTM1_RTX (VNx4BImode);
1482   }
1483 )
1484
1485 ;; Predicated extending gather loads for 64-bit elements.  The value of
1486 ;; operand 3 doesn't matter in this case.
1487 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1488   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w, w, w")
1489         (unspec:SVE_2HSDI
1490           [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1491            (ANY_EXTEND:SVE_2HSDI
1492              (unspec:SVE_2BHSI
1493                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1494                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>" "Z, vg<SVE_2BHSI:Vesize>, rk, rk")
1495                 (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1496                 (match_operand:DI 3 "const_int_operand")
1497                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, Ui1, Ui1, i")
1498                 (mem:BLK (scratch))]
1499                UNSPEC_LD1_GATHER))]
1500           UNSPEC_PRED_X))]
1501   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1502   "@
1503    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1504    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1505    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1506    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1507   "&& !CONSTANT_P (operands[6])"
1508   {
1509     operands[6] = CONSTM1_RTX (VNx2BImode);
1510   }
1511 )
1512
1513 ;; Likewise, but with the offset being extended from 32 bits.
1514 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1515   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1516         (unspec:SVE_2HSDI
1517           [(match_operand 6)
1518            (ANY_EXTEND:SVE_2HSDI
1519              (unspec:SVE_2BHSI
1520                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1521                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1522                 (unspec:VNx2DI
1523                   [(match_operand 7)
1524                    (ANY_EXTEND2:VNx2DI
1525                      (match_operand:VNx2SI 2 "register_operand" "w, w"))]
1526                   UNSPEC_PRED_X)
1527                 (match_operand:DI 3 "const_int_operand")
1528                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1529                 (mem:BLK (scratch))]
1530                UNSPEC_LD1_GATHER))]
1531           UNSPEC_PRED_X))]
1532   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1533   "@
1534    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1535    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]"
1536   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1537   {
1538     operands[6] = CONSTM1_RTX (VNx2BImode);
1539     operands[7] = CONSTM1_RTX (VNx2BImode);
1540   }
1541 )
1542
1543 ;; Likewise, but with the offset being truncated to 32 bits and then
1544 ;; sign-extended.
1545 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1546   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1547         (unspec:SVE_2HSDI
1548           [(match_operand 6)
1549            (ANY_EXTEND:SVE_2HSDI
1550              (unspec:SVE_2BHSI
1551                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1552                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1553                 (unspec:VNx2DI
1554                   [(match_operand 7)
1555                    (sign_extend:VNx2DI
1556                      (truncate:VNx2SI
1557                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1558                   UNSPEC_PRED_X)
1559                 (match_operand:DI 3 "const_int_operand")
1560                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1561                 (mem:BLK (scratch))]
1562                UNSPEC_LD1_GATHER))]
1563           UNSPEC_PRED_X))]
1564   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1565   "@
1566    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1567    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1568   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1569   {
1570     operands[6] = CONSTM1_RTX (VNx2BImode);
1571     operands[7] = CONSTM1_RTX (VNx2BImode);
1572   }
1573 )
1574
1575 ;; Likewise, but with the offset being truncated to 32 bits and then
1576 ;; zero-extended.
1577 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1578   [(set (match_operand:SVE_2HSDI 0 "register_operand" "=w, w")
1579         (unspec:SVE_2HSDI
1580           [(match_operand 7)
1581            (ANY_EXTEND:SVE_2HSDI
1582              (unspec:SVE_2BHSI
1583                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1584                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1585                 (and:VNx2DI
1586                   (match_operand:VNx2DI 2 "register_operand" "w, w")
1587                   (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1588                 (match_operand:DI 3 "const_int_operand")
1589                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>" "Ui1, i")
1590                 (mem:BLK (scratch))]
1591                UNSPEC_LD1_GATHER))]
1592           UNSPEC_PRED_X))]
1593   "TARGET_SVE && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1594   "@
1595    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1596    ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1597   "&& !CONSTANT_P (operands[7])"
1598   {
1599     operands[7] = CONSTM1_RTX (VNx2BImode);
1600   }
1601 )
1602
1603 ;; -------------------------------------------------------------------------
1604 ;; ---- First-faulting gather loads
1605 ;; -------------------------------------------------------------------------
1606 ;; Includes gather forms of:
1607 ;; - LDFF1D
1608 ;; - LDFF1W
1609 ;; -------------------------------------------------------------------------
1610
1611 ;; Predicated first-faulting gather loads for 32-bit elements.  Operand
1612 ;; 3 is true for unsigned extension and false for signed extension.
1613 (define_insn "@aarch64_ldff1_gather<mode>"
1614   [(set (match_operand:SVE_FULL_S 0 "register_operand" "=w, w, w, w, w, w")
1615         (unspec:SVE_FULL_S
1616           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1617            (match_operand:DI 1 "aarch64_sve_gather_offset_w" "Z, vgw, rk, rk, rk, rk")
1618            (match_operand:VNx4SI 2 "register_operand" "w, w, w, w, w, w")
1619            (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1620            (match_operand:DI 4 "aarch64_gather_scale_operand_w" "Ui1, Ui1, Ui1, Ui1, i, i")
1621            (mem:BLK (scratch))
1622            (reg:VNx16BI FFRT_REGNUM)]
1623           UNSPEC_LDFF1_GATHER))]
1624   "TARGET_SVE"
1625   "@
1626    ldff1w\t%0.s, %5/z, [%2.s]
1627    ldff1w\t%0.s, %5/z, [%2.s, #%1]
1628    ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1629    ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1630    ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1631    ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1632 )
1633
1634 ;; Predicated first-faulting gather loads for 64-bit elements.  The value
1635 ;; of operand 3 doesn't matter in this case.
1636 (define_insn "@aarch64_ldff1_gather<mode>"
1637   [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w, w, w")
1638         (unspec:SVE_FULL_D
1639           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1640            (match_operand:DI 1 "aarch64_sve_gather_offset_d" "Z, vgd, rk, rk")
1641            (match_operand:VNx2DI 2 "register_operand" "w, w, w, w")
1642            (match_operand:DI 3 "const_int_operand")
1643            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, Ui1, Ui1, i")
1644            (mem:BLK (scratch))
1645            (reg:VNx16BI FFRT_REGNUM)]
1646           UNSPEC_LDFF1_GATHER))]
1647   "TARGET_SVE"
1648   "@
1649    ldff1d\t%0.d, %5/z, [%2.d]
1650    ldff1d\t%0.d, %5/z, [%2.d, #%1]
1651    ldff1d\t%0.d, %5/z, [%1, %2.d]
1652    ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1653 )
1654
1655 ;; Likewise, but with the offset being sign-extended from 32 bits.
1656 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1657   [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1658         (unspec:SVE_FULL_D
1659           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1660            (match_operand:DI 1 "register_operand" "rk, rk")
1661            (unspec:VNx2DI
1662              [(match_operand 6)
1663               (sign_extend:VNx2DI
1664                 (truncate:VNx2SI
1665                   (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1666              UNSPEC_PRED_X)
1667            (match_operand:DI 3 "const_int_operand")
1668            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1669            (mem:BLK (scratch))
1670            (reg:VNx16BI FFRT_REGNUM)]
1671           UNSPEC_LDFF1_GATHER))]
1672   "TARGET_SVE"
1673   "@
1674    ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1675    ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1676   "&& !CONSTANT_P (operands[6])"
1677   {
1678     operands[6] = CONSTM1_RTX (VNx2BImode);
1679   }
1680 )
1681
1682 ;; Likewise, but with the offset being zero-extended from 32 bits.
1683 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1684   [(set (match_operand:SVE_FULL_D 0 "register_operand" "=w, w")
1685         (unspec:SVE_FULL_D
1686           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1687            (match_operand:DI 1 "register_operand" "rk, rk")
1688            (and:VNx2DI
1689              (match_operand:VNx2DI 2 "register_operand" "w, w")
1690              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1691            (match_operand:DI 3 "const_int_operand")
1692            (match_operand:DI 4 "aarch64_gather_scale_operand_d" "Ui1, i")
1693            (mem:BLK (scratch))
1694            (reg:VNx16BI FFRT_REGNUM)]
1695           UNSPEC_LDFF1_GATHER))]
1696   "TARGET_SVE"
1697   "@
1698    ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1699    ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1700 )
1701
1702 ;; -------------------------------------------------------------------------
1703 ;; ---- First-faulting extending gather loads
1704 ;; -------------------------------------------------------------------------
1705 ;; Includes gather forms of:
1706 ;; - LDFF1B
1707 ;; - LDFF1H
1708 ;; - LDFF1SB
1709 ;; - LDFF1SH
1710 ;; - LDFF1SW
1711 ;; - LDFF1W
1712 ;; -------------------------------------------------------------------------
1713
1714 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1715 ;; Operand 3 is true for unsigned extension and false for signed extension.
1716 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1717   [(set (match_operand:VNx4_WIDE 0 "register_operand" "=w, w, w, w, w, w")
1718         (unspec:VNx4_WIDE
1719           [(match_operand:VNx4BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm, UplDnm, UplDnm")
1720            (ANY_EXTEND:VNx4_WIDE
1721              (unspec:VNx4_NARROW
1722                [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1723                 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
1724                 (match_operand:VNx4_WIDE 2 "register_operand" "w, w, w, w, w, w")
1725                 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1726                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1727                 (mem:BLK (scratch))
1728                 (reg:VNx16BI FFRT_REGNUM)]
1729                UNSPEC_LDFF1_GATHER))]
1730           UNSPEC_PRED_X))]
1731   "TARGET_SVE"
1732   "@
1733    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1734    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1735    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1736    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1737    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1738    ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]"
1739   "&& !CONSTANT_P (operands[6])"
1740   {
1741     operands[6] = CONSTM1_RTX (VNx4BImode);
1742   }
1743 )
1744
1745 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1746 ;; The value of operand 3 doesn't matter in this case.
1747 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
1748   [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w, w, w")
1749         (unspec:VNx2_WIDE
1750           [(match_operand:VNx2BI 6 "general_operand" "UplDnm, UplDnm, UplDnm, UplDnm")
1751            (ANY_EXTEND:VNx2_WIDE
1752              (unspec:VNx2_NARROW
1753                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
1754                 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
1755                 (match_operand:VNx2_WIDE 2 "register_operand" "w, w, w, w")
1756                 (match_operand:DI 3 "const_int_operand")
1757                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
1758                 (mem:BLK (scratch))
1759                 (reg:VNx16BI FFRT_REGNUM)]
1760                UNSPEC_LDFF1_GATHER))]
1761           UNSPEC_PRED_X))]
1762   "TARGET_SVE"
1763   "@
1764    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
1765    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1766    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
1767    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]"
1768   "&& !CONSTANT_P (operands[6])"
1769   {
1770     operands[6] = CONSTM1_RTX (VNx2BImode);
1771   }
1772 )
1773
1774 ;; Likewise, but with the offset being sign-extended from 32 bits.
1775 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
1776   [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1777         (unspec:VNx2_WIDE
1778           [(match_operand 6)
1779            (ANY_EXTEND:VNx2_WIDE
1780              (unspec:VNx2_NARROW
1781                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1782                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1783                 (unspec:VNx2DI
1784                   [(match_operand 7)
1785                    (sign_extend:VNx2DI
1786                      (truncate:VNx2SI
1787                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1788                   UNSPEC_PRED_X)
1789                 (match_operand:DI 3 "const_int_operand")
1790                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1791                 (mem:BLK (scratch))
1792                 (reg:VNx16BI FFRT_REGNUM)]
1793                UNSPEC_LDFF1_GATHER))]
1794           UNSPEC_PRED_X))]
1795   "TARGET_SVE"
1796   "@
1797    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1798    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]"
1799   "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1800   {
1801     operands[6] = CONSTM1_RTX (VNx2BImode);
1802     operands[7] = CONSTM1_RTX (VNx2BImode);
1803   }
1804 )
1805
1806 ;; Likewise, but with the offset being zero-extended from 32 bits.
1807 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
1808   [(set (match_operand:VNx2_WIDE 0 "register_operand" "=w, w")
1809         (unspec:VNx2_WIDE
1810           [(match_operand 7)
1811            (ANY_EXTEND:VNx2_WIDE
1812              (unspec:VNx2_NARROW
1813                [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
1814                 (match_operand:DI 1 "aarch64_reg_or_zero" "rk, rk")
1815                 (and:VNx2DI
1816                   (match_operand:VNx2DI 2 "register_operand" "w, w")
1817                   (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1818                 (match_operand:DI 3 "const_int_operand")
1819                 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
1820                 (mem:BLK (scratch))
1821                 (reg:VNx16BI FFRT_REGNUM)]
1822                UNSPEC_LDFF1_GATHER))]
1823           UNSPEC_PRED_X))]
1824   "TARGET_SVE"
1825   "@
1826    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1827    ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]"
1828   "&& !CONSTANT_P (operands[7])"
1829   {
1830     operands[7] = CONSTM1_RTX (VNx2BImode);
1831   }
1832 )
1833
1834 ;; =========================================================================
1835 ;; == Prefetches
1836 ;; =========================================================================
1837
1838 ;; -------------------------------------------------------------------------
1839 ;; ---- Contiguous prefetches
1840 ;; -------------------------------------------------------------------------
1841 ;; Includes contiguous forms of:
1842 ;; - PRFB
1843 ;; - PRFD
1844 ;; - PRFH
1845 ;; - PRFW
1846 ;; -------------------------------------------------------------------------
1847
1848 ;; Contiguous predicated prefetches.  Operand 2 gives the real prefetch
1849 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
1850 ;; information.
1851 (define_insn "@aarch64_sve_prefetch<mode>"
1852   [(prefetch (unspec:DI
1853                [(match_operand:<VPRED> 0 "register_operand" "Upl")
1854                 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
1855                 (match_operand:DI 2 "const_int_operand")]
1856                UNSPEC_SVE_PREFETCH)
1857              (match_operand:DI 3 "const_int_operand")
1858              (match_operand:DI 4 "const_int_operand"))]
1859   "TARGET_SVE"
1860   {
1861     operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
1862     return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
1863   }
1864 )
1865
1866 ;; -------------------------------------------------------------------------
1867 ;; ---- Gather prefetches
1868 ;; -------------------------------------------------------------------------
1869 ;; Includes gather forms of:
1870 ;; - PRFB
1871 ;; - PRFD
1872 ;; - PRFH
1873 ;; - PRFW
1874 ;; -------------------------------------------------------------------------
1875
1876 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
1877 ;; are:
1878 ;; 0: the governing predicate
1879 ;; 1: the scalar component of the address
1880 ;; 2: the vector component of the address
1881 ;; 3: 1 for zero extension, 0 for sign extension
1882 ;; 4: the scale multiplier
1883 ;; 5: a vector zero that identifies the mode of data being accessed
1884 ;; 6: the prefetch operator (an svprfop)
1885 ;; 7: the normal RTL prefetch rw flag
1886 ;; 8: the normal RTL prefetch locality value
1887 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
1888   [(prefetch (unspec:DI
1889                [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
1890                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
1891                 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
1892                 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
1893                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
1894                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1895                 (match_operand:DI 6 "const_int_operand")]
1896                UNSPEC_SVE_PREFETCH_GATHER)
1897              (match_operand:DI 7 "const_int_operand")
1898              (match_operand:DI 8 "const_int_operand"))]
1899   "TARGET_SVE"
1900   {
1901     static const char *const insns[][2] = {
1902       "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
1903       "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
1904       "prfb", "%0, [%1, %2.s, sxtw]",
1905       "prfb", "%0, [%1, %2.s, uxtw]",
1906       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
1907       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
1908     };
1909     const char *const *parts = insns[which_alternative];
1910     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1911   }
1912 )
1913
1914 ;; Predicated gather prefetches for 64-bit elements.  The value of operand 3
1915 ;; doesn't matter in this case.
1916 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
1917   [(prefetch (unspec:DI
1918                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
1919                 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
1920                 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
1921                 (match_operand:DI 3 "const_int_operand")
1922                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
1923                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1924                 (match_operand:DI 6 "const_int_operand")]
1925                UNSPEC_SVE_PREFETCH_GATHER)
1926              (match_operand:DI 7 "const_int_operand")
1927              (match_operand:DI 8 "const_int_operand"))]
1928   "TARGET_SVE"
1929   {
1930     static const char *const insns[][2] = {
1931       "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
1932       "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
1933       "prfb", "%0, [%1, %2.d]",
1934       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
1935     };
1936     const char *const *parts = insns[which_alternative];
1937     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1938   }
1939 )
1940
1941 ;; Likewise, but with the offset being sign-extended from 32 bits.
1942 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
1943   [(prefetch (unspec:DI
1944                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1945                 (match_operand:DI 1 "register_operand" "rk, rk")
1946                 (unspec:VNx2DI_ONLY
1947                   [(match_operand 9)
1948                    (sign_extend:VNx2DI
1949                      (truncate:VNx2SI
1950                        (match_operand:VNx2DI 2 "register_operand" "w, w")))]
1951                   UNSPEC_PRED_X)
1952                 (match_operand:DI 3 "const_int_operand")
1953                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1954                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1955                 (match_operand:DI 6 "const_int_operand")]
1956                UNSPEC_SVE_PREFETCH_GATHER)
1957              (match_operand:DI 7 "const_int_operand")
1958              (match_operand:DI 8 "const_int_operand"))]
1959   "TARGET_SVE"
1960   {
1961     static const char *const insns[][2] = {
1962       "prfb", "%0, [%1, %2.d, sxtw]",
1963       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
1964     };
1965     const char *const *parts = insns[which_alternative];
1966     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1967   }
1968   "&& !rtx_equal_p (operands[0], operands[9])"
1969   {
1970     operands[9] = copy_rtx (operands[0]);
1971   }
1972 )
1973
1974 ;; Likewise, but with the offset being zero-extended from 32 bits.
1975 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
1976   [(prefetch (unspec:DI
1977                [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
1978                 (match_operand:DI 1 "register_operand" "rk, rk")
1979                 (and:VNx2DI_ONLY
1980                   (match_operand:VNx2DI 2 "register_operand" "w, w")
1981                   (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
1982                 (match_operand:DI 3 "const_int_operand")
1983                 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
1984                 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
1985                 (match_operand:DI 6 "const_int_operand")]
1986                UNSPEC_SVE_PREFETCH_GATHER)
1987              (match_operand:DI 7 "const_int_operand")
1988              (match_operand:DI 8 "const_int_operand"))]
1989   "TARGET_SVE"
1990   {
1991     static const char *const insns[][2] = {
1992       "prfb", "%0, [%1, %2.d, uxtw]",
1993       "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
1994     };
1995     const char *const *parts = insns[which_alternative];
1996     return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
1997   }
1998 )
1999
2000 ;; =========================================================================
2001 ;; == Stores
2002 ;; =========================================================================
2003
2004 ;; -------------------------------------------------------------------------
2005 ;; ---- Normal contiguous stores
2006 ;; -------------------------------------------------------------------------
2007 ;; Includes contiguous forms of:
2008 ;; - ST1B
2009 ;; - ST1D
2010 ;; - ST1H
2011 ;; - ST1W
2012 ;; - ST2B
2013 ;; - ST2D
2014 ;; - ST2H
2015 ;; - ST2W
2016 ;; - ST3B
2017 ;; - ST3D
2018 ;; - ST3H
2019 ;; - ST3W
2020 ;; - ST4B
2021 ;; - ST4D
2022 ;; - ST4H
2023 ;; - ST4W
2024 ;; -------------------------------------------------------------------------
2025
2026 ;; Predicated ST1.
2027 (define_insn "maskstore<mode><vpred>"
2028   [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2029         (unspec:SVE_ALL
2030           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2031            (match_operand:SVE_ALL 1 "register_operand" "w")
2032            (match_dup 0)]
2033           UNSPEC_ST1_SVE))]
2034   "TARGET_SVE"
2035   "st1<Vesize>\t%1.<Vctype>, %2, %0"
2036 )
2037
2038 ;; Unpredicated ST[234].  This is always a full update, so the dependence
2039 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2040 ;; There doesn't seem to be any obvious benefit to treating the all-true
2041 ;; case differently though.  In particular, it's very unlikely that we'll
2042 ;; only find out during RTL that a store_lanes is dead.
2043 (define_expand "vec_store_lanes<mode><vsingle>"
2044   [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2045         (unspec:SVE_STRUCT
2046           [(match_dup 2)
2047            (match_operand:SVE_STRUCT 1 "register_operand")
2048            (match_dup 0)]
2049           UNSPEC_STN))]
2050   "TARGET_SVE"
2051   {
2052     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2053   }
2054 )
2055
2056 ;; Predicated ST[234].
2057 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2058   [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2059         (unspec:SVE_STRUCT
2060           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2061            (match_operand:SVE_STRUCT 1 "register_operand" "w")
2062            (match_dup 0)]
2063           UNSPEC_STN))]
2064   "TARGET_SVE"
2065   "st<vector_count><Vesize>\t%1, %2, %0"
2066 )
2067
2068 ;; -------------------------------------------------------------------------
2069 ;; ---- Truncating contiguous stores
2070 ;; -------------------------------------------------------------------------
2071 ;; Includes:
2072 ;; - ST1B
2073 ;; - ST1H
2074 ;; - ST1W
2075 ;; -------------------------------------------------------------------------
2076
2077 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2078 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2079   [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2080         (unspec:VNx8_NARROW
2081           [(match_operand:VNx8BI 2 "register_operand" "Upl")
2082            (truncate:VNx8_NARROW
2083              (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2084            (match_dup 0)]
2085           UNSPEC_ST1_SVE))]
2086   "TARGET_SVE"
2087   "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2088 )
2089
2090 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2091 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2092   [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2093         (unspec:VNx4_NARROW
2094           [(match_operand:VNx4BI 2 "register_operand" "Upl")
2095            (truncate:VNx4_NARROW
2096              (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2097            (match_dup 0)]
2098           UNSPEC_ST1_SVE))]
2099   "TARGET_SVE"
2100   "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2101 )
2102
2103 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2104 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2105   [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2106         (unspec:VNx2_NARROW
2107           [(match_operand:VNx2BI 2 "register_operand" "Upl")
2108            (truncate:VNx2_NARROW
2109              (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2110            (match_dup 0)]
2111           UNSPEC_ST1_SVE))]
2112   "TARGET_SVE"
2113   "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2114 )
2115
2116 ;; -------------------------------------------------------------------------
2117 ;; ---- Non-temporal contiguous stores
2118 ;; -------------------------------------------------------------------------
2119 ;; Includes:
2120 ;; - STNT1B
2121 ;; - STNT1D
2122 ;; - STNT1H
2123 ;; - STNT1W
2124 ;; -------------------------------------------------------------------------
2125
2126 (define_insn "@aarch64_stnt1<mode>"
2127   [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2128         (unspec:SVE_FULL
2129           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2130            (match_operand:SVE_FULL 1 "register_operand" "w")
2131            (match_dup 0)]
2132           UNSPEC_STNT1_SVE))]
2133   "TARGET_SVE"
2134   "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2135 )
2136
2137 ;; -------------------------------------------------------------------------
2138 ;; ---- Normal scatter stores
2139 ;; -------------------------------------------------------------------------
2140 ;; Includes scatter forms of:
2141 ;; - ST1D
2142 ;; - ST1W
2143 ;; -------------------------------------------------------------------------
2144
2145 ;; Unpredicated scatter stores.
2146 (define_expand "scatter_store<mode><v_int_container>"
2147   [(set (mem:BLK (scratch))
2148         (unspec:BLK
2149           [(match_dup 5)
2150            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2151            (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2152            (match_operand:DI 2 "const_int_operand")
2153            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2154            (match_operand:SVE_24 4 "register_operand")]
2155           UNSPEC_ST1_SCATTER))]
2156   "TARGET_SVE"
2157   {
2158     operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2159   }
2160 )
2161
2162 ;; Predicated scatter stores for 32-bit elements.  Operand 2 is true for
2163 ;; unsigned extension and false for signed extension.
2164 (define_insn "mask_scatter_store<mode><v_int_container>"
2165   [(set (mem:BLK (scratch))
2166         (unspec:BLK
2167           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2168            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgw, rk, rk, rk, rk")
2169            (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2170            (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2171            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2172            (match_operand:SVE_4 4 "register_operand" "w, w, w, w, w, w")]
2173           UNSPEC_ST1_SCATTER))]
2174   "TARGET_SVE"
2175   "@
2176    st1<Vesize>\t%4.s, %5, [%1.s]
2177    st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2178    st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2179    st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2180    st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2181    st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2182 )
2183
2184 ;; Predicated scatter stores for 64-bit elements.  The value of operand 2
2185 ;; doesn't matter in this case.
2186 (define_insn "mask_scatter_store<mode><v_int_container>"
2187   [(set (mem:BLK (scratch))
2188         (unspec:BLK
2189           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2190            (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>" "Z, vgd, rk, rk")
2191            (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2192            (match_operand:DI 2 "const_int_operand")
2193            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, Ui1, Ui1, i")
2194            (match_operand:SVE_2 4 "register_operand" "w, w, w, w")]
2195           UNSPEC_ST1_SCATTER))]
2196   "TARGET_SVE"
2197   "@
2198    st1<Vesize>\t%4.d, %5, [%1.d]
2199    st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2200    st1<Vesize>\t%4.d, %5, [%0, %1.d]
2201    st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2202 )
2203
2204 ;; Likewise, but with the offset being extended from 32 bits.
2205 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2206   [(set (mem:BLK (scratch))
2207         (unspec:BLK
2208           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2209            (match_operand:DI 0 "register_operand" "rk, rk")
2210            (unspec:VNx2DI
2211              [(match_operand 6)
2212               (ANY_EXTEND:VNx2DI
2213                 (match_operand:VNx2SI 1 "register_operand" "w, w"))]
2214              UNSPEC_PRED_X)
2215            (match_operand:DI 2 "const_int_operand")
2216            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2217            (match_operand:SVE_2 4 "register_operand" "w, w")]
2218           UNSPEC_ST1_SCATTER))]
2219   "TARGET_SVE"
2220   "@
2221    st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2222    st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]"
2223   "&& !CONSTANT_P (operands[6])"
2224   {
2225     operands[6] = CONSTM1_RTX (<VPRED>mode);
2226   }
2227 )
2228
2229 ;; Likewise, but with the offset being truncated to 32 bits and then
2230 ;; sign-extended.
2231 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2232   [(set (mem:BLK (scratch))
2233         (unspec:BLK
2234           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2235            (match_operand:DI 0 "register_operand" "rk, rk")
2236            (unspec:VNx2DI
2237              [(match_operand 6)
2238               (sign_extend:VNx2DI
2239                 (truncate:VNx2SI
2240                   (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2241              UNSPEC_PRED_X)
2242            (match_operand:DI 2 "const_int_operand")
2243            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2244            (match_operand:SVE_2 4 "register_operand" "w, w")]
2245           UNSPEC_ST1_SCATTER))]
2246   "TARGET_SVE"
2247   "@
2248    st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2249    st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2250   "&& !CONSTANT_P (operands[6])"
2251   {
2252     operands[6] = CONSTM1_RTX (<VPRED>mode);
2253   }
2254 )
2255
2256 ;; Likewise, but with the offset being truncated to 32 bits and then
2257 ;; zero-extended.
2258 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2259   [(set (mem:BLK (scratch))
2260         (unspec:BLK
2261           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2262            (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2263            (and:VNx2DI
2264              (match_operand:VNx2DI 1 "register_operand" "w, w")
2265              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2266            (match_operand:DI 2 "const_int_operand")
2267            (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>" "Ui1, i")
2268            (match_operand:SVE_2 4 "register_operand" "w, w")]
2269           UNSPEC_ST1_SCATTER))]
2270   "TARGET_SVE"
2271   "@
2272    st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2273    st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2274 )
2275
2276 ;; -------------------------------------------------------------------------
2277 ;; ---- Truncating scatter stores
2278 ;; -------------------------------------------------------------------------
2279 ;; Includes scatter forms of:
2280 ;; - ST1B
2281 ;; - ST1H
2282 ;; - ST1W
2283 ;; -------------------------------------------------------------------------
2284
2285 ;; Predicated truncating scatter stores for 32-bit elements.  Operand 2 is
2286 ;; true for unsigned extension and false for signed extension.
2287 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2288   [(set (mem:BLK (scratch))
2289         (unspec:BLK
2290           [(match_operand:VNx4BI 5 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2291            (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2292            (match_operand:VNx4SI 1 "register_operand" "w, w, w, w, w, w")
2293            (match_operand:DI 2 "const_int_operand" "Ui1, Ui1, Z, Ui1, Z, Ui1")
2294            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2295            (truncate:VNx4_NARROW
2296              (match_operand:VNx4_WIDE 4 "register_operand" "w, w, w, w, w, w"))]
2297           UNSPEC_ST1_SCATTER))]
2298   "TARGET_SVE"
2299   "@
2300    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2301    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2302    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2303    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2304    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2305    st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]"
2306 )
2307
2308 ;; Predicated truncating scatter stores for 64-bit elements.  The value of
2309 ;; operand 2 doesn't matter in this case.
2310 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2311   [(set (mem:BLK (scratch))
2312         (unspec:BLK
2313           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl, Upl, Upl")
2314            (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2315            (match_operand:VNx2DI 1 "register_operand" "w, w, w, w")
2316            (match_operand:DI 2 "const_int_operand")
2317            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2318            (truncate:VNx2_NARROW
2319              (match_operand:VNx2_WIDE 4 "register_operand" "w, w, w, w"))]
2320           UNSPEC_ST1_SCATTER))]
2321   "TARGET_SVE"
2322   "@
2323    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2324    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2325    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2326    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]"
2327 )
2328
2329 ;; Likewise, but with the offset being sign-extended from 32 bits.
2330 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2331   [(set (mem:BLK (scratch))
2332         (unspec:BLK
2333           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2334            (match_operand:DI 0 "register_operand" "rk, rk")
2335            (unspec:VNx2DI
2336              [(match_operand 6)
2337               (sign_extend:VNx2DI
2338                 (truncate:VNx2SI
2339                   (match_operand:VNx2DI 1 "register_operand" "w, w")))]
2340              UNSPEC_PRED_X)
2341            (match_operand:DI 2 "const_int_operand")
2342            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2343            (truncate:VNx2_NARROW
2344              (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2345           UNSPEC_ST1_SCATTER))]
2346   "TARGET_SVE"
2347   "@
2348    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2349    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]"
2350   "&& !rtx_equal_p (operands[5], operands[6])"
2351   {
2352     operands[6] = copy_rtx (operands[5]);
2353   }
2354 )
2355
2356 ;; Likewise, but with the offset being zero-extended from 32 bits.
2357 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2358   [(set (mem:BLK (scratch))
2359         (unspec:BLK
2360           [(match_operand:VNx2BI 5 "register_operand" "Upl, Upl")
2361            (match_operand:DI 0 "aarch64_reg_or_zero" "rk, rk")
2362            (and:VNx2DI
2363              (match_operand:VNx2DI 1 "register_operand" "w, w")
2364              (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2365            (match_operand:DI 2 "const_int_operand")
2366            (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2367            (truncate:VNx2_NARROW
2368              (match_operand:VNx2_WIDE 4 "register_operand" "w, w"))]
2369           UNSPEC_ST1_SCATTER))]
2370   "TARGET_SVE"
2371   "@
2372    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2373    st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]"
2374 )
2375
2376 ;; =========================================================================
2377 ;; == Vector creation
2378 ;; =========================================================================
2379
2380 ;; -------------------------------------------------------------------------
2381 ;; ---- [INT,FP] Duplicate element
2382 ;; -------------------------------------------------------------------------
2383 ;; Includes:
2384 ;; - DUP
2385 ;; - MOV
2386 ;; - LD1RB
2387 ;; - LD1RD
2388 ;; - LD1RH
2389 ;; - LD1RW
2390 ;; - LD1ROB (F64MM)
2391 ;; - LD1ROD (F64MM)
2392 ;; - LD1ROH (F64MM)
2393 ;; - LD1ROW (F64MM)
2394 ;; - LD1RQB
2395 ;; - LD1RQD
2396 ;; - LD1RQH
2397 ;; - LD1RQW
2398 ;; -------------------------------------------------------------------------
2399
2400 (define_expand "vec_duplicate<mode>"
2401   [(parallel
2402     [(set (match_operand:SVE_ALL 0 "register_operand")
2403           (vec_duplicate:SVE_ALL
2404             (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2405      (clobber (scratch:VNx16BI))])]
2406   "TARGET_SVE"
2407   {
2408     if (MEM_P (operands[1]))
2409       {
2410         rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2411         emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2412                                        CONST0_RTX (<MODE>mode)));
2413         DONE;
2414       }
2415   }
2416 )
2417
2418 ;; Accept memory operands for the benefit of combine, and also in case
2419 ;; the scalar input gets spilled to memory during RA.  We want to split
2420 ;; the load at the first opportunity in order to allow the PTRUE to be
2421 ;; optimized with surrounding code.
2422 (define_insn_and_split "*vec_duplicate<mode>_reg"
2423   [(set (match_operand:SVE_ALL 0 "register_operand" "=w, w, w")
2424         (vec_duplicate:SVE_ALL
2425           (match_operand:<VEL> 1 "aarch64_sve_dup_operand" "r, w, Uty")))
2426    (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2427   "TARGET_SVE"
2428   "@
2429    mov\t%0.<Vetype>, %<vwcore>1
2430    mov\t%0.<Vetype>, %<Vetype>1
2431    #"
2432   "&& MEM_P (operands[1])"
2433   [(const_int 0)]
2434   {
2435     if (GET_CODE (operands[2]) == SCRATCH)
2436       operands[2] = gen_reg_rtx (VNx16BImode);
2437     emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2438     rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2439     emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2440                                    CONST0_RTX (<MODE>mode)));
2441     DONE;
2442   }
2443   [(set_attr "length" "4,4,8")]
2444 )
2445
2446 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2447 (define_insn "@aarch64_vec_duplicate_vq<mode>_le"
2448   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2449         (vec_duplicate:SVE_FULL
2450           (match_operand:<V128> 1 "register_operand" "w")))]
2451   "TARGET_SVE && !BYTES_BIG_ENDIAN"
2452   {
2453     operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2454     return "dup\t%0.q, %1.q[0]";
2455   }
2456 )
2457
2458 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2459 ;; The SVE register layout puts memory lane N into (architectural)
2460 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2461 ;; lsb into the register lsb.  We therefore have to describe this in rtl
2462 ;; terms as a reverse of the V128 vector followed by a duplicate.
2463 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2464   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2465         (vec_duplicate:SVE_FULL
2466           (vec_select:<V128>
2467             (match_operand:<V128> 1 "register_operand" "w")
2468             (match_operand 2 "descending_int_parallel"))))]
2469   "TARGET_SVE
2470    && BYTES_BIG_ENDIAN
2471    && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2472                 GET_MODE_NUNITS (<V128>mode) - 1)"
2473   {
2474     operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2475     return "dup\t%0.q, %1.q[0]";
2476   }
2477 )
2478
2479 ;; This is used for vec_duplicate<mode>s from memory, but can also
2480 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2481 ;; with zero.
2482 (define_insn "sve_ld1r<mode>"
2483   [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2484         (unspec:SVE_ALL
2485           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2486            (vec_duplicate:SVE_ALL
2487              (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2488            (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2489           UNSPEC_SEL))]
2490   "TARGET_SVE"
2491   "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2492 )
2493
2494 ;; Load 128 bits from memory under predicate control and duplicate to
2495 ;; fill a vector.
2496 (define_insn "@aarch64_sve_ld1rq<mode>"
2497   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2498         (unspec:SVE_FULL
2499           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2500            (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2501           UNSPEC_LD1RQ))]
2502   "TARGET_SVE"
2503   {
2504     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2505     return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2506   }
2507 )
2508
2509 (define_insn "@aarch64_sve_ld1ro<mode>"
2510   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2511         (unspec:SVE_FULL
2512           [(match_operand:<VPRED> 2 "register_operand" "Upl")
2513            (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2514                                "UO<Vesize>")]
2515           UNSPEC_LD1RO))]
2516   "TARGET_SVE_F64MM"
2517   {
2518     operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2519     return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2520   }
2521 )
2522
2523 ;; -------------------------------------------------------------------------
2524 ;; ---- [INT,FP] Initialize from individual elements
2525 ;; -------------------------------------------------------------------------
2526 ;; Includes:
2527 ;; - INSR
2528 ;; -------------------------------------------------------------------------
2529
2530 (define_expand "vec_init<mode><Vel>"
2531   [(match_operand:SVE_FULL 0 "register_operand")
2532     (match_operand 1 "")]
2533   "TARGET_SVE"
2534   {
2535     aarch64_sve_expand_vector_init (operands[0], operands[1]);
2536     DONE;
2537   }
2538 )
2539
2540 ;; Shift an SVE vector left and insert a scalar into element 0.
2541 (define_insn "vec_shl_insert_<mode>"
2542   [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??&w, ?&w")
2543         (unspec:SVE_FULL
2544           [(match_operand:SVE_FULL 1 "register_operand" "0, 0, w, w")
2545            (match_operand:<VEL> 2 "aarch64_reg_or_zero" "rZ, w, rZ, w")]
2546           UNSPEC_INSR))]
2547   "TARGET_SVE"
2548   "@
2549    insr\t%0.<Vetype>, %<vwcore>2
2550    insr\t%0.<Vetype>, %<Vetype>2
2551    movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2552    movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2"
2553   [(set_attr "movprfx" "*,*,yes,yes")]
2554 )
2555
2556 ;; -------------------------------------------------------------------------
2557 ;; ---- [INT] Linear series
2558 ;; -------------------------------------------------------------------------
2559 ;; Includes:
2560 ;; - INDEX
2561 ;; -------------------------------------------------------------------------
2562
2563 (define_insn "vec_series<mode>"
2564   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w")
2565         (vec_series:SVE_I
2566           (match_operand:<VEL> 1 "aarch64_sve_index_operand" "Usi, r, r")
2567           (match_operand:<VEL> 2 "aarch64_sve_index_operand" "r, Usi, r")))]
2568   "TARGET_SVE"
2569   "@
2570    index\t%0.<Vctype>, #%1, %<vccore>2
2571    index\t%0.<Vctype>, %<vccore>1, #%2
2572    index\t%0.<Vctype>, %<vccore>1, %<vccore>2"
2573 )
2574
2575 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2576 ;; of an INDEX instruction.
2577 (define_insn "*vec_series<mode>_plus"
2578   [(set (match_operand:SVE_I 0 "register_operand" "=w")
2579         (plus:SVE_I
2580           (vec_duplicate:SVE_I
2581             (match_operand:<VEL> 1 "register_operand" "r"))
2582           (match_operand:SVE_I 2 "immediate_operand")))]
2583   "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2584   {
2585     operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2586     return "index\t%0.<Vctype>, %<vccore>1, #%2";
2587   }
2588 )
2589
2590 ;; -------------------------------------------------------------------------
2591 ;; ---- [PRED] Duplicate element
2592 ;; -------------------------------------------------------------------------
2593 ;; The patterns in this section are synthetic.
2594 ;; -------------------------------------------------------------------------
2595
2596 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2597 ;; input into the top bit and using a WHILELO.  An alternative would be to
2598 ;; duplicate the input and do a compare with zero.
2599 (define_expand "vec_duplicate<mode>"
2600   [(set (match_operand:PRED_ALL 0 "register_operand")
2601         (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2602   "TARGET_SVE"
2603   {
2604     rtx tmp = gen_reg_rtx (DImode);
2605     rtx op1 = gen_lowpart (DImode, operands[1]);
2606     emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2607     emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2608     DONE;
2609   }
2610 )
2611
2612 ;; =========================================================================
2613 ;; == Vector decomposition
2614 ;; =========================================================================
2615
2616 ;; -------------------------------------------------------------------------
2617 ;; ---- [INT,FP] Extract index
2618 ;; -------------------------------------------------------------------------
2619 ;; Includes:
2620 ;; - DUP    (Advanced SIMD)
2621 ;; - DUP    (SVE)
2622 ;; - EXT    (SVE)
2623 ;; - ST1    (Advanced SIMD)
2624 ;; - UMOV   (Advanced SIMD)
2625 ;; -------------------------------------------------------------------------
2626
2627 (define_expand "vec_extract<mode><Vel>"
2628   [(set (match_operand:<VEL> 0 "register_operand")
2629         (vec_select:<VEL>
2630           (match_operand:SVE_FULL 1 "register_operand")
2631           (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2632   "TARGET_SVE"
2633   {
2634     poly_int64 val;
2635     if (poly_int_rtx_p (operands[2], &val)
2636         && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2637       {
2638         /* The last element can be extracted with a LASTB and a false
2639            predicate.  */
2640         rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2641         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2642         DONE;
2643       }
2644     if (!CONST_INT_P (operands[2]))
2645       {
2646         /* Create an index with operand[2] as the base and -1 as the step.
2647            It will then be zero for the element we care about.  */
2648         rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2649         index = force_reg (<VEL_INT>mode, index);
2650         rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2651         emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2652
2653         /* Get a predicate that is true for only that element.  */
2654         rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2655         rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2656         rtx sel = gen_reg_rtx (<VPRED>mode);
2657         emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2658
2659         /* Select the element using LASTB.  */
2660         emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2661         DONE;
2662       }
2663   }
2664 )
2665
2666 ;; Extract element zero.  This is a special case because we want to force
2667 ;; the registers to be the same for the second alternative, and then
2668 ;; split the instruction into nothing after RA.
2669 (define_insn_and_split "*vec_extract<mode><Vel>_0"
2670   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2671         (vec_select:<VEL>
2672           (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
2673           (parallel [(const_int 0)])))]
2674   "TARGET_SVE"
2675   {
2676     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2677     switch (which_alternative)
2678       {
2679         case 0:
2680           return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
2681         case 1:
2682           return "#";
2683         case 2:
2684           return "st1\\t{%1.<Vetype>}[0], %0";
2685         default:
2686           gcc_unreachable ();
2687       }
2688   }
2689   "&& reload_completed
2690    && REG_P (operands[0])
2691    && REGNO (operands[0]) == REGNO (operands[1])"
2692   [(const_int 0)]
2693   {
2694     emit_note (NOTE_INSN_DELETED);
2695     DONE;
2696   }
2697   [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
2698 )
2699
2700 ;; Extract an element from the Advanced SIMD portion of the register.
2701 ;; We don't just reuse the aarch64-simd.md pattern because we don't
2702 ;; want any change in lane number on big-endian targets.
2703 (define_insn "*vec_extract<mode><Vel>_v128"
2704   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2705         (vec_select:<VEL>
2706           (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
2707           (parallel [(match_operand:SI 2 "const_int_operand")])))]
2708   "TARGET_SVE
2709    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
2710   {
2711     operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
2712     switch (which_alternative)
2713       {
2714         case 0:
2715           return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2716         case 1:
2717           return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2718         case 2:
2719           return "st1\\t{%1.<Vetype>}[%2], %0";
2720         default:
2721           gcc_unreachable ();
2722       }
2723   }
2724   [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
2725 )
2726
2727 ;; Extract an element in the range of DUP.  This pattern allows the
2728 ;; source and destination to be different.
2729 (define_insn "*vec_extract<mode><Vel>_dup"
2730   [(set (match_operand:<VEL> 0 "register_operand" "=w")
2731         (vec_select:<VEL>
2732           (match_operand:SVE_FULL 1 "register_operand" "w")
2733           (parallel [(match_operand:SI 2 "const_int_operand")])))]
2734   "TARGET_SVE
2735    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
2736   {
2737     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2738     return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
2739   }
2740 )
2741
2742 ;; Extract an element outside the range of DUP.  This pattern requires the
2743 ;; source and destination to be the same.
2744 (define_insn "*vec_extract<mode><Vel>_ext"
2745   [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
2746         (vec_select:<VEL>
2747           (match_operand:SVE_FULL 1 "register_operand" "0, w")
2748           (parallel [(match_operand:SI 2 "const_int_operand")])))]
2749   "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
2750   {
2751     operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
2752     operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
2753     return (which_alternative == 0
2754             ? "ext\t%0.b, %0.b, %0.b, #%2"
2755             : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
2756   }
2757   [(set_attr "movprfx" "*,yes")]
2758 )
2759
2760 ;; -------------------------------------------------------------------------
2761 ;; ---- [INT,FP] Extract active element
2762 ;; -------------------------------------------------------------------------
2763 ;; Includes:
2764 ;; - LASTA
2765 ;; - LASTB
2766 ;; -------------------------------------------------------------------------
2767
2768 ;; Extract the last active element of operand 1 into operand 0.
2769 ;; If no elements are active, extract the last inactive element instead.
2770 (define_insn "@extract_<last_op>_<mode>"
2771   [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
2772         (unspec:<VEL>
2773           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2774            (match_operand:SVE_FULL 2 "register_operand" "w, w")]
2775           LAST))]
2776   "TARGET_SVE"
2777   "@
2778    last<ab>\t%<vwcore>0, %1, %2.<Vetype>
2779    last<ab>\t%<Vetype>0, %1, %2.<Vetype>"
2780 )
2781
2782 ;; -------------------------------------------------------------------------
2783 ;; ---- [PRED] Extract index
2784 ;; -------------------------------------------------------------------------
2785 ;; The patterns in this section are synthetic.
2786 ;; -------------------------------------------------------------------------
2787
2788 ;; Handle extractions from a predicate by converting to an integer vector
2789 ;; and extracting from there.
2790 (define_expand "vec_extract<vpred><Vel>"
2791   [(match_operand:<VEL> 0 "register_operand")
2792    (match_operand:<VPRED> 1 "register_operand")
2793    (match_operand:SI 2 "nonmemory_operand")
2794    ;; Dummy operand to which we can attach the iterator.
2795    (reg:SVE_FULL_I V0_REGNUM)]
2796   "TARGET_SVE"
2797   {
2798     rtx tmp = gen_reg_rtx (<MODE>mode);
2799     emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
2800                                              CONST1_RTX (<MODE>mode),
2801                                              CONST0_RTX (<MODE>mode)));
2802     emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
2803     DONE;
2804   }
2805 )
2806
2807 ;; =========================================================================
2808 ;; == Unary arithmetic
2809 ;; =========================================================================
2810
2811 ;; -------------------------------------------------------------------------
2812 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
2813 ;; -------------------------------------------------------------------------
2814 ;; Includes:
2815 ;; - ABS
2816 ;; - CLS (= clrsb)
2817 ;; - CLZ
2818 ;; - CNT (= popcount)
2819 ;; - NEG
2820 ;; - NOT
2821 ;; -------------------------------------------------------------------------
2822
2823 ;; Unpredicated integer unary arithmetic.
2824 (define_expand "<optab><mode>2"
2825   [(set (match_operand:SVE_I 0 "register_operand")
2826         (unspec:SVE_I
2827           [(match_dup 2)
2828            (SVE_INT_UNARY:SVE_I
2829              (match_operand:SVE_I 1 "register_operand"))]
2830           UNSPEC_PRED_X))]
2831   "TARGET_SVE"
2832   {
2833     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2834   }
2835 )
2836
2837 ;; Integer unary arithmetic predicated with a PTRUE.
2838 (define_insn "@aarch64_pred_<optab><mode>"
2839   [(set (match_operand:SVE_I 0 "register_operand" "=w")
2840         (unspec:SVE_I
2841           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2842            (SVE_INT_UNARY:SVE_I
2843              (match_operand:SVE_I 2 "register_operand" "w"))]
2844           UNSPEC_PRED_X))]
2845   "TARGET_SVE"
2846   "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2847 )
2848
2849 ;; Predicated integer unary arithmetic with merging.
2850 (define_expand "@cond_<optab><mode>"
2851   [(set (match_operand:SVE_FULL_I 0 "register_operand")
2852         (unspec:SVE_FULL_I
2853           [(match_operand:<VPRED> 1 "register_operand")
2854            (SVE_INT_UNARY:SVE_FULL_I
2855              (match_operand:SVE_FULL_I 2 "register_operand"))
2856            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
2857           UNSPEC_SEL))]
2858   "TARGET_SVE"
2859 )
2860
2861 ;; Predicated integer unary arithmetic, merging with the first input.
2862 (define_insn "*cond_<optab><mode>_2"
2863   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
2864         (unspec:SVE_FULL_I
2865           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
2866            (SVE_INT_UNARY:SVE_FULL_I
2867              (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
2868            (match_dup 2)]
2869           UNSPEC_SEL))]
2870   "TARGET_SVE"
2871   "@
2872    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
2873    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2874   [(set_attr "movprfx" "*,yes")]
2875 )
2876
2877 ;; Predicated integer unary arithmetic, merging with an independent value.
2878 ;;
2879 ;; The earlyclobber isn't needed for the first alternative, but omitting
2880 ;; it would only help the case in which operands 2 and 3 are the same,
2881 ;; which is handled above rather than here.  Marking all the alternatives
2882 ;; as earlyclobber helps to make the instruction more regular to the
2883 ;; register allocator.
2884 (define_insn "*cond_<optab><mode>_any"
2885   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
2886         (unspec:SVE_FULL_I
2887           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2888            (SVE_INT_UNARY:SVE_FULL_I
2889              (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w"))
2890            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2891           UNSPEC_SEL))]
2892   "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
2893   "@
2894    <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2895    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2896    movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2897   [(set_attr "movprfx" "*,yes,yes")]
2898 )
2899
2900 ;; -------------------------------------------------------------------------
2901 ;; ---- [INT] General unary arithmetic corresponding to unspecs
2902 ;; -------------------------------------------------------------------------
2903 ;; Includes
2904 ;; - RBIT
2905 ;; - REVB
2906 ;; - REVH
2907 ;; - REVW
2908 ;; -------------------------------------------------------------------------
2909
2910 ;; Predicated integer unary operations.
2911 (define_insn "@aarch64_pred_<optab><mode>"
2912   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
2913         (unspec:SVE_FULL_I
2914           [(match_operand:<VPRED> 1 "register_operand" "Upl")
2915            (unspec:SVE_FULL_I
2916              [(match_operand:SVE_FULL_I 2 "register_operand" "w")]
2917              SVE_INT_UNARY)]
2918           UNSPEC_PRED_X))]
2919   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2920   "<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2921 )
2922
2923 ;; Predicated integer unary operations with merging.
2924 (define_insn "@cond_<optab><mode>"
2925   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, ?&w")
2926         (unspec:SVE_FULL_I
2927           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
2928            (unspec:SVE_FULL_I
2929              [(match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")]
2930              SVE_INT_UNARY)
2931            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
2932           UNSPEC_SEL))]
2933   "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
2934   "@
2935    <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2936    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
2937    movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
2938   [(set_attr "movprfx" "*,yes,yes")]
2939 )
2940
2941 ;; -------------------------------------------------------------------------
2942 ;; ---- [INT] Sign and zero extension
2943 ;; -------------------------------------------------------------------------
2944 ;; Includes:
2945 ;; - SXTB
2946 ;; - SXTH
2947 ;; - SXTW
2948 ;; - UXTB
2949 ;; - UXTH
2950 ;; - UXTW
2951 ;; -------------------------------------------------------------------------
2952
2953 ;; Unpredicated sign and zero extension from a narrower mode.
2954 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2955   [(set (match_operand:SVE_HSDI 0 "register_operand")
2956         (unspec:SVE_HSDI
2957           [(match_dup 2)
2958            (ANY_EXTEND:SVE_HSDI
2959              (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
2960           UNSPEC_PRED_X))]
2961   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2962   {
2963     operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
2964   }
2965 )
2966
2967 ;; Predicated sign and zero extension from a narrower mode.
2968 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
2969   [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
2970         (unspec:SVE_HSDI
2971           [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand" "Upl")
2972            (ANY_EXTEND:SVE_HSDI
2973              (match_operand:SVE_PARTIAL_I 2 "register_operand" "w"))]
2974           UNSPEC_PRED_X))]
2975   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2976   "<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>"
2977 )
2978
2979 ;; Predicated truncate-and-sign-extend operations.
2980 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2981   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
2982         (unspec:SVE_FULL_HSDI
2983           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
2984            (sign_extend:SVE_FULL_HSDI
2985              (truncate:SVE_PARTIAL_I
2986                (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")))]
2987           UNSPEC_PRED_X))]
2988   "TARGET_SVE
2989    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
2990   "sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
2991 )
2992
2993 ;; Predicated truncate-and-sign-extend operations with merging.
2994 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
2995   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w, ?&w, ?&w")
2996         (unspec:SVE_FULL_HSDI
2997           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
2998            (sign_extend:SVE_FULL_HSDI
2999              (truncate:SVE_PARTIAL_I
3000                (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")))
3001            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3002           UNSPEC_SEL))]
3003   "TARGET_SVE
3004    && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3005   "@
3006    sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3007    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3008    movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
3009   [(set_attr "movprfx" "*,yes,yes")]
3010 )
3011
3012 ;; Predicated truncate-and-zero-extend operations, merging with the
3013 ;; first input.
3014 ;;
3015 ;; The canonical form of this operation is an AND of a constant rather
3016 ;; than (zero_extend (truncate ...)).
3017 (define_insn "*cond_uxt<mode>_2"
3018   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3019         (unspec:SVE_FULL_I
3020           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3021            (and:SVE_FULL_I
3022              (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3023              (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3024            (match_dup 2)]
3025           UNSPEC_SEL))]
3026   "TARGET_SVE"
3027   "@
3028    uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3029    movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3030   [(set_attr "movprfx" "*,yes")]
3031 )
3032
3033 ;; Predicated truncate-and-zero-extend operations, merging with an
3034 ;; independent value.
3035 ;;
3036 ;; The earlyclobber isn't needed for the first alternative, but omitting
3037 ;; it would only help the case in which operands 2 and 4 are the same,
3038 ;; which is handled above rather than here.  Marking all the alternatives
3039 ;; as early-clobber helps to make the instruction more regular to the
3040 ;; register allocator.
3041 (define_insn "*cond_uxt<mode>_any"
3042   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3043         (unspec:SVE_FULL_I
3044           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3045            (and:SVE_FULL_I
3046              (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3047              (match_operand:SVE_FULL_I 3 "aarch64_sve_uxt_immediate"))
3048            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3049           UNSPEC_SEL))]
3050   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3051   "@
3052    uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3053    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3054    movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>"
3055   [(set_attr "movprfx" "*,yes,yes")]
3056 )
3057
3058 ;; -------------------------------------------------------------------------
3059 ;; ---- [INT] Truncation
3060 ;; -------------------------------------------------------------------------
3061 ;; The patterns in this section are synthetic.
3062 ;; -------------------------------------------------------------------------
3063
3064 ;; Truncate to a partial SVE vector from either a full vector or a
3065 ;; wider partial vector.  This is a no-op, because we can just ignore
3066 ;; the unused upper bits of the source.
3067 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3068   [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3069         (truncate:SVE_PARTIAL_I
3070           (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3071   "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3072   "#"
3073   "&& reload_completed"
3074   [(set (match_dup 0) (match_dup 1))]
3075   {
3076     operands[1] = aarch64_replace_reg_mode (operands[1],
3077                                             <SVE_PARTIAL_I:MODE>mode);
3078   }
3079 )
3080
3081 ;; -------------------------------------------------------------------------
3082 ;; ---- [INT] Logical inverse
3083 ;; -------------------------------------------------------------------------
3084 ;; Includes:
3085 ;; - CNOT
3086 ;; -------------------------------------------------------------------------
3087
3088 ;; Predicated logical inverse.
3089 (define_expand "@aarch64_pred_cnot<mode>"
3090   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3091         (unspec:SVE_FULL_I
3092           [(unspec:<VPRED>
3093              [(match_operand:<VPRED> 1 "register_operand")
3094               (match_operand:SI 2 "aarch64_sve_ptrue_flag")
3095               (eq:<VPRED>
3096                 (match_operand:SVE_FULL_I 3 "register_operand")
3097                 (match_dup 4))]
3098              UNSPEC_PRED_Z)
3099            (match_dup 5)
3100            (match_dup 4)]
3101           UNSPEC_SEL))]
3102   "TARGET_SVE"
3103   {
3104     operands[4] = CONST0_RTX (<MODE>mode);
3105     operands[5] = CONST1_RTX (<MODE>mode);
3106   }
3107 )
3108
3109 (define_insn "*cnot<mode>"
3110   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
3111         (unspec:SVE_FULL_I
3112           [(unspec:<VPRED>
3113              [(match_operand:<VPRED> 1 "register_operand" "Upl")
3114               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
3115               (eq:<VPRED>
3116                 (match_operand:SVE_FULL_I 2 "register_operand" "w")
3117                 (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3118              UNSPEC_PRED_Z)
3119            (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3120            (match_dup 3)]
3121           UNSPEC_SEL))]
3122   "TARGET_SVE"
3123   "cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3124 )
3125
3126 ;; Predicated logical inverse with merging.
3127 (define_expand "@cond_cnot<mode>"
3128   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3129         (unspec:SVE_FULL_I
3130           [(match_operand:<VPRED> 1 "register_operand")
3131            (unspec:SVE_FULL_I
3132              [(unspec:<VPRED>
3133                 [(match_dup 4)
3134                  (const_int SVE_KNOWN_PTRUE)
3135                  (eq:<VPRED>
3136                    (match_operand:SVE_FULL_I 2 "register_operand")
3137                    (match_dup 5))]
3138                 UNSPEC_PRED_Z)
3139               (match_dup 6)
3140               (match_dup 5)]
3141              UNSPEC_SEL)
3142            (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3143           UNSPEC_SEL))]
3144   "TARGET_SVE"
3145   {
3146     operands[4] = CONSTM1_RTX (<VPRED>mode);
3147     operands[5] = CONST0_RTX (<MODE>mode);
3148     operands[6] = CONST1_RTX (<MODE>mode);
3149   }
3150 )
3151
3152 ;; Predicated logical inverse, merging with the first input.
3153 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3154   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3155         (unspec:SVE_FULL_I
3156           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3157            ;; Logical inverse of operand 2 (as above).
3158            (unspec:SVE_FULL_I
3159              [(unspec:<VPRED>
3160                 [(match_operand 5)
3161                  (const_int SVE_KNOWN_PTRUE)
3162                  (eq:<VPRED>
3163                    (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3164                    (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3165                 UNSPEC_PRED_Z)
3166               (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3167               (match_dup 3)]
3168              UNSPEC_SEL)
3169            (match_dup 2)]
3170           UNSPEC_SEL))]
3171   "TARGET_SVE"
3172   "@
3173    cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3174    movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3175   "&& !CONSTANT_P (operands[5])"
3176   {
3177     operands[5] = CONSTM1_RTX (<VPRED>mode);
3178   }
3179   [(set_attr "movprfx" "*,yes")]
3180 )
3181
3182 ;; Predicated logical inverse, merging with an independent value.
3183 ;;
3184 ;; The earlyclobber isn't needed for the first alternative, but omitting
3185 ;; it would only help the case in which operands 2 and 6 are the same,
3186 ;; which is handled above rather than here.  Marking all the alternatives
3187 ;; as earlyclobber helps to make the instruction more regular to the
3188 ;; register allocator.
3189 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3190   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, ?&w, ?&w")
3191         (unspec:SVE_FULL_I
3192           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3193            ;; Logical inverse of operand 2 (as above).
3194            (unspec:SVE_FULL_I
3195              [(unspec:<VPRED>
3196                 [(match_operand 5)
3197                  (const_int SVE_KNOWN_PTRUE)
3198                  (eq:<VPRED>
3199                    (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
3200                    (match_operand:SVE_FULL_I 3 "aarch64_simd_imm_zero"))]
3201                 UNSPEC_PRED_Z)
3202               (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_one")
3203               (match_dup 3)]
3204              UNSPEC_SEL)
3205            (match_operand:SVE_FULL_I 6 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3206           UNSPEC_SEL))]
3207   "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3208   "@
3209    cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3210    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3211    movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>"
3212   "&& !CONSTANT_P (operands[5])"
3213   {
3214     operands[5] = CONSTM1_RTX (<VPRED>mode);
3215   }
3216   [(set_attr "movprfx" "*,yes,yes")]
3217 )
3218
3219 ;; -------------------------------------------------------------------------
3220 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3221 ;; -------------------------------------------------------------------------
3222 ;; Includes:
3223 ;; - FEXPA
3224 ;; -------------------------------------------------------------------------
3225
3226 ;; Unpredicated unary operations that take an integer and return a float.
3227 (define_insn "@aarch64_sve_<optab><mode>"
3228   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3229         (unspec:SVE_FULL_F
3230           [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3231           SVE_FP_UNARY_INT))]
3232   "TARGET_SVE"
3233   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3234 )
3235
3236 ;; -------------------------------------------------------------------------
3237 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3238 ;; -------------------------------------------------------------------------
3239 ;; Includes:
3240 ;; - FABS
3241 ;; - FNEG
3242 ;; - FRECPE
3243 ;; - FRECPX
3244 ;; - FRINTA
3245 ;; - FRINTI
3246 ;; - FRINTM
3247 ;; - FRINTN
3248 ;; - FRINTP
3249 ;; - FRINTX
3250 ;; - FRINTZ
3251 ;; - FRSQRTE
3252 ;; - FSQRT
3253 ;; -------------------------------------------------------------------------
3254
3255 ;; Unpredicated floating-point unary operations.
3256 (define_insn "@aarch64_sve_<optab><mode>"
3257   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3258         (unspec:SVE_FULL_F
3259           [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3260           SVE_FP_UNARY))]
3261   "TARGET_SVE"
3262   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3263 )
3264
3265 ;; Unpredicated floating-point unary operations.
3266 (define_expand "<optab><mode>2"
3267   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3268         (unspec:SVE_FULL_F
3269           [(match_dup 2)
3270            (const_int SVE_RELAXED_GP)
3271            (match_operand:SVE_FULL_F 1 "register_operand")]
3272           SVE_COND_FP_UNARY_OPTAB))]
3273   "TARGET_SVE"
3274   {
3275     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3276   }
3277 )
3278
3279 ;; Predicated floating-point unary operations.
3280 (define_insn "@aarch64_pred_<optab><mode>"
3281   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3282         (unspec:SVE_FULL_F
3283           [(match_operand:<VPRED> 1 "register_operand" "Upl")
3284            (match_operand:SI 3 "aarch64_sve_gp_strictness")
3285            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
3286           SVE_COND_FP_UNARY))]
3287   "TARGET_SVE"
3288   "<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3289 )
3290
3291 ;; Predicated floating-point unary arithmetic with merging.
3292 (define_expand "@cond_<optab><mode>"
3293   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3294         (unspec:SVE_FULL_F
3295           [(match_operand:<VPRED> 1 "register_operand")
3296            (unspec:SVE_FULL_F
3297              [(match_dup 1)
3298               (const_int SVE_STRICT_GP)
3299               (match_operand:SVE_FULL_F 2 "register_operand")]
3300              SVE_COND_FP_UNARY)
3301            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3302           UNSPEC_SEL))]
3303   "TARGET_SVE"
3304 )
3305
3306 ;; Predicated floating-point unary arithmetic, merging with the first input.
3307 (define_insn_and_rewrite "*cond_<optab><mode>_2"
3308   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
3309         (unspec:SVE_FULL_F
3310           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3311            (unspec:SVE_FULL_F
3312              [(match_operand 3)
3313               (match_operand:SI 4 "aarch64_sve_gp_strictness")
3314               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")]
3315              SVE_COND_FP_UNARY)
3316            (match_dup 2)]
3317           UNSPEC_SEL))]
3318   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[3], operands[1])"
3319   "@
3320    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3321    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3322   "&& !rtx_equal_p (operands[1], operands[3])"
3323   {
3324     operands[3] = copy_rtx (operands[1]);
3325   }
3326   [(set_attr "movprfx" "*,yes")]
3327 )
3328
3329 ;; Predicated floating-point unary arithmetic, merging with an independent
3330 ;; value.
3331 ;;
3332 ;; The earlyclobber isn't needed for the first alternative, but omitting
3333 ;; it would only help the case in which operands 2 and 3 are the same,
3334 ;; which is handled above rather than here.  Marking all the alternatives
3335 ;; as earlyclobber helps to make the instruction more regular to the
3336 ;; register allocator.
3337 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3338   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, ?&w, ?&w")
3339         (unspec:SVE_FULL_F
3340           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
3341            (unspec:SVE_FULL_F
3342              [(match_operand 4)
3343               (match_operand:SI 5 "aarch64_sve_gp_strictness")
3344               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
3345              SVE_COND_FP_UNARY)
3346            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
3347           UNSPEC_SEL))]
3348   "TARGET_SVE
3349    && !rtx_equal_p (operands[2], operands[3])
3350    && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
3351   "@
3352    <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3353    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3354    movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>"
3355   "&& !rtx_equal_p (operands[1], operands[4])"
3356   {
3357     operands[4] = copy_rtx (operands[1]);
3358   }
3359   [(set_attr "movprfx" "*,yes,yes")]
3360 )
3361
3362 ;; -------------------------------------------------------------------------
3363 ;; ---- [FP] Square root
3364 ;; -------------------------------------------------------------------------
3365
3366 (define_expand "sqrt<mode>2"
3367   [(set (match_operand:SVE_FULL_F 0 "register_operand")
3368         (unspec:SVE_FULL_F
3369           [(match_dup 2)
3370            (const_int SVE_RELAXED_GP)
3371            (match_operand:SVE_FULL_F 1 "register_operand")]
3372           UNSPEC_COND_FSQRT))]
3373   "TARGET_SVE"
3374 {
3375   if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3376     DONE;
3377   operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3378 })
3379
3380 ;; -------------------------------------------------------------------------
3381 ;; ---- [FP] Reciprocal square root
3382 ;; -------------------------------------------------------------------------
3383
3384 (define_expand "rsqrt<mode>2"
3385   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3386         (unspec:SVE_FULL_SDF
3387           [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3388           UNSPEC_RSQRT))]
3389   "TARGET_SVE"
3390 {
3391   aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3392   DONE;
3393 })
3394
3395 (define_expand "@aarch64_rsqrte<mode>"
3396   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3397         (unspec:SVE_FULL_SDF
3398           [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3399           UNSPEC_RSQRTE))]
3400   "TARGET_SVE"
3401 )
3402
3403 (define_expand "@aarch64_rsqrts<mode>"
3404   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3405         (unspec:SVE_FULL_SDF
3406           [(match_operand:SVE_FULL_SDF 1 "register_operand")
3407            (match_operand:SVE_FULL_SDF 2 "register_operand")]
3408           UNSPEC_RSQRTS))]
3409   "TARGET_SVE"
3410 )
3411
3412 ;; -------------------------------------------------------------------------
3413 ;; ---- [PRED] Inverse
3414 ;; -------------------------------------------------------------------------
3415 ;; Includes:
3416 ;; - NOT
3417 ;; -------------------------------------------------------------------------
3418
3419 ;; Unpredicated predicate inverse.
3420 (define_expand "one_cmpl<mode>2"
3421   [(set (match_operand:PRED_ALL 0 "register_operand")
3422         (and:PRED_ALL
3423           (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3424           (match_dup 2)))]
3425   "TARGET_SVE"
3426   {
3427     operands[2] = aarch64_ptrue_reg (<MODE>mode);
3428   }
3429 )
3430
3431 ;; Predicated predicate inverse.
3432 (define_insn "*one_cmpl<mode>3"
3433   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3434         (and:PRED_ALL
3435           (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3436           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3437   "TARGET_SVE"
3438   "not\t%0.b, %1/z, %2.b"
3439 )
3440
3441 ;; =========================================================================
3442 ;; == Binary arithmetic
3443 ;; =========================================================================
3444
3445 ;; -------------------------------------------------------------------------
3446 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3447 ;; -------------------------------------------------------------------------
3448 ;; Includes:
3449 ;; - ADD    (merging form only)
3450 ;; - AND    (merging form only)
3451 ;; - ASR    (merging form only)
3452 ;; - EOR    (merging form only)
3453 ;; - LSL    (merging form only)
3454 ;; - LSR    (merging form only)
3455 ;; - MUL
3456 ;; - ORR    (merging form only)
3457 ;; - SMAX
3458 ;; - SMIN
3459 ;; - SQADD  (SVE2 merging form only)
3460 ;; - SQSUB  (SVE2 merging form only)
3461 ;; - SUB    (merging form only)
3462 ;; - UMAX
3463 ;; - UMIN
3464 ;; - UQADD  (SVE2 merging form only)
3465 ;; - UQSUB  (SVE2 merging form only)
3466 ;; -------------------------------------------------------------------------
3467
3468 ;; Unpredicated integer binary operations that have an immediate form.
3469 (define_expand "<optab><mode>3"
3470   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3471         (unspec:SVE_FULL_I
3472           [(match_dup 3)
3473            (SVE_INT_BINARY_IMM:SVE_FULL_I
3474              (match_operand:SVE_FULL_I 1 "register_operand")
3475              (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3476           UNSPEC_PRED_X))]
3477   "TARGET_SVE"
3478   {
3479     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3480   }
3481 )
3482
3483 ;; Integer binary operations that have an immediate form, predicated
3484 ;; with a PTRUE.  We don't actually need the predicate for the first
3485 ;; and third alternatives, but using Upa or X isn't likely to gain much
3486 ;; and would make the instruction seem less uniform to the register
3487 ;; allocator.
3488 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3489   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w")
3490         (unspec:SVE_FULL_I
3491           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
3492            (SVE_INT_BINARY_IMM:SVE_FULL_I
3493              (match_operand:SVE_FULL_I 2 "register_operand" "%0, 0, w, w")
3494              (match_operand:SVE_FULL_I 3 "aarch64_sve_<sve_imm_con>_operand" "<sve_imm_con>, w, <sve_imm_con>, w"))]
3495           UNSPEC_PRED_X))]
3496   "TARGET_SVE"
3497   "@
3498    #
3499    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3500    #
3501    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3502   ; Split the unpredicated form after reload, so that we don't have
3503   ; the unnecessary PTRUE.
3504   "&& reload_completed
3505    && !register_operand (operands[3], <MODE>mode)"
3506   [(set (match_dup 0)
3507         (SVE_INT_BINARY_IMM:SVE_FULL_I (match_dup 2) (match_dup 3)))]
3508   ""
3509   [(set_attr "movprfx" "*,*,yes,yes")]
3510 )
3511
3512 ;; Unpredicated binary operations with a constant (post-RA only).
3513 ;; These are generated by splitting a predicated instruction whose
3514 ;; predicate is unused.
3515 (define_insn "*post_ra_<optab><mode>3"
3516   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3517         (SVE_INT_BINARY_IMM:SVE_FULL_I
3518           (match_operand:SVE_FULL_I 1 "register_operand" "0, w")
3519           (match_operand:SVE_FULL_I 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3520   "TARGET_SVE && reload_completed"
3521   "@
3522    <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2
3523    movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, #%<sve_imm_prefix>2"
3524   [(set_attr "movprfx" "*,yes")]
3525 )
3526
3527 ;; Predicated integer operations with merging.
3528 (define_expand "@cond_<optab><mode>"
3529   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3530         (unspec:SVE_FULL_I
3531           [(match_operand:<VPRED> 1 "register_operand")
3532            (SVE_INT_BINARY:SVE_FULL_I
3533              (match_operand:SVE_FULL_I 2 "register_operand")
3534              (match_operand:SVE_FULL_I 3 "<sve_pred_int_rhs2_operand>"))
3535            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3536           UNSPEC_SEL))]
3537   "TARGET_SVE"
3538 )
3539
3540 ;; Predicated integer operations, merging with the first input.
3541 (define_insn "*cond_<optab><mode>_2"
3542   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3543         (unspec:SVE_FULL_I
3544           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3545            (SVE_INT_BINARY:SVE_FULL_I
3546              (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3547              (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3548            (match_dup 2)]
3549           UNSPEC_SEL))]
3550   "TARGET_SVE"
3551   "@
3552    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3553    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3554   [(set_attr "movprfx" "*,yes")]
3555 )
3556
3557 ;; Predicated integer operations, merging with the second input.
3558 (define_insn "*cond_<optab><mode>_3"
3559   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3560         (unspec:SVE_FULL_I
3561           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3562            (SVE_INT_BINARY:SVE_FULL_I
3563              (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
3564              (match_operand:SVE_FULL_I 3 "register_operand" "0, w"))
3565            (match_dup 3)]
3566           UNSPEC_SEL))]
3567   "TARGET_SVE"
3568   "@
3569    <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3570    movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
3571   [(set_attr "movprfx" "*,yes")]
3572 )
3573
3574 ;; Predicated integer operations, merging with an independent value.
3575 (define_insn_and_rewrite "*cond_<optab><mode>_any"
3576   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3577         (unspec:SVE_FULL_I
3578           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3579            (SVE_INT_BINARY:SVE_FULL_I
3580              (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3581              (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))
3582            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3583           UNSPEC_SEL))]
3584   "TARGET_SVE
3585    && !rtx_equal_p (operands[2], operands[4])
3586    && !rtx_equal_p (operands[3], operands[4])"
3587   "@
3588    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3589    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3590    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3591    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3592    #"
3593   "&& reload_completed
3594    && register_operand (operands[4], <MODE>mode)
3595    && !rtx_equal_p (operands[0], operands[4])"
3596   {
3597     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3598                                              operands[4], operands[1]));
3599     operands[4] = operands[2] = operands[0];
3600   }
3601   [(set_attr "movprfx" "yes")]
3602 )
3603
3604 ;; -------------------------------------------------------------------------
3605 ;; ---- [INT] Addition
3606 ;; -------------------------------------------------------------------------
3607 ;; Includes:
3608 ;; - ADD
3609 ;; - DECB
3610 ;; - DECD
3611 ;; - DECH
3612 ;; - DECW
3613 ;; - INCB
3614 ;; - INCD
3615 ;; - INCH
3616 ;; - INCW
3617 ;; - SUB
3618 ;; -------------------------------------------------------------------------
3619
3620 (define_insn "add<mode>3"
3621   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, w, ?w, ?w, w")
3622         (plus:SVE_I
3623           (match_operand:SVE_I 1 "register_operand" "%0, 0, 0, w, w, w")
3624           (match_operand:SVE_I 2 "aarch64_sve_add_operand" "vsa, vsn, vsi, vsa, vsn, w")))]
3625   "TARGET_SVE"
3626   "@
3627    add\t%0.<Vetype>, %0.<Vetype>, #%D2
3628    sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3629    * return aarch64_output_sve_vector_inc_dec (\"%0.<Vetype>\", operands[2]);
3630    movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
3631    movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
3632    add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3633   [(set_attr "movprfx" "*,*,*,yes,yes,*")]
3634 )
3635
3636 ;; Merging forms are handled through SVE_INT_BINARY.
3637
3638 ;; -------------------------------------------------------------------------
3639 ;; ---- [INT] Subtraction
3640 ;; -------------------------------------------------------------------------
3641 ;; Includes:
3642 ;; - SUB
3643 ;; - SUBR
3644 ;; -------------------------------------------------------------------------
3645
3646 (define_insn "sub<mode>3"
3647   [(set (match_operand:SVE_I 0 "register_operand" "=w, w, ?&w")
3648         (minus:SVE_I
3649           (match_operand:SVE_I 1 "aarch64_sve_arith_operand" "w, vsa, vsa")
3650           (match_operand:SVE_I 2 "register_operand" "w, 0, w")))]
3651   "TARGET_SVE"
3652   "@
3653    sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
3654    subr\t%0.<Vetype>, %0.<Vetype>, #%D1
3655    movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1"
3656   [(set_attr "movprfx" "*,*,yes")]
3657 )
3658
3659 ;; Merging forms are handled through SVE_INT_BINARY.
3660
3661 ;; -------------------------------------------------------------------------
3662 ;; ---- [INT] Take address
3663 ;; -------------------------------------------------------------------------
3664 ;; Includes:
3665 ;; - ADR
3666 ;; -------------------------------------------------------------------------
3667
3668 ;; An unshifted and unscaled ADR.  This is functionally equivalent to an ADD,
3669 ;; but the svadrb intrinsics should preserve the user's choice.
3670 (define_insn "@aarch64_adr<mode>"
3671   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3672         (unspec:SVE_FULL_SDI
3673           [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
3674            (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
3675           UNSPEC_ADR))]
3676   "TARGET_SVE"
3677   "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
3678 )
3679
3680 ;; Same, but with the offset being sign-extended from the low 32 bits.
3681 (define_insn_and_rewrite "*aarch64_adr_sxtw"
3682   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3683         (unspec:VNx2DI
3684           [(match_operand:VNx2DI 1 "register_operand" "w")
3685            (unspec:VNx2DI
3686              [(match_operand 3)
3687               (sign_extend:VNx2DI
3688                 (truncate:VNx2SI
3689                   (match_operand:VNx2DI 2 "register_operand" "w")))]
3690              UNSPEC_PRED_X)]
3691           UNSPEC_ADR))]
3692   "TARGET_SVE"
3693   "adr\t%0.d, [%1.d, %2.d, sxtw]"
3694   "&& !CONSTANT_P (operands[3])"
3695   {
3696     operands[3] = CONSTM1_RTX (VNx2BImode);
3697   }
3698 )
3699
3700 ;; Same, but with the offset being zero-extended from the low 32 bits.
3701 (define_insn "*aarch64_adr_uxtw_unspec"
3702   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3703         (unspec:VNx2DI
3704           [(match_operand:VNx2DI 1 "register_operand" "w")
3705            (and:VNx2DI
3706              (match_operand:VNx2DI 2 "register_operand" "w")
3707              (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
3708           UNSPEC_ADR))]
3709   "TARGET_SVE"
3710   "adr\t%0.d, [%1.d, %2.d, uxtw]"
3711 )
3712
3713 ;; Same, matching as a PLUS rather than unspec.
3714 (define_insn "*aarch64_adr_uxtw_and"
3715   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3716         (plus:VNx2DI
3717           (and:VNx2DI
3718             (match_operand:VNx2DI 2 "register_operand" "w")
3719             (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
3720           (match_operand:VNx2DI 1 "register_operand" "w")))]
3721   "TARGET_SVE"
3722   "adr\t%0.d, [%1.d, %2.d, uxtw]"
3723 )
3724
3725 ;; ADR with a nonzero shift.
3726 (define_expand "@aarch64_adr<mode>_shift"
3727   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
3728         (plus:SVE_FULL_SDI
3729           (unspec:SVE_FULL_SDI
3730             [(match_dup 4)
3731              (ashift:SVE_FULL_SDI
3732                (match_operand:SVE_FULL_SDI 2 "register_operand")
3733                (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3734             UNSPEC_PRED_X)
3735           (match_operand:SVE_FULL_SDI 1 "register_operand")))]
3736   "TARGET_SVE"
3737   {
3738     operands[4] = CONSTM1_RTX (<VPRED>mode);
3739   }
3740 )
3741
3742 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
3743   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
3744         (plus:SVE_FULL_SDI
3745           (unspec:SVE_FULL_SDI
3746             [(match_operand 4)
3747              (ashift:SVE_FULL_SDI
3748                (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
3749                (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
3750             UNSPEC_PRED_X)
3751           (match_operand:SVE_FULL_SDI 1 "register_operand" "w")))]
3752   "TARGET_SVE"
3753   "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>, lsl %3]"
3754   "&& !CONSTANT_P (operands[4])"
3755   {
3756     operands[4] = CONSTM1_RTX (<VPRED>mode);
3757   }
3758 )
3759
3760 ;; Same, but with the index being sign-extended from the low 32 bits.
3761 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
3762   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3763         (plus:VNx2DI
3764           (unspec:VNx2DI
3765             [(match_operand 4)
3766              (ashift:VNx2DI
3767                (unspec:VNx2DI
3768                  [(match_operand 5)
3769                   (sign_extend:VNx2DI
3770                     (truncate:VNx2SI
3771                       (match_operand:VNx2DI 2 "register_operand" "w")))]
3772                  UNSPEC_PRED_X)
3773                (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3774             UNSPEC_PRED_X)
3775           (match_operand:VNx2DI 1 "register_operand" "w")))]
3776   "TARGET_SVE"
3777   "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
3778   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3779   {
3780     operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
3781   }
3782 )
3783
3784 ;; Same, but with the index being zero-extended from the low 32 bits.
3785 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
3786   [(set (match_operand:VNx2DI 0 "register_operand" "=w")
3787         (plus:VNx2DI
3788           (unspec:VNx2DI
3789             [(match_operand 5)
3790              (ashift:VNx2DI
3791                (and:VNx2DI
3792                  (match_operand:VNx2DI 2 "register_operand" "w")
3793                  (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
3794                (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
3795             UNSPEC_PRED_X)
3796           (match_operand:VNx2DI 1 "register_operand" "w")))]
3797   "TARGET_SVE"
3798   "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
3799   "&& !CONSTANT_P (operands[5])"
3800   {
3801     operands[5] = CONSTM1_RTX (VNx2BImode);
3802   }
3803 )
3804
3805 ;; -------------------------------------------------------------------------
3806 ;; ---- [INT] Absolute difference
3807 ;; -------------------------------------------------------------------------
3808 ;; Includes:
3809 ;; - SABD
3810 ;; - UABD
3811 ;; -------------------------------------------------------------------------
3812
3813 ;; Unpredicated integer absolute difference.
3814 (define_expand "<su>abd<mode>_3"
3815   [(use (match_operand:SVE_FULL_I 0 "register_operand"))
3816    (USMAX:SVE_FULL_I
3817      (match_operand:SVE_FULL_I 1 "register_operand")
3818      (match_operand:SVE_FULL_I 2 "register_operand"))]
3819   "TARGET_SVE"
3820   {
3821     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3822     emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
3823                                                operands[2]));
3824     DONE;
3825   }
3826 )
3827
3828 ;; Predicated integer absolute difference.
3829 (define_insn "@aarch64_pred_<su>abd<mode>"
3830   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3831         (unspec:SVE_FULL_I
3832           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3833            (minus:SVE_FULL_I
3834              (USMAX:SVE_FULL_I
3835                (match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
3836                (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
3837              (<max_opp>:SVE_FULL_I
3838                (match_dup 2)
3839                (match_dup 3)))]
3840           UNSPEC_PRED_X))]
3841   "TARGET_SVE"
3842   "@
3843    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3844    movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3845   [(set_attr "movprfx" "*,yes")]
3846 )
3847
3848 (define_expand "@aarch64_cond_<su>abd<mode>"
3849   [(set (match_operand:SVE_FULL_I 0 "register_operand")
3850         (unspec:SVE_FULL_I
3851           [(match_operand:<VPRED> 1 "register_operand")
3852            (minus:SVE_FULL_I
3853              (unspec:SVE_FULL_I
3854                [(match_dup 1)
3855                 (USMAX:SVE_FULL_I
3856                   (match_operand:SVE_FULL_I 2 "register_operand")
3857                   (match_operand:SVE_FULL_I 3 "register_operand"))]
3858                UNSPEC_PRED_X)
3859              (unspec:SVE_FULL_I
3860                [(match_dup 1)
3861                 (<max_opp>:SVE_FULL_I
3862                   (match_dup 2)
3863                   (match_dup 3))]
3864                UNSPEC_PRED_X))
3865            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
3866           UNSPEC_SEL))]
3867   "TARGET_SVE"
3868 {
3869   if (rtx_equal_p (operands[3], operands[4]))
3870     std::swap (operands[2], operands[3]);
3871 })
3872
3873 ;; Predicated integer absolute difference, merging with the first input.
3874 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
3875   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
3876         (unspec:SVE_FULL_I
3877           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
3878            (minus:SVE_FULL_I
3879              (unspec:SVE_FULL_I
3880                [(match_operand 4)
3881                 (USMAX:SVE_FULL_I
3882                   (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
3883                   (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
3884                UNSPEC_PRED_X)
3885              (unspec:SVE_FULL_I
3886                [(match_operand 5)
3887                 (<max_opp>:SVE_FULL_I
3888                   (match_dup 2)
3889                   (match_dup 3))]
3890                UNSPEC_PRED_X))
3891            (match_dup 2)]
3892           UNSPEC_SEL))]
3893   "TARGET_SVE"
3894   "@
3895    <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3896    movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
3897   "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
3898   {
3899     operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
3900   }
3901   [(set_attr "movprfx" "*,yes")]
3902 )
3903
3904 ;; Predicated integer absolute difference, merging with an independent value.
3905 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
3906   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, ?&w")
3907         (unspec:SVE_FULL_I
3908           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
3909            (minus:SVE_FULL_I
3910              (unspec:SVE_FULL_I
3911                [(match_operand 5)
3912                 (USMAX:SVE_FULL_I
3913                   (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w, w")
3914                   (match_operand:SVE_FULL_I 3 "register_operand" "w, 0, w, w, w"))]
3915                UNSPEC_PRED_X)
3916              (unspec:SVE_FULL_I
3917                [(match_operand 6)
3918                 (<max_opp>:SVE_FULL_I
3919                   (match_dup 2)
3920                   (match_dup 3))]
3921                UNSPEC_PRED_X))
3922            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
3923           UNSPEC_SEL))]
3924   "TARGET_SVE
3925    && !rtx_equal_p (operands[2], operands[4])
3926    && !rtx_equal_p (operands[3], operands[4])"
3927   "@
3928    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3929    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
3930    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3931    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
3932    #"
3933   "&& 1"
3934   {
3935     if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
3936       operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
3937     else if (reload_completed
3938              && register_operand (operands[4], <MODE>mode)
3939              && !rtx_equal_p (operands[0], operands[4]))
3940       {
3941         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
3942                                                  operands[4], operands[1]));
3943         operands[4] = operands[2] = operands[0];
3944       }
3945     else
3946       FAIL;
3947   }
3948   [(set_attr "movprfx" "yes")]
3949 )
3950
3951 ;; -------------------------------------------------------------------------
3952 ;; ---- [INT] Saturating addition and subtraction
3953 ;; -------------------------------------------------------------------------
3954 ;; - SQADD
3955 ;; - SQSUB
3956 ;; - UQADD
3957 ;; - UQSUB
3958 ;; -------------------------------------------------------------------------
3959
3960 ;; Unpredicated saturating signed addition and subtraction.
3961 (define_insn "@aarch64_sve_<optab><mode>"
3962   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w, ?&w, w")
3963         (SBINQOPS:SVE_FULL_I
3964           (match_operand:SVE_FULL_I 1 "register_operand" "0, 0, w, w, w")
3965           (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand" "vsQ, vsS, vsQ, vsS, w")))]
3966   "TARGET_SVE"
3967   "@
3968    <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3969    <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3970    movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3971    movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
3972    <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3973   [(set_attr "movprfx" "*,*,yes,yes,*")]
3974 )
3975
3976 ;; Unpredicated saturating unsigned addition and subtraction.
3977 (define_insn "@aarch64_sve_<optab><mode>"
3978   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w, w")
3979         (UBINQOPS:SVE_FULL_I
3980           (match_operand:SVE_FULL_I 1 "register_operand" "0, w, w")
3981           (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand" "vsa, vsa, w")))]
3982   "TARGET_SVE"
3983   "@
3984    <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3985    movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
3986    <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
3987   [(set_attr "movprfx" "*,yes,*")]
3988 )
3989
3990 ;; -------------------------------------------------------------------------
3991 ;; ---- [INT] Highpart multiplication
3992 ;; -------------------------------------------------------------------------
3993 ;; Includes:
3994 ;; - SMULH
3995 ;; - UMULH
3996 ;; -------------------------------------------------------------------------
3997
3998 ;; Unpredicated highpart multiplication.
3999 (define_expand "<su>mul<mode>3_highpart"
4000   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4001         (unspec:SVE_FULL_I
4002           [(match_dup 3)
4003            (unspec:SVE_FULL_I
4004              [(match_operand:SVE_FULL_I 1 "register_operand")
4005               (match_operand:SVE_FULL_I 2 "register_operand")]
4006              MUL_HIGHPART)]
4007           UNSPEC_PRED_X))]
4008   "TARGET_SVE"
4009   {
4010     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4011   }
4012 )
4013
4014 ;; Predicated highpart multiplication.
4015 (define_insn "@aarch64_pred_<optab><mode>"
4016   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4017         (unspec:SVE_FULL_I
4018           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4019            (unspec:SVE_FULL_I
4020              [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4021               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4022              MUL_HIGHPART)]
4023           UNSPEC_PRED_X))]
4024   "TARGET_SVE"
4025   "@
4026    <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4027    movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4028   [(set_attr "movprfx" "*,yes")]
4029 )
4030
4031 ;; Predicated highpart multiplications with merging.
4032 (define_expand "@cond_<optab><mode>"
4033   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4034         (unspec:SVE_FULL_I
4035           [(match_operand:<VPRED> 1 "register_operand")
4036            (unspec:SVE_FULL_I
4037              [(match_operand:SVE_FULL_I 2 "register_operand")
4038               (match_operand:SVE_FULL_I 3 "register_operand")]
4039              MUL_HIGHPART)
4040            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4041           UNSPEC_SEL))]
4042   "TARGET_SVE"
4043 {
4044   /* Only target code is aware of these operations, so we don't need
4045      to handle the fully-general case.  */
4046   gcc_assert (rtx_equal_p (operands[2], operands[4])
4047               || CONSTANT_P (operands[4]));
4048 })
4049
4050 ;; Predicated highpart multiplications, merging with the first input.
4051 (define_insn "*cond_<optab><mode>_2"
4052   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4053         (unspec:SVE_FULL_I
4054           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4055            (unspec:SVE_FULL_I
4056              [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4057               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4058              MUL_HIGHPART)
4059            (match_dup 2)]
4060           UNSPEC_SEL))]
4061   "TARGET_SVE"
4062   "@
4063    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4064    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4065   [(set_attr "movprfx" "*,yes")])
4066
4067 ;; Predicated highpart multiplications, merging with zero.
4068 (define_insn "*cond_<optab><mode>_z"
4069   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w")
4070         (unspec:SVE_FULL_I
4071           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4072            (unspec:SVE_FULL_I
4073              [(match_operand:SVE_FULL_I 2 "register_operand" "%0, w")
4074               (match_operand:SVE_FULL_I 3 "register_operand" "w, w")]
4075              MUL_HIGHPART)
4076            (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4077           UNSPEC_SEL))]
4078   "TARGET_SVE"
4079   "@
4080    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4081    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4082   [(set_attr "movprfx" "yes")])
4083
4084 ;; -------------------------------------------------------------------------
4085 ;; ---- [INT] Division
4086 ;; -------------------------------------------------------------------------
4087 ;; Includes:
4088 ;; - SDIV
4089 ;; - SDIVR
4090 ;; - UDIV
4091 ;; - UDIVR
4092 ;; -------------------------------------------------------------------------
4093
4094 ;; Unpredicated integer division.
4095 (define_expand "<optab><mode>3"
4096   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4097         (unspec:SVE_FULL_SDI
4098           [(match_dup 3)
4099            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4100              (match_operand:SVE_FULL_SDI 1 "register_operand")
4101              (match_operand:SVE_FULL_SDI 2 "register_operand"))]
4102           UNSPEC_PRED_X))]
4103   "TARGET_SVE"
4104   {
4105     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4106   }
4107 )
4108
4109 ;; Integer division predicated with a PTRUE.
4110 (define_insn "@aarch64_pred_<optab><mode>"
4111   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, w, ?&w")
4112         (unspec:SVE_FULL_SDI
4113           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4114            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4115              (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w")
4116              (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w"))]
4117           UNSPEC_PRED_X))]
4118   "TARGET_SVE"
4119   "@
4120    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4121    <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4122    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4123   [(set_attr "movprfx" "*,*,yes")]
4124 )
4125
4126 ;; Predicated integer division with merging.
4127 (define_expand "@cond_<optab><mode>"
4128   [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4129         (unspec:SVE_FULL_SDI
4130           [(match_operand:<VPRED> 1 "register_operand")
4131            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4132              (match_operand:SVE_FULL_SDI 2 "register_operand")
4133              (match_operand:SVE_FULL_SDI 3 "register_operand"))
4134            (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4135           UNSPEC_SEL))]
4136   "TARGET_SVE"
4137 )
4138
4139 ;; Predicated integer division, merging with the first input.
4140 (define_insn "*cond_<optab><mode>_2"
4141   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4142         (unspec:SVE_FULL_SDI
4143           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4144            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4145              (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w")
4146              (match_operand:SVE_FULL_SDI 3 "register_operand" "w, w"))
4147            (match_dup 2)]
4148           UNSPEC_SEL))]
4149   "TARGET_SVE"
4150   "@
4151    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4152    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4153   [(set_attr "movprfx" "*,yes")]
4154 )
4155
4156 ;; Predicated integer division, merging with the second input.
4157 (define_insn "*cond_<optab><mode>_3"
4158   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
4159         (unspec:SVE_FULL_SDI
4160           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4161            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4162              (match_operand:SVE_FULL_SDI 2 "register_operand" "w, w")
4163              (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w"))
4164            (match_dup 3)]
4165           UNSPEC_SEL))]
4166   "TARGET_SVE"
4167   "@
4168    <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4169    movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4170   [(set_attr "movprfx" "*,yes")]
4171 )
4172
4173 ;; Predicated integer division, merging with an independent value.
4174 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4175   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4176         (unspec:SVE_FULL_SDI
4177           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4178            (SVE_INT_BINARY_SD:SVE_FULL_SDI
4179              (match_operand:SVE_FULL_SDI 2 "register_operand" "0, w, w, w, w")
4180              (match_operand:SVE_FULL_SDI 3 "register_operand" "w, 0, w, w, w"))
4181            (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4182           UNSPEC_SEL))]
4183   "TARGET_SVE
4184    && !rtx_equal_p (operands[2], operands[4])
4185    && !rtx_equal_p (operands[3], operands[4])"
4186   "@
4187    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4188    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4189    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4190    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4191    #"
4192   "&& reload_completed
4193    && register_operand (operands[4], <MODE>mode)
4194    && !rtx_equal_p (operands[0], operands[4])"
4195   {
4196     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4197                                              operands[4], operands[1]));
4198     operands[4] = operands[2] = operands[0];
4199   }
4200   [(set_attr "movprfx" "yes")]
4201 )
4202
4203 ;; -------------------------------------------------------------------------
4204 ;; ---- [INT] Binary logical operations
4205 ;; -------------------------------------------------------------------------
4206 ;; Includes:
4207 ;; - AND
4208 ;; - EOR
4209 ;; - ORR
4210 ;; -------------------------------------------------------------------------
4211
4212 ;; Unpredicated integer binary logical operations.
4213 (define_insn "<optab><mode>3"
4214   [(set (match_operand:SVE_I 0 "register_operand" "=w, ?w, w")
4215         (LOGICAL:SVE_I
4216           (match_operand:SVE_I 1 "register_operand" "%0, w, w")
4217           (match_operand:SVE_I 2 "aarch64_sve_logical_operand" "vsl, vsl, w")))]
4218   "TARGET_SVE"
4219   "@
4220    <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4221    movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4222    <logical>\t%0.d, %1.d, %2.d"
4223   [(set_attr "movprfx" "*,yes,*")]
4224 )
4225
4226 ;; Merging forms are handled through SVE_INT_BINARY.
4227
4228 ;; -------------------------------------------------------------------------
4229 ;; ---- [INT] Binary logical operations (inverted second input)
4230 ;; -------------------------------------------------------------------------
4231 ;; Includes:
4232 ;; - BIC
4233 ;; -------------------------------------------------------------------------
4234
4235 ;; Unpredicated BIC.
4236 (define_expand "@aarch64_bic<mode>"
4237   [(set (match_operand:SVE_I 0 "register_operand")
4238         (and:SVE_I
4239           (unspec:SVE_I
4240             [(match_dup 3)
4241              (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4242             UNSPEC_PRED_X)
4243           (match_operand:SVE_I 1 "register_operand")))]
4244   "TARGET_SVE"
4245   {
4246     operands[3] = CONSTM1_RTX (<VPRED>mode);
4247   }
4248 )
4249
4250 ;; Predicated BIC.
4251 (define_insn_and_rewrite "*bic<mode>3"
4252   [(set (match_operand:SVE_I 0 "register_operand" "=w")
4253         (and:SVE_I
4254           (unspec:SVE_I
4255             [(match_operand 3)
4256              (not:SVE_I
4257                (match_operand:SVE_I 2 "register_operand" "w"))]
4258             UNSPEC_PRED_X)
4259           (match_operand:SVE_I 1 "register_operand" "w")))]
4260   "TARGET_SVE"
4261   "bic\t%0.d, %1.d, %2.d"
4262   "&& !CONSTANT_P (operands[3])"
4263   {
4264     operands[3] = CONSTM1_RTX (<VPRED>mode);
4265   }
4266 )
4267
4268 ;; Predicated BIC with merging.
4269 (define_expand "@cond_bic<mode>"
4270   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4271         (unspec:SVE_FULL_I
4272           [(match_operand:<VPRED> 1 "register_operand")
4273            (and:SVE_FULL_I
4274              (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4275              (match_operand:SVE_FULL_I 2 "register_operand"))
4276            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4277           UNSPEC_SEL))]
4278   "TARGET_SVE"
4279 )
4280
4281 ;; Predicated integer BIC, merging with the first input.
4282 (define_insn "*cond_bic<mode>_2"
4283   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4284         (unspec:SVE_FULL_I
4285           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4286            (and:SVE_FULL_I
4287              (not:SVE_FULL_I
4288                (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
4289              (match_operand:SVE_FULL_I 2 "register_operand" "0, w"))
4290            (match_dup 2)]
4291           UNSPEC_SEL))]
4292   "TARGET_SVE"
4293   "@
4294    bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4295    movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4296   [(set_attr "movprfx" "*,yes")]
4297 )
4298
4299 ;; Predicated integer BIC, merging with an independent value.
4300 (define_insn_and_rewrite "*cond_bic<mode>_any"
4301   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, ?&w")
4302         (unspec:SVE_FULL_I
4303           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4304            (and:SVE_FULL_I
4305              (not:SVE_FULL_I
4306                (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, w"))
4307              (match_operand:SVE_FULL_I 2 "register_operand" "0, w, w, w"))
4308            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4309           UNSPEC_SEL))]
4310   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4311   "@
4312    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4313    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4314    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4315    #"
4316   "&& reload_completed
4317    && register_operand (operands[4], <MODE>mode)
4318    && !rtx_equal_p (operands[0], operands[4])"
4319   {
4320     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4321                                              operands[4], operands[1]));
4322     operands[4] = operands[2] = operands[0];
4323   }
4324   [(set_attr "movprfx" "yes")]
4325 )
4326
4327 ;; -------------------------------------------------------------------------
4328 ;; ---- [INT] Shifts (rounding towards -Inf)
4329 ;; -------------------------------------------------------------------------
4330 ;; Includes:
4331 ;; - ASR
4332 ;; - ASRR
4333 ;; - LSL
4334 ;; - LSLR
4335 ;; - LSR
4336 ;; - LSRR
4337 ;; -------------------------------------------------------------------------
4338
4339 ;; Unpredicated shift by a scalar, which expands into one of the vector
4340 ;; shifts below.
4341 (define_expand "<ASHIFT:optab><mode>3"
4342   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4343         (ASHIFT:SVE_FULL_I
4344           (match_operand:SVE_FULL_I 1 "register_operand")
4345           (match_operand:<VEL> 2 "general_operand")))]
4346   "TARGET_SVE"
4347   {
4348     rtx amount;
4349     if (CONST_INT_P (operands[2]))
4350       {
4351         amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4352         if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4353           amount = force_reg (<MODE>mode, amount);
4354       }
4355     else
4356       {
4357         amount = gen_reg_rtx (<MODE>mode);
4358         emit_insn (gen_vec_duplicate<mode> (amount,
4359                                             convert_to_mode (<VEL>mode,
4360                                                              operands[2], 0)));
4361       }
4362     emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
4363     DONE;
4364   }
4365 )
4366
4367 ;; Unpredicated shift by a vector.
4368 (define_expand "v<optab><mode>3"
4369   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4370         (unspec:SVE_FULL_I
4371           [(match_dup 3)
4372            (ASHIFT:SVE_FULL_I
4373              (match_operand:SVE_FULL_I 1 "register_operand")
4374              (match_operand:SVE_FULL_I 2 "aarch64_sve_<lr>shift_operand"))]
4375           UNSPEC_PRED_X))]
4376   "TARGET_SVE"
4377   {
4378     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4379   }
4380 )
4381
4382 ;; Shift by a vector, predicated with a PTRUE.  We don't actually need
4383 ;; the predicate for the first alternative, but using Upa or X isn't
4384 ;; likely to gain much and would make the instruction seem less uniform
4385 ;; to the register allocator.
4386 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4387   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, w, ?&w")
4388         (unspec:SVE_FULL_I
4389           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4390            (ASHIFT:SVE_FULL_I
4391              (match_operand:SVE_FULL_I 2 "register_operand" "w, 0, w, w")
4392              (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand" "D<lr>, w, 0, w"))]
4393           UNSPEC_PRED_X))]
4394   "TARGET_SVE"
4395   "@
4396    #
4397    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4398    <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4399    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4400   "&& reload_completed
4401    && !register_operand (operands[3], <MODE>mode)"
4402   [(set (match_dup 0) (ASHIFT:SVE_FULL_I (match_dup 2) (match_dup 3)))]
4403   ""
4404   [(set_attr "movprfx" "*,*,*,yes")]
4405 )
4406
4407 ;; Unpredicated shift operations by a constant (post-RA only).
4408 ;; These are generated by splitting a predicated instruction whose
4409 ;; predicate is unused.
4410 (define_insn "*post_ra_v<optab><mode>3"
4411   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4412         (ASHIFT:SVE_FULL_I
4413           (match_operand:SVE_FULL_I 1 "register_operand" "w")
4414           (match_operand:SVE_FULL_I 2 "aarch64_simd_<lr>shift_imm")))]
4415   "TARGET_SVE && reload_completed"
4416   "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4417 )
4418
4419 ;; Predicated integer shift, merging with the first input.
4420 (define_insn "*cond_<optab><mode>_2_const"
4421   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4422         (unspec:SVE_FULL_I
4423           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4424            (ASHIFT:SVE_FULL_I
4425              (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4426              (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4427            (match_dup 2)]
4428          UNSPEC_SEL))]
4429   "TARGET_SVE"
4430   "@
4431    <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4432    movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4433   [(set_attr "movprfx" "*,yes")]
4434 )
4435
4436 ;; Predicated integer shift, merging with an independent value.
4437 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4438   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, &w, ?&w")
4439         (unspec:SVE_FULL_I
4440           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4441            (ASHIFT:SVE_FULL_I
4442              (match_operand:SVE_FULL_I 2 "register_operand" "w, w, w")
4443              (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm"))
4444            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4445          UNSPEC_SEL))]
4446   "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4447   "@
4448    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4449    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4450    #"
4451   "&& reload_completed
4452    && register_operand (operands[4], <MODE>mode)
4453    && !rtx_equal_p (operands[0], operands[4])"
4454   {
4455     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4456                                              operands[4], operands[1]));
4457     operands[4] = operands[2] = operands[0];
4458   }
4459   [(set_attr "movprfx" "yes")]
4460 )
4461
4462 ;; Unpredicated shifts of narrow elements by 64-bit amounts.
4463 (define_insn "@aarch64_sve_<sve_int_op><mode>"
4464   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
4465         (unspec:SVE_FULL_BHSI
4466           [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
4467            (match_operand:VNx2DI 2 "register_operand" "w")]
4468           SVE_SHIFT_WIDE))]
4469   "TARGET_SVE"
4470   "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
4471 )
4472
4473 ;; Merging predicated shifts of narrow elements by 64-bit amounts.
4474 (define_expand "@cond_<sve_int_op><mode>"
4475   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
4476         (unspec:SVE_FULL_BHSI
4477           [(match_operand:<VPRED> 1 "register_operand")
4478            (unspec:SVE_FULL_BHSI
4479              [(match_operand:SVE_FULL_BHSI 2 "register_operand")
4480               (match_operand:VNx2DI 3 "register_operand")]
4481              SVE_SHIFT_WIDE)
4482            (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
4483           UNSPEC_SEL))]
4484   "TARGET_SVE"
4485 )
4486
4487 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with
4488 ;; the first input.
4489 (define_insn "*cond_<sve_int_op><mode>_m"
4490   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w, ?&w")
4491         (unspec:SVE_FULL_BHSI
4492           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4493            (unspec:SVE_FULL_BHSI
4494              [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4495               (match_operand:VNx2DI 3 "register_operand" "w, w")]
4496              SVE_SHIFT_WIDE)
4497            (match_dup 2)]
4498          UNSPEC_SEL))]
4499   "TARGET_SVE"
4500   "@
4501    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4502    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4503   [(set_attr "movprfx" "*, yes")])
4504
4505 ;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
4506 (define_insn "*cond_<sve_int_op><mode>_z"
4507   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=&w, &w")
4508         (unspec:SVE_FULL_BHSI
4509           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4510            (unspec:SVE_FULL_BHSI
4511              [(match_operand:SVE_FULL_BHSI 2 "register_operand" "0, w")
4512               (match_operand:VNx2DI 3 "register_operand" "w, w")]
4513              SVE_SHIFT_WIDE)
4514            (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
4515          UNSPEC_SEL))]
4516   "TARGET_SVE"
4517   "@
4518    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
4519    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d"
4520   [(set_attr "movprfx" "yes")])
4521
4522 ;; -------------------------------------------------------------------------
4523 ;; ---- [INT] Shifts (rounding towards 0)
4524 ;; -------------------------------------------------------------------------
4525 ;; Includes:
4526 ;; - ASRD
4527 ;; - SQSHLU (SVE2)
4528 ;; - SRSHR (SVE2)
4529 ;; - URSHR (SVE2)
4530 ;; -------------------------------------------------------------------------
4531
4532 ;; Unpredicated <SVE_INT_OP>.
4533 (define_expand "sdiv_pow2<mode>3"
4534   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4535         (unspec:SVE_FULL_I
4536           [(match_dup 3)
4537            (unspec:SVE_FULL_I
4538              [(match_operand:SVE_FULL_I 1 "register_operand")
4539               (match_operand 2 "aarch64_simd_rshift_imm")]
4540              UNSPEC_ASRD)
4541            (match_dup 1)]
4542          UNSPEC_SEL))]
4543   "TARGET_SVE"
4544   {
4545     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4546   }
4547 )
4548
4549 ;; Predicated right shift with merging.
4550 (define_expand "@cond_<sve_int_op><mode>"
4551   [(set (match_operand:SVE_FULL_I 0 "register_operand")
4552         (unspec:SVE_FULL_I
4553           [(match_operand:<VPRED> 1 "register_operand")
4554            (unspec:SVE_FULL_I
4555              [(match_operand:SVE_FULL_I 2 "register_operand")
4556               (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4557              SVE_INT_SHIFT_IMM)
4558            (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4559           UNSPEC_SEL))]
4560   "TARGET_SVE"
4561 )
4562
4563 ;; Predicated right shift, merging with the first input.
4564 (define_insn "*cond_<sve_int_op><mode>_2"
4565   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
4566         (unspec:SVE_FULL_I
4567           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4568            (unspec:SVE_FULL_I
4569              [(match_operand:SVE_FULL_I 2 "register_operand" "0, w")
4570               (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4571              SVE_INT_SHIFT_IMM)
4572            (match_dup 2)]
4573           UNSPEC_SEL))]
4574   "TARGET_SVE"
4575   "@
4576    <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4577    movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4578   [(set_attr "movprfx" "*,yes")])
4579
4580 ;; Predicated right shift, merging with zero.
4581 (define_insn "*cond_<sve_int_op><mode>_z"
4582   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
4583         (unspec:SVE_FULL_I
4584           [(match_operand:<VPRED> 1 "register_operand" "Upl")
4585            (unspec:SVE_FULL_I
4586              [(match_operand:SVE_FULL_I 2 "register_operand" "w")
4587               (match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
4588              SVE_INT_SHIFT_IMM)
4589            (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4590           UNSPEC_SEL))]
4591   "TARGET_SVE"
4592   "movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4593   [(set_attr "movprfx" "yes")])
4594
4595 ;; -------------------------------------------------------------------------
4596 ;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
4597 ;; -------------------------------------------------------------------------
4598 ;; Includes:
4599 ;; - FSCALE
4600 ;; - FTSMUL
4601 ;; - FTSSEL
4602 ;; -------------------------------------------------------------------------
4603
4604 ;; Unpredicated floating-point binary operations that take an integer as
4605 ;; their second operand.
4606 (define_insn "@aarch64_sve_<optab><mode>"
4607   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4608         (unspec:SVE_FULL_F
4609           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4610            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
4611           SVE_FP_BINARY_INT))]
4612   "TARGET_SVE"
4613   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4614 )
4615
4616 ;; Predicated floating-point binary operations that take an integer
4617 ;; as their second operand.
4618 (define_insn "@aarch64_pred_<optab><mode>"
4619   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4620         (unspec:SVE_FULL_F
4621           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4622            (match_operand:SI 4 "aarch64_sve_gp_strictness")
4623            (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4624            (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4625           SVE_COND_FP_BINARY_INT))]
4626   "TARGET_SVE"
4627   "@
4628    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4629    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4630   [(set_attr "movprfx" "*,yes")]
4631 )
4632
4633 ;; Predicated floating-point binary operations with merging, taking an
4634 ;; integer as their second operand.
4635 (define_expand "@cond_<optab><mode>"
4636   [(set (match_operand:SVE_FULL_F 0 "register_operand")
4637         (unspec:SVE_FULL_F
4638           [(match_operand:<VPRED> 1 "register_operand")
4639            (unspec:SVE_FULL_F
4640              [(match_dup 1)
4641               (const_int SVE_STRICT_GP)
4642               (match_operand:SVE_FULL_F 2 "register_operand")
4643               (match_operand:<V_INT_EQUIV> 3 "register_operand")]
4644              SVE_COND_FP_BINARY_INT)
4645            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4646           UNSPEC_SEL))]
4647   "TARGET_SVE"
4648 )
4649
4650 ;; Predicated floating-point binary operations that take an integer as their
4651 ;; second operand, with inactive lanes coming from the first operand.
4652 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4653   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4654         (unspec:SVE_FULL_F
4655           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4656            (unspec:SVE_FULL_F
4657              [(match_operand 4)
4658               (match_operand:SI 5 "aarch64_sve_gp_strictness")
4659               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4660               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w")]
4661              SVE_COND_FP_BINARY_INT)
4662            (match_dup 2)]
4663           UNSPEC_SEL))]
4664   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4665   "@
4666    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4667    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4668   "&& !rtx_equal_p (operands[1], operands[4])"
4669   {
4670     operands[4] = copy_rtx (operands[1]);
4671   }
4672   [(set_attr "movprfx" "*,yes")]
4673 )
4674
4675 ;; Predicated floating-point binary operations that take an integer as
4676 ;; their second operand, with the values of inactive lanes being distinct
4677 ;; from the other inputs.
4678 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4679   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
4680         (unspec:SVE_FULL_F
4681           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
4682            (unspec:SVE_FULL_F
4683              [(match_operand 5)
4684               (match_operand:SI 6 "aarch64_sve_gp_strictness")
4685               (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w")
4686               (match_operand:<V_INT_EQUIV> 3 "register_operand" "w, w, w, w")]
4687              SVE_COND_FP_BINARY_INT)
4688            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
4689           UNSPEC_SEL))]
4690   "TARGET_SVE
4691    && !rtx_equal_p (operands[2], operands[4])
4692    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4693   "@
4694    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4695    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4696    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4697    #"
4698   "&& 1"
4699   {
4700     if (reload_completed
4701         && register_operand (operands[4], <MODE>mode)
4702         && !rtx_equal_p (operands[0], operands[4]))
4703       {
4704         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4705                                                  operands[4], operands[1]));
4706         operands[4] = operands[2] = operands[0];
4707       }
4708     else if (!rtx_equal_p (operands[1], operands[5]))
4709       operands[5] = copy_rtx (operands[1]);
4710     else
4711       FAIL;
4712   }
4713   [(set_attr "movprfx" "yes")]
4714 )
4715
4716 ;; -------------------------------------------------------------------------
4717 ;; ---- [FP] General binary arithmetic corresponding to rtx codes
4718 ;; -------------------------------------------------------------------------
4719 ;; Includes post-RA forms of:
4720 ;; - FADD
4721 ;; - FMUL
4722 ;; - FSUB
4723 ;; -------------------------------------------------------------------------
4724
4725 ;; Unpredicated floating-point binary operations (post-RA only).
4726 ;; These are generated by splitting a predicated instruction whose
4727 ;; predicate is unused.
4728 (define_insn "*post_ra_<sve_fp_op><mode>3"
4729   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4730         (SVE_UNPRED_FP_BINARY:SVE_FULL_F
4731           (match_operand:SVE_FULL_F 1 "register_operand" "w")
4732           (match_operand:SVE_FULL_F 2 "register_operand" "w")))]
4733   "TARGET_SVE && reload_completed"
4734   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
4735
4736 ;; -------------------------------------------------------------------------
4737 ;; ---- [FP] General binary arithmetic corresponding to unspecs
4738 ;; -------------------------------------------------------------------------
4739 ;; Includes merging forms of:
4740 ;; - FADD    (constant forms handled in the "Addition" section)
4741 ;; - FDIV
4742 ;; - FDIVR
4743 ;; - FMAX
4744 ;; - FMAXNM  (including #0.0 and #1.0)
4745 ;; - FMIN
4746 ;; - FMINNM  (including #0.0 and #1.0)
4747 ;; - FMUL    (including #0.5 and #2.0)
4748 ;; - FMULX
4749 ;; - FRECPS
4750 ;; - FRSQRTS
4751 ;; - FSUB    (constant forms handled in the "Addition" section)
4752 ;; - FSUBR   (constant forms handled in the "Subtraction" section)
4753 ;; -------------------------------------------------------------------------
4754
4755 ;; Unpredicated floating-point binary operations.
4756 (define_insn "@aarch64_sve_<optab><mode>"
4757   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
4758         (unspec:SVE_FULL_F
4759           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
4760            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
4761           SVE_FP_BINARY))]
4762   "TARGET_SVE"
4763   "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
4764 )
4765
4766 ;; Unpredicated floating-point binary operations that need to be predicated
4767 ;; for SVE.
4768 (define_expand "<optab><mode>3"
4769   [(set (match_operand:SVE_FULL_F 0 "register_operand")
4770         (unspec:SVE_FULL_F
4771           [(match_dup 3)
4772            (const_int SVE_RELAXED_GP)
4773            (match_operand:SVE_FULL_F 1 "<sve_pred_fp_rhs1_operand>")
4774            (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs2_operand>")]
4775           SVE_COND_FP_BINARY_OPTAB))]
4776   "TARGET_SVE"
4777   {
4778     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4779   }
4780 )
4781
4782 ;; Predicated floating-point binary operations that have no immediate forms.
4783 (define_insn "@aarch64_pred_<optab><mode>"
4784   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
4785         (unspec:SVE_FULL_F
4786           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4787            (match_operand:SI 4 "aarch64_sve_gp_strictness")
4788            (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w")
4789            (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w")]
4790           SVE_COND_FP_BINARY_REG))]
4791   "TARGET_SVE"
4792   "@
4793    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4794    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4795    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4796   [(set_attr "movprfx" "*,*,yes")]
4797 )
4798
4799 ;; Predicated floating-point operations with merging.
4800 (define_expand "@cond_<optab><mode>"
4801   [(set (match_operand:SVE_FULL_F 0 "register_operand")
4802         (unspec:SVE_FULL_F
4803           [(match_operand:<VPRED> 1 "register_operand")
4804            (unspec:SVE_FULL_F
4805              [(match_dup 1)
4806               (const_int SVE_STRICT_GP)
4807               (match_operand:SVE_FULL_F 2 "<sve_pred_fp_rhs1_operand>")
4808               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_operand>")]
4809              SVE_COND_FP_BINARY)
4810            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
4811           UNSPEC_SEL))]
4812   "TARGET_SVE"
4813 )
4814
4815 ;; Predicated floating-point operations, merging with the first input.
4816 (define_insn_and_rewrite "*cond_<optab><mode>_2"
4817   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4818         (unspec:SVE_FULL_F
4819           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4820            (unspec:SVE_FULL_F
4821              [(match_operand 4)
4822               (match_operand:SI 5 "aarch64_sve_gp_strictness")
4823               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4824               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
4825              SVE_COND_FP_BINARY)
4826            (match_dup 2)]
4827           UNSPEC_SEL))]
4828   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4829   "@
4830    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4831    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4832   "&& !rtx_equal_p (operands[1], operands[4])"
4833   {
4834     operands[4] = copy_rtx (operands[1]);
4835   }
4836   [(set_attr "movprfx" "*,yes")]
4837 )
4838
4839 ;; Same for operations that take a 1-bit constant.
4840 (define_insn_and_rewrite "*cond_<optab><mode>_2_const"
4841   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
4842         (unspec:SVE_FULL_F
4843           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4844            (unspec:SVE_FULL_F
4845              [(match_operand 4)
4846               (match_operand:SI 5 "aarch64_sve_gp_strictness")
4847               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
4848               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4849              SVE_COND_FP_BINARY_I1)
4850            (match_dup 2)]
4851           UNSPEC_SEL))]
4852   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4853   "@
4854    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4855    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3"
4856   "&& !rtx_equal_p (operands[1], operands[4])"
4857   {
4858     operands[4] = copy_rtx (operands[1]);
4859   }
4860   [(set_attr "movprfx" "*,yes")]
4861 )
4862
4863 ;; Predicated floating-point operations, merging with the second input.
4864 (define_insn_and_rewrite "*cond_<optab><mode>_3"
4865   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
4866         (unspec:SVE_FULL_F
4867           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
4868            (unspec:SVE_FULL_F
4869              [(match_operand 4)
4870               (match_operand:SI 5 "aarch64_sve_gp_strictness")
4871               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
4872               (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
4873              SVE_COND_FP_BINARY)
4874            (match_dup 3)]
4875           UNSPEC_SEL))]
4876   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
4877   "@
4878    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4879    movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
4880   "&& !rtx_equal_p (operands[1], operands[4])"
4881   {
4882     operands[4] = copy_rtx (operands[1]);
4883   }
4884   [(set_attr "movprfx" "*,yes")]
4885 )
4886
4887 ;; Predicated floating-point operations, merging with an independent value.
4888 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4889   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
4890         (unspec:SVE_FULL_F
4891           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
4892            (unspec:SVE_FULL_F
4893              [(match_operand 5)
4894               (match_operand:SI 6 "aarch64_sve_gp_strictness")
4895               (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
4896               (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
4897              SVE_COND_FP_BINARY)
4898            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
4899           UNSPEC_SEL))]
4900   "TARGET_SVE
4901    && !rtx_equal_p (operands[2], operands[4])
4902    && !rtx_equal_p (operands[3], operands[4])
4903    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4904   "@
4905    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4906    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4907    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4908    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4909    #"
4910   "&& 1"
4911   {
4912     if (reload_completed
4913         && register_operand (operands[4], <MODE>mode)
4914         && !rtx_equal_p (operands[0], operands[4]))
4915       {
4916         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4917                                                  operands[4], operands[1]));
4918         operands[4] = operands[2] = operands[0];
4919       }
4920     else if (!rtx_equal_p (operands[1], operands[5]))
4921       operands[5] = copy_rtx (operands[1]);
4922     else
4923       FAIL;
4924   }
4925   [(set_attr "movprfx" "yes")]
4926 )
4927
4928 ;; Same for operations that take a 1-bit constant.
4929 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4930   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
4931         (unspec:SVE_FULL_F
4932           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
4933            (unspec:SVE_FULL_F
4934              [(match_operand 5)
4935               (match_operand:SI 6 "aarch64_sve_gp_strictness")
4936               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")
4937               (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
4938              SVE_COND_FP_BINARY_I1)
4939            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
4940           UNSPEC_SEL))]
4941   "TARGET_SVE
4942    && !rtx_equal_p (operands[2], operands[4])
4943    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
4944   "@
4945    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4946    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4947    #"
4948   "&& 1"
4949   {
4950     if (reload_completed
4951         && register_operand (operands[4], <MODE>mode)
4952         && !rtx_equal_p (operands[0], operands[4]))
4953       {
4954         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4955                                                  operands[4], operands[1]));
4956         operands[4] = operands[2] = operands[0];
4957       }
4958     else if (!rtx_equal_p (operands[1], operands[5]))
4959       operands[5] = copy_rtx (operands[1]);
4960     else
4961       FAIL;
4962   }
4963   [(set_attr "movprfx" "yes")]
4964 )
4965
4966 ;; -------------------------------------------------------------------------
4967 ;; ---- [FP] Addition
4968 ;; -------------------------------------------------------------------------
4969 ;; Includes:
4970 ;; - FADD
4971 ;; - FSUB
4972 ;; -------------------------------------------------------------------------
4973
4974 ;; Predicated floating-point addition.
4975 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4976   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w, ?&w")
4977         (unspec:SVE_FULL_F
4978           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl, Upl")
4979            (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z, Ui1, i, i, Ui1")
4980            (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, 0, w, w, w")
4981            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w, w, vsA, vsN, w")]
4982           SVE_COND_FP_ADD))]
4983   "TARGET_SVE"
4984   "@
4985    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4986    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4987    #
4988    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4989    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4990    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
4991    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
4992   ; Split the unpredicated form after reload, so that we don't have
4993   ; the unnecessary PTRUE.
4994   "&& reload_completed
4995    && register_operand (operands[3], <MODE>mode)
4996    && INTVAL (operands[4]) == SVE_RELAXED_GP"
4997   [(set (match_dup 0) (plus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
4998   ""
4999   [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
5000 )
5001
5002 ;; Predicated floating-point addition of a constant, merging with the
5003 ;; first input.
5004 (define_insn_and_rewrite "*cond_add<mode>_2_const"
5005   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w, ?w")
5006         (unspec:SVE_FULL_F
5007           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5008            (unspec:SVE_FULL_F
5009              [(match_operand 4)
5010               (match_operand:SI 5 "aarch64_sve_gp_strictness")
5011               (match_operand:SVE_FULL_F 2 "register_operand" "0, 0, w, w")
5012               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN")]
5013              UNSPEC_COND_FADD)
5014            (match_dup 2)]
5015           UNSPEC_SEL))]
5016   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5017   "@
5018    fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5019    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5020    movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5021    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3"
5022   "&& !rtx_equal_p (operands[1], operands[4])"
5023   {
5024     operands[4] = copy_rtx (operands[1]);
5025   }
5026   [(set_attr "movprfx" "*,*,yes,yes")]
5027 )
5028
5029 ;; Predicated floating-point addition of a constant, merging with an
5030 ;; independent value.
5031 (define_insn_and_rewrite "*cond_add<mode>_any_const"
5032   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?w, ?w")
5033         (unspec:SVE_FULL_F
5034           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5035            (unspec:SVE_FULL_F
5036              [(match_operand 5)
5037               (match_operand:SI 6 "aarch64_sve_gp_strictness")
5038               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w, w, w")
5039               (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate" "vsA, vsN, vsA, vsN, vsA, vsN")]
5040              UNSPEC_COND_FADD)
5041            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, 0, w, w")]
5042           UNSPEC_SEL))]
5043   "TARGET_SVE
5044    && !rtx_equal_p (operands[2], operands[4])
5045    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5046   "@
5047    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5048    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5049    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5050    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
5051    #
5052    #"
5053   "&& 1"
5054   {
5055     if (reload_completed
5056         && register_operand (operands[4], <MODE>mode)
5057         && !rtx_equal_p (operands[0], operands[4]))
5058       {
5059         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5060                                                  operands[4], operands[1]));
5061         operands[4] = operands[2] = operands[0];
5062       }
5063     else if (!rtx_equal_p (operands[1], operands[5]))
5064       operands[5] = copy_rtx (operands[1]);
5065     else
5066       FAIL;
5067   }
5068   [(set_attr "movprfx" "yes")]
5069 )
5070
5071 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5072
5073 ;; -------------------------------------------------------------------------
5074 ;; ---- [FP] Complex addition
5075 ;; -------------------------------------------------------------------------
5076 ;; Includes:
5077 ;; - FCADD
5078 ;; -------------------------------------------------------------------------
5079
5080 ;; Predicated FCADD.
5081 (define_insn "@aarch64_pred_<optab><mode>"
5082   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5083         (unspec:SVE_FULL_F
5084           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5085            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5086            (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5087            (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5088           SVE_COND_FCADD))]
5089   "TARGET_SVE"
5090   "@
5091    fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5092    movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5093   [(set_attr "movprfx" "*,yes")]
5094 )
5095
5096 ;; Predicated FCADD with merging.
5097 (define_expand "@cond_<optab><mode>"
5098   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5099         (unspec:SVE_FULL_F
5100           [(match_operand:<VPRED> 1 "register_operand")
5101            (unspec:SVE_FULL_F
5102              [(match_dup 1)
5103               (const_int SVE_STRICT_GP)
5104               (match_operand:SVE_FULL_F 2 "register_operand")
5105               (match_operand:SVE_FULL_F 3 "register_operand")]
5106              SVE_COND_FCADD)
5107            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5108           UNSPEC_SEL))]
5109   "TARGET_SVE"
5110 )
5111
5112 ;; Predicated FCADD, merging with the first input.
5113 (define_insn_and_rewrite "*cond_<optab><mode>_2"
5114   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5115         (unspec:SVE_FULL_F
5116           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5117            (unspec:SVE_FULL_F
5118              [(match_operand 4)
5119               (match_operand:SI 5 "aarch64_sve_gp_strictness")
5120               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5121               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5122              SVE_COND_FCADD)
5123            (match_dup 2)]
5124           UNSPEC_SEL))]
5125   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5126   "@
5127    fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5128    movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>"
5129   "&& !rtx_equal_p (operands[1], operands[4])"
5130   {
5131     operands[4] = copy_rtx (operands[1]);
5132   }
5133   [(set_attr "movprfx" "*,yes")]
5134 )
5135
5136 ;; Predicated FCADD, merging with an independent value.
5137 (define_insn_and_rewrite "*cond_<optab><mode>_any"
5138   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
5139         (unspec:SVE_FULL_F
5140           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5141            (unspec:SVE_FULL_F
5142              [(match_operand 5)
5143               (match_operand:SI 6 "aarch64_sve_gp_strictness")
5144               (match_operand:SVE_FULL_F 2 "register_operand" "w, 0, w, w")
5145               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")]
5146              SVE_COND_FCADD)
5147            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
5148           UNSPEC_SEL))]
5149   "TARGET_SVE
5150    && !rtx_equal_p (operands[2], operands[4])
5151    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5152   "@
5153    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5154    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5155    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
5156    #"
5157   "&& 1"
5158   {
5159     if (reload_completed
5160         && register_operand (operands[4], <MODE>mode)
5161         && !rtx_equal_p (operands[0], operands[4]))
5162       {
5163         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
5164                                                  operands[4], operands[1]));
5165         operands[4] = operands[2] = operands[0];
5166       }
5167     else if (!rtx_equal_p (operands[1], operands[5]))
5168       operands[5] = copy_rtx (operands[1]);
5169     else
5170       FAIL;
5171   }
5172   [(set_attr "movprfx" "yes")]
5173 )
5174
5175 ;; -------------------------------------------------------------------------
5176 ;; ---- [FP] Subtraction
5177 ;; -------------------------------------------------------------------------
5178 ;; Includes:
5179 ;; - FSUB
5180 ;; - FSUBR
5181 ;; -------------------------------------------------------------------------
5182
5183 ;; Predicated floating-point subtraction.
5184 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5185   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, w, ?&w, ?&w")
5186         (unspec:SVE_FULL_F
5187           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
5188            (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, Ui1, i, Ui1")
5189            (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand" "vsA, w, 0, w, vsA, w")
5190            (match_operand:SVE_FULL_F 3 "register_operand" "0, w, w, 0, w, w")]
5191           SVE_COND_FP_SUB))]
5192   "TARGET_SVE"
5193   "@
5194    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5195    #
5196    fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5197    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5198    movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5199    movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5200   ; Split the unpredicated form after reload, so that we don't have
5201   ; the unnecessary PTRUE.
5202   "&& reload_completed
5203    && register_operand (operands[2], <MODE>mode)
5204    && INTVAL (operands[4]) == SVE_RELAXED_GP"
5205   [(set (match_dup 0) (minus:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5206   ""
5207   [(set_attr "movprfx" "*,*,*,*,yes,yes")]
5208 )
5209
5210 ;; Predicated floating-point subtraction from a constant, merging with the
5211 ;; second input.
5212 (define_insn_and_rewrite "*cond_sub<mode>_3_const"
5213   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?w")
5214         (unspec:SVE_FULL_F
5215           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5216            (unspec:SVE_FULL_F
5217              [(match_operand 4)
5218               (match_operand:SI 5 "aarch64_sve_gp_strictness")
5219               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5220               (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5221              UNSPEC_COND_FSUB)
5222            (match_dup 3)]
5223           UNSPEC_SEL))]
5224   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
5225   "@
5226    fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5227    movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2"
5228   "&& !rtx_equal_p (operands[1], operands[4])"
5229   {
5230     operands[4] = copy_rtx (operands[1]);
5231   }
5232   [(set_attr "movprfx" "*,yes")]
5233 )
5234
5235 ;; Predicated floating-point subtraction from a constant, merging with an
5236 ;; independent value.
5237 (define_insn_and_rewrite "*cond_sub<mode>_any_const"
5238   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?w")
5239         (unspec:SVE_FULL_F
5240           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5241            (unspec:SVE_FULL_F
5242              [(match_operand 5)
5243               (match_operand:SI 6 "aarch64_sve_gp_strictness")
5244               (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
5245               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")]
5246              UNSPEC_COND_FSUB)
5247            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, 0, w")]
5248           UNSPEC_SEL))]
5249   "TARGET_SVE
5250    && !rtx_equal_p (operands[3], operands[4])
5251    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5252   "@
5253    movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5254    movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
5255    #"
5256   "&& 1"
5257   {
5258     if (reload_completed
5259         && register_operand (operands[4], <MODE>mode)
5260         && !rtx_equal_p (operands[0], operands[4]))
5261       {
5262         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5263                                                  operands[4], operands[1]));
5264         operands[4] = operands[3] = operands[0];
5265       }
5266     else if (!rtx_equal_p (operands[1], operands[5]))
5267       operands[5] = copy_rtx (operands[1]);
5268     else
5269       FAIL;
5270   }
5271   [(set_attr "movprfx" "yes")]
5272 )
5273
5274 ;; Register merging forms are handled through SVE_COND_FP_BINARY.
5275
5276 ;; -------------------------------------------------------------------------
5277 ;; ---- [FP] Absolute difference
5278 ;; -------------------------------------------------------------------------
5279 ;; Includes:
5280 ;; - FABD
5281 ;; -------------------------------------------------------------------------
5282
5283 ;; Predicated floating-point absolute difference.
5284 (define_expand "@aarch64_pred_abd<mode>"
5285   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5286         (unspec:SVE_FULL_F
5287           [(match_operand:<VPRED> 1 "register_operand")
5288            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5289            (unspec:SVE_FULL_F
5290              [(match_dup 1)
5291               (match_dup 4)
5292               (match_operand:SVE_FULL_F 2 "register_operand")
5293               (match_operand:SVE_FULL_F 3 "register_operand")]
5294              UNSPEC_COND_FSUB)]
5295           UNSPEC_COND_FABS))]
5296   "TARGET_SVE"
5297 )
5298
5299 ;; Predicated floating-point absolute difference.
5300 (define_insn_and_rewrite "*aarch64_pred_abd<mode>"
5301   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5302         (unspec:SVE_FULL_F
5303           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5304            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5305            (unspec:SVE_FULL_F
5306              [(match_operand 5)
5307               (match_operand:SI 6 "aarch64_sve_gp_strictness")
5308               (match_operand:SVE_FULL_F 2 "register_operand" "%0, w")
5309               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5310              UNSPEC_COND_FSUB)]
5311           UNSPEC_COND_FABS))]
5312   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
5313   "@
5314    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5315    movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5316   "&& !rtx_equal_p (operands[1], operands[5])"
5317   {
5318     operands[5] = copy_rtx (operands[1]);
5319   }
5320   [(set_attr "movprfx" "*,yes")]
5321 )
5322
5323 (define_expand "@aarch64_cond_abd<mode>"
5324   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5325         (unspec:SVE_FULL_F
5326           [(match_operand:<VPRED> 1 "register_operand")
5327            (unspec:SVE_FULL_F
5328              [(match_dup 1)
5329               (const_int SVE_STRICT_GP)
5330               (unspec:SVE_FULL_F
5331                 [(match_dup 1)
5332                  (const_int SVE_STRICT_GP)
5333                  (match_operand:SVE_FULL_F 2 "register_operand")
5334                  (match_operand:SVE_FULL_F 3 "register_operand")]
5335                 UNSPEC_COND_FSUB)]
5336              UNSPEC_COND_FABS)
5337            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
5338           UNSPEC_SEL))]
5339   "TARGET_SVE"
5340 {
5341   if (rtx_equal_p (operands[3], operands[4]))
5342     std::swap (operands[2], operands[3]);
5343 })
5344
5345 ;; Predicated floating-point absolute difference, merging with the first
5346 ;; input.
5347 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_2"
5348   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5349         (unspec:SVE_FULL_F
5350           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5351            (unspec:SVE_FULL_F
5352              [(match_operand 4)
5353               (match_operand:SI 5 "aarch64_sve_gp_strictness")
5354               (unspec:SVE_FULL_F
5355                 [(match_operand 6)
5356                  (match_operand:SI 7 "aarch64_sve_gp_strictness")
5357                  (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
5358                  (match_operand:SVE_FULL_F 3 "register_operand" "w, w")]
5359                 UNSPEC_COND_FSUB)]
5360              UNSPEC_COND_FABS)
5361            (match_dup 2)]
5362           UNSPEC_SEL))]
5363   "TARGET_SVE
5364    && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5365    && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5366   "@
5367    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5368    movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5369   "&& (!rtx_equal_p (operands[1], operands[4])
5370        || !rtx_equal_p (operands[1], operands[6]))"
5371   {
5372     operands[4] = copy_rtx (operands[1]);
5373     operands[6] = copy_rtx (operands[1]);
5374   }
5375   [(set_attr "movprfx" "*,yes")]
5376 )
5377
5378 ;; Predicated floating-point absolute difference, merging with the second
5379 ;; input.
5380 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_3"
5381   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
5382         (unspec:SVE_FULL_F
5383           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5384            (unspec:SVE_FULL_F
5385              [(match_operand 4)
5386               (match_operand:SI 5 "aarch64_sve_gp_strictness")
5387               (unspec:SVE_FULL_F
5388                 [(match_operand 6)
5389                  (match_operand:SI 7 "aarch64_sve_gp_strictness")
5390                  (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
5391                  (match_operand:SVE_FULL_F 3 "register_operand" "0, w")]
5392                 UNSPEC_COND_FSUB)]
5393              UNSPEC_COND_FABS)
5394            (match_dup 3)]
5395           UNSPEC_SEL))]
5396   "TARGET_SVE
5397    && aarch64_sve_pred_dominates_p (&operands[4], operands[1])
5398    && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
5399   "@
5400    fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5401    movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
5402   "&& (!rtx_equal_p (operands[1], operands[4])
5403        || !rtx_equal_p (operands[1], operands[6]))"
5404   {
5405     operands[4] = copy_rtx (operands[1]);
5406     operands[6] = copy_rtx (operands[1]);
5407   }
5408   [(set_attr "movprfx" "*,yes")]
5409 )
5410
5411 ;; Predicated floating-point absolute difference, merging with an
5412 ;; independent value.
5413 (define_insn_and_rewrite "*aarch64_cond_abd<mode>_any"
5414   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, ?&w")
5415         (unspec:SVE_FULL_F
5416           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5417            (unspec:SVE_FULL_F
5418              [(match_operand 5)
5419               (match_operand:SI 6 "aarch64_sve_gp_strictness")
5420               (unspec:SVE_FULL_F
5421                 [(match_operand 7)
5422                  (match_operand:SI 8 "aarch64_sve_gp_strictness")
5423                  (match_operand:SVE_FULL_F 2 "register_operand" "0, w, w, w, w")
5424                  (match_operand:SVE_FULL_F 3 "register_operand" "w, 0, w, w, w")]
5425                 UNSPEC_COND_FSUB)]
5426              UNSPEC_COND_FABS)
5427            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, 0, w")]
5428           UNSPEC_SEL))]
5429   "TARGET_SVE
5430    && !rtx_equal_p (operands[2], operands[4])
5431    && !rtx_equal_p (operands[3], operands[4])
5432    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
5433    && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
5434   "@
5435    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5436    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
5437    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5438    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5439    #"
5440   "&& 1"
5441   {
5442     if (reload_completed
5443         && register_operand (operands[4], <MODE>mode)
5444         && !rtx_equal_p (operands[0], operands[4]))
5445       {
5446         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
5447                                                  operands[4], operands[1]));
5448         operands[4] = operands[3] = operands[0];
5449       }
5450     else if (!rtx_equal_p (operands[1], operands[5])
5451              || !rtx_equal_p (operands[1], operands[7]))
5452       {
5453         operands[5] = copy_rtx (operands[1]);
5454         operands[7] = copy_rtx (operands[1]);
5455       }
5456     else
5457       FAIL;
5458   }
5459   [(set_attr "movprfx" "yes")]
5460 )
5461
5462 ;; -------------------------------------------------------------------------
5463 ;; ---- [FP] Multiplication
5464 ;; -------------------------------------------------------------------------
5465 ;; Includes:
5466 ;; - FMUL
5467 ;; -------------------------------------------------------------------------
5468
5469 ;; Predicated floating-point multiplication.
5470 (define_insn_and_split "@aarch64_pred_<optab><mode>"
5471   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, w, ?&w, ?&w")
5472         (unspec:SVE_FULL_F
5473           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
5474            (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z, Ui1, i, Ui1")
5475            (match_operand:SVE_FULL_F 2 "register_operand" "%0, w, 0, w, w")
5476            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand" "vsM, w, w, vsM, w")]
5477           SVE_COND_FP_MUL))]
5478   "TARGET_SVE"
5479   "@
5480    fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5481    #
5482    fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5483    movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5484    movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5485   ; Split the unpredicated form after reload, so that we don't have
5486   ; the unnecessary PTRUE.
5487   "&& reload_completed
5488    && register_operand (operands[3], <MODE>mode)
5489    && INTVAL (operands[4]) == SVE_RELAXED_GP"
5490   [(set (match_dup 0) (mult:SVE_FULL_F (match_dup 2) (match_dup 3)))]
5491   ""
5492   [(set_attr "movprfx" "*,*,*,yes,yes")]
5493 )
5494
5495 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5496 ;; SVE_COND_FP_BINARY_I1.
5497
5498 ;; Unpredicated multiplication by selected lanes.
5499 (define_insn "@aarch64_mul_lane_<mode>"
5500   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5501         (mult:SVE_FULL_F
5502           (unspec:SVE_FULL_F
5503             [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>")
5504              (match_operand:SI 3 "const_int_operand")]
5505             UNSPEC_SVE_LANE_SELECT)
5506           (match_operand:SVE_FULL_F 1 "register_operand" "w")))]
5507   "TARGET_SVE"
5508   "fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
5509 )
5510
5511 ;; -------------------------------------------------------------------------
5512 ;; ---- [FP] Division
5513 ;; -------------------------------------------------------------------------
5514 ;; The patterns in this section are synthetic.
5515 ;; -------------------------------------------------------------------------
5516
5517 (define_expand "div<mode>3"
5518   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5519         (unspec:SVE_FULL_F
5520           [(match_dup 3)
5521            (const_int SVE_RELAXED_GP)
5522            (match_operand:SVE_FULL_F 1 "nonmemory_operand")
5523            (match_operand:SVE_FULL_F 2 "register_operand")]
5524           UNSPEC_COND_FDIV))]
5525   "TARGET_SVE"
5526   {
5527     if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
5528       DONE;
5529
5530     operands[1] = force_reg (<MODE>mode, operands[1]);
5531     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5532   }
5533 )
5534
5535 (define_expand "@aarch64_frecpe<mode>"
5536   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5537         (unspec:SVE_FULL_F
5538           [(match_operand:SVE_FULL_F 1 "register_operand")]
5539           UNSPEC_FRECPE))]
5540   "TARGET_SVE"
5541 )
5542
5543 (define_expand "@aarch64_frecps<mode>"
5544   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5545         (unspec:SVE_FULL_F
5546           [(match_operand:SVE_FULL_F 1 "register_operand")
5547            (match_operand:SVE_FULL_F 2 "register_operand")]
5548           UNSPEC_FRECPS))]
5549   "TARGET_SVE"
5550 )
5551
5552 ;; -------------------------------------------------------------------------
5553 ;; ---- [FP] Binary logical operations
5554 ;; -------------------------------------------------------------------------
5555 ;; Includes
5556 ;; - AND
5557 ;; - EOR
5558 ;; - ORR
5559 ;; -------------------------------------------------------------------------
5560
5561 ;; Binary logical operations on floating-point modes.  We avoid subregs
5562 ;; by providing this, but we need to use UNSPECs since rtx logical ops
5563 ;; aren't defined for floating-point modes.
5564 (define_insn "*<optab><mode>3"
5565   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
5566         (unspec:SVE_FULL_F
5567           [(match_operand:SVE_FULL_F 1 "register_operand" "w")
5568            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
5569           LOGICALF))]
5570   "TARGET_SVE"
5571   "<logicalf_op>\t%0.d, %1.d, %2.d"
5572 )
5573
5574 ;; -------------------------------------------------------------------------
5575 ;; ---- [FP] Sign copying
5576 ;; -------------------------------------------------------------------------
5577 ;; The patterns in this section are synthetic.
5578 ;; -------------------------------------------------------------------------
5579
5580 (define_expand "copysign<mode>3"
5581   [(match_operand:SVE_FULL_F 0 "register_operand")
5582    (match_operand:SVE_FULL_F 1 "register_operand")
5583    (match_operand:SVE_FULL_F 2 "register_operand")]
5584   "TARGET_SVE"
5585   {
5586     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5587     rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
5588     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5589     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5590
5591     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5592     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5593
5594     emit_insn (gen_and<v_int_equiv>3
5595                (sign, arg2,
5596                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5597                                                    HOST_WIDE_INT_M1U
5598                                                    << bits)));
5599     emit_insn (gen_and<v_int_equiv>3
5600                (mant, arg1,
5601                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5602                                                    ~(HOST_WIDE_INT_M1U
5603                                                      << bits))));
5604     emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
5605     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5606     DONE;
5607   }
5608 )
5609
5610 (define_expand "xorsign<mode>3"
5611   [(match_operand:SVE_FULL_F 0 "register_operand")
5612    (match_operand:SVE_FULL_F 1 "register_operand")
5613    (match_operand:SVE_FULL_F 2 "register_operand")]
5614   "TARGET_SVE"
5615   {
5616     rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
5617     rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
5618     int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
5619
5620     rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
5621     rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
5622
5623     emit_insn (gen_and<v_int_equiv>3
5624                (sign, arg2,
5625                 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
5626                                                    HOST_WIDE_INT_M1U
5627                                                    << bits)));
5628     emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
5629     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
5630     DONE;
5631   }
5632 )
5633
5634 ;; -------------------------------------------------------------------------
5635 ;; ---- [FP] Maximum and minimum
5636 ;; -------------------------------------------------------------------------
5637 ;; Includes:
5638 ;; - FMAX
5639 ;; - FMAXNM
5640 ;; - FMIN
5641 ;; - FMINNM
5642 ;; -------------------------------------------------------------------------
5643
5644 ;; Unpredicated fmax/fmin (the libm functions).  The optabs for the
5645 ;; smin/smax rtx codes are handled in the generic section above.
5646 (define_expand "<maxmin_uns><mode>3"
5647   [(set (match_operand:SVE_FULL_F 0 "register_operand")
5648         (unspec:SVE_FULL_F
5649           [(match_dup 3)
5650            (const_int SVE_RELAXED_GP)
5651            (match_operand:SVE_FULL_F 1 "register_operand")
5652            (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
5653           SVE_COND_FP_MAXMIN_PUBLIC))]
5654   "TARGET_SVE"
5655   {
5656     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
5657   }
5658 )
5659
5660 ;; Predicated floating-point maximum/minimum.
5661 (define_insn "@aarch64_pred_<optab><mode>"
5662   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w, ?&w")
5663         (unspec:SVE_FULL_F
5664           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
5665            (match_operand:SI 4 "aarch64_sve_gp_strictness")
5666            (match_operand:SVE_FULL_F 2 "register_operand" "%0, 0, w, w")
5667            (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand" "vsB, w, vsB, w")]
5668           SVE_COND_FP_MAXMIN))]
5669   "TARGET_SVE"
5670   "@
5671    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5672    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
5673    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
5674    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
5675   [(set_attr "movprfx" "*,*,yes,yes")]
5676 )
5677
5678 ;; Merging forms are handled through SVE_COND_FP_BINARY and
5679 ;; SVE_COND_FP_BINARY_I1.
5680
5681 ;; -------------------------------------------------------------------------
5682 ;; ---- [PRED] Binary logical operations
5683 ;; -------------------------------------------------------------------------
5684 ;; Includes:
5685 ;; - AND
5686 ;; - ANDS
5687 ;; - EOR
5688 ;; - EORS
5689 ;; - ORR
5690 ;; - ORRS
5691 ;; -------------------------------------------------------------------------
5692
5693 ;; Predicate AND.  We can reuse one of the inputs as the GP.
5694 ;; Doubling the second operand is the preferred implementation
5695 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
5696 (define_insn "and<mode>3"
5697   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5698         (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand" "Upa")
5699                       (match_operand:PRED_ALL 2 "register_operand" "Upa")))]
5700   "TARGET_SVE"
5701   "and\t%0.b, %1/z, %2.b, %2.b"
5702 )
5703
5704 ;; Unpredicated predicate EOR and ORR.
5705 (define_expand "<optab><mode>3"
5706   [(set (match_operand:PRED_ALL 0 "register_operand")
5707         (and:PRED_ALL
5708           (LOGICAL_OR:PRED_ALL
5709             (match_operand:PRED_ALL 1 "register_operand")
5710             (match_operand:PRED_ALL 2 "register_operand"))
5711           (match_dup 3)))]
5712   "TARGET_SVE"
5713   {
5714     operands[3] = aarch64_ptrue_reg (<MODE>mode);
5715   }
5716 )
5717
5718 ;; Predicated predicate AND, EOR and ORR.
5719 (define_insn "@aarch64_pred_<optab><mode>_z"
5720   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5721         (and:PRED_ALL
5722           (LOGICAL:PRED_ALL
5723             (match_operand:PRED_ALL 2 "register_operand" "Upa")
5724             (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5725           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5726   "TARGET_SVE"
5727   "<logical>\t%0.b, %1/z, %2.b, %3.b"
5728 )
5729
5730 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
5731 ;; the GP.  Store the result in operand 0 and set the flags in the same
5732 ;; way as for PTEST.
5733 (define_insn "*<optab><mode>3_cc"
5734   [(set (reg:CC_NZC CC_REGNUM)
5735         (unspec:CC_NZC
5736           [(match_operand:VNx16BI 1 "register_operand" "Upa")
5737            (match_operand 4)
5738            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5739            (and:PRED_ALL
5740              (LOGICAL:PRED_ALL
5741                (match_operand:PRED_ALL 2 "register_operand" "Upa")
5742                (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5743              (match_dup 4))]
5744           UNSPEC_PTEST))
5745    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5746         (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
5747                       (match_dup 4)))]
5748   "TARGET_SVE"
5749   "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5750 )
5751
5752 ;; Same with just the flags result.
5753 (define_insn "*<optab><mode>3_ptest"
5754   [(set (reg:CC_NZC CC_REGNUM)
5755         (unspec:CC_NZC
5756           [(match_operand:VNx16BI 1 "register_operand" "Upa")
5757            (match_operand 4)
5758            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5759            (and:PRED_ALL
5760              (LOGICAL:PRED_ALL
5761                (match_operand:PRED_ALL 2 "register_operand" "Upa")
5762                (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5763              (match_dup 4))]
5764           UNSPEC_PTEST))
5765    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5766   "TARGET_SVE"
5767   "<logical>s\t%0.b, %1/z, %2.b, %3.b"
5768 )
5769
5770 ;; -------------------------------------------------------------------------
5771 ;; ---- [PRED] Binary logical operations (inverted second input)
5772 ;; -------------------------------------------------------------------------
5773 ;; Includes:
5774 ;; - BIC
5775 ;; - ORN
5776 ;; -------------------------------------------------------------------------
5777
5778 ;; Predicated predicate BIC and ORN.
5779 (define_insn "aarch64_pred_<nlogical><mode>_z"
5780   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5781         (and:PRED_ALL
5782           (NLOGICAL:PRED_ALL
5783             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5784             (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5785           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5786   "TARGET_SVE"
5787   "<nlogical>\t%0.b, %1/z, %2.b, %3.b"
5788 )
5789
5790 ;; Same, but set the flags as a side-effect.
5791 (define_insn "*<nlogical><mode>3_cc"
5792   [(set (reg:CC_NZC CC_REGNUM)
5793         (unspec:CC_NZC
5794           [(match_operand:VNx16BI 1 "register_operand" "Upa")
5795            (match_operand 4)
5796            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5797            (and:PRED_ALL
5798              (NLOGICAL:PRED_ALL
5799                (not:PRED_ALL
5800                  (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5801                (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5802              (match_dup 4))]
5803           UNSPEC_PTEST))
5804    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5805         (and:PRED_ALL (NLOGICAL:PRED_ALL
5806                         (not:PRED_ALL (match_dup 3))
5807                         (match_dup 2))
5808                       (match_dup 4)))]
5809   "TARGET_SVE"
5810   "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5811 )
5812
5813 ;; Same with just the flags result.
5814 (define_insn "*<nlogical><mode>3_ptest"
5815   [(set (reg:CC_NZC CC_REGNUM)
5816         (unspec:CC_NZC
5817           [(match_operand:VNx16BI 1 "register_operand" "Upa")
5818            (match_operand 4)
5819            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5820            (and:PRED_ALL
5821              (NLOGICAL:PRED_ALL
5822                (not:PRED_ALL
5823                  (match_operand:PRED_ALL 3 "register_operand" "Upa"))
5824                (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5825              (match_dup 4))]
5826           UNSPEC_PTEST))
5827    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5828   "TARGET_SVE"
5829   "<nlogical>s\t%0.b, %1/z, %2.b, %3.b"
5830 )
5831
5832 ;; -------------------------------------------------------------------------
5833 ;; ---- [PRED] Binary logical operations (inverted result)
5834 ;; -------------------------------------------------------------------------
5835 ;; Includes:
5836 ;; - NAND
5837 ;; - NOR
5838 ;; -------------------------------------------------------------------------
5839
5840 ;; Predicated predicate NAND and NOR.
5841 (define_insn "aarch64_pred_<logical_nn><mode>_z"
5842   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5843         (and:PRED_ALL
5844           (NLOGICAL:PRED_ALL
5845             (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5846             (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5847           (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
5848   "TARGET_SVE"
5849   "<logical_nn>\t%0.b, %1/z, %2.b, %3.b"
5850 )
5851
5852 ;; Same, but set the flags as a side-effect.
5853 (define_insn "*<logical_nn><mode>3_cc"
5854   [(set (reg:CC_NZC CC_REGNUM)
5855         (unspec:CC_NZC
5856           [(match_operand:VNx16BI 1 "register_operand" "Upa")
5857            (match_operand 4)
5858            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5859            (and:PRED_ALL
5860              (NLOGICAL:PRED_ALL
5861                (not:PRED_ALL
5862                  (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5863                (not:PRED_ALL
5864                  (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5865              (match_dup 4))]
5866           UNSPEC_PTEST))
5867    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
5868         (and:PRED_ALL (NLOGICAL:PRED_ALL
5869                         (not:PRED_ALL (match_dup 2))
5870                         (not:PRED_ALL (match_dup 3)))
5871                       (match_dup 4)))]
5872   "TARGET_SVE"
5873   "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5874 )
5875
5876 ;; Same with just the flags result.
5877 (define_insn "*<logical_nn><mode>3_ptest"
5878   [(set (reg:CC_NZC CC_REGNUM)
5879         (unspec:CC_NZC
5880           [(match_operand:VNx16BI 1 "register_operand" "Upa")
5881            (match_operand 4)
5882            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
5883            (and:PRED_ALL
5884              (NLOGICAL:PRED_ALL
5885                (not:PRED_ALL
5886                  (match_operand:PRED_ALL 2 "register_operand" "Upa"))
5887                (not:PRED_ALL
5888                  (match_operand:PRED_ALL 3 "register_operand" "Upa")))
5889              (match_dup 4))]
5890           UNSPEC_PTEST))
5891    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
5892   "TARGET_SVE"
5893   "<logical_nn>s\t%0.b, %1/z, %2.b, %3.b"
5894 )
5895
5896 ;; =========================================================================
5897 ;; == Ternary arithmetic
5898 ;; =========================================================================
5899
5900 ;; -------------------------------------------------------------------------
5901 ;; ---- [INT] MLA and MAD
5902 ;; -------------------------------------------------------------------------
5903 ;; Includes:
5904 ;; - MAD
5905 ;; - MLA
5906 ;; -------------------------------------------------------------------------
5907
5908 ;; Unpredicated integer addition of product.
5909 (define_expand "fma<mode>4"
5910   [(set (match_operand:SVE_FULL_I 0 "register_operand")
5911         (plus:SVE_FULL_I
5912           (unspec:SVE_FULL_I
5913             [(match_dup 4)
5914              (mult:SVE_FULL_I
5915                (match_operand:SVE_FULL_I 1 "register_operand")
5916                (match_operand:SVE_FULL_I 2 "nonmemory_operand"))]
5917             UNSPEC_PRED_X)
5918           (match_operand:SVE_FULL_I 3 "register_operand")))]
5919   "TARGET_SVE"
5920   {
5921     if (aarch64_prepare_sve_int_fma (operands, PLUS))
5922       DONE;
5923     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
5924   }
5925 )
5926
5927 ;; Predicated integer addition of product.
5928 (define_insn "@aarch64_pred_fma<mode>"
5929   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
5930         (plus:SVE_FULL_I
5931           (unspec:SVE_FULL_I
5932             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
5933              (mult:SVE_FULL_I
5934                (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
5935                (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
5936             UNSPEC_PRED_X)
5937           (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")))]
5938   "TARGET_SVE"
5939   "@
5940    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5941    mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
5942    movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
5943   [(set_attr "movprfx" "*,*,yes")]
5944 )
5945
5946 ;; Predicated integer addition of product with merging.
5947 (define_expand "cond_fma<mode>"
5948   [(set (match_operand:SVE_FULL_I 0 "register_operand")
5949         (unspec:SVE_FULL_I
5950           [(match_operand:<VPRED> 1 "register_operand")
5951            (plus:SVE_FULL_I
5952              (mult:SVE_FULL_I
5953                (match_operand:SVE_FULL_I 2 "register_operand")
5954                (match_operand:SVE_FULL_I 3 "general_operand"))
5955              (match_operand:SVE_FULL_I 4 "register_operand"))
5956            (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
5957           UNSPEC_SEL))]
5958   "TARGET_SVE"
5959   {
5960     if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
5961       DONE;
5962     /* Swap the multiplication operands if the fallback value is the
5963        second of the two.  */
5964     if (rtx_equal_p (operands[3], operands[5]))
5965       std::swap (operands[2], operands[3]);
5966   }
5967 )
5968
5969 ;; Predicated integer addition of product, merging with the first input.
5970 (define_insn "*cond_fma<mode>_2"
5971   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5972         (unspec:SVE_FULL_I
5973           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5974            (plus:SVE_FULL_I
5975              (mult:SVE_FULL_I
5976                (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
5977                (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5978              (match_operand:SVE_FULL_I 4 "register_operand" "w, w"))
5979            (match_dup 2)]
5980           UNSPEC_SEL))]
5981   "TARGET_SVE"
5982   "@
5983    mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
5984    movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
5985   [(set_attr "movprfx" "*,yes")]
5986 )
5987
5988 ;; Predicated integer addition of product, merging with the third input.
5989 (define_insn "*cond_fma<mode>_4"
5990   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
5991         (unspec:SVE_FULL_I
5992           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
5993            (plus:SVE_FULL_I
5994              (mult:SVE_FULL_I
5995                (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
5996                (match_operand:SVE_FULL_I 3 "register_operand" "w, w"))
5997              (match_operand:SVE_FULL_I 4 "register_operand" "0, w"))
5998            (match_dup 4)]
5999           UNSPEC_SEL))]
6000   "TARGET_SVE"
6001   "@
6002    mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6003    movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6004   [(set_attr "movprfx" "*,yes")]
6005 )
6006
6007 ;; Predicated integer addition of product, merging with an independent value.
6008 (define_insn_and_rewrite "*cond_fma<mode>_any"
6009   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6010         (unspec:SVE_FULL_I
6011           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6012            (plus:SVE_FULL_I
6013              (mult:SVE_FULL_I
6014                (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6015                (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w"))
6016              (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w"))
6017            (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6018           UNSPEC_SEL))]
6019   "TARGET_SVE
6020    && !rtx_equal_p (operands[2], operands[5])
6021    && !rtx_equal_p (operands[3], operands[5])
6022    && !rtx_equal_p (operands[4], operands[5])"
6023   "@
6024    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6025    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6026    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6027    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6028    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6029    #"
6030   "&& reload_completed
6031    && register_operand (operands[5], <MODE>mode)
6032    && !rtx_equal_p (operands[0], operands[5])"
6033   {
6034     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6035                                              operands[5], operands[1]));
6036     operands[5] = operands[4] = operands[0];
6037   }
6038   [(set_attr "movprfx" "yes")]
6039 )
6040
6041 ;; -------------------------------------------------------------------------
6042 ;; ---- [INT] MLS and MSB
6043 ;; -------------------------------------------------------------------------
6044 ;; Includes:
6045 ;; - MLS
6046 ;; - MSB
6047 ;; -------------------------------------------------------------------------
6048
6049 ;; Unpredicated integer subtraction of product.
6050 (define_expand "fnma<mode>4"
6051   [(set (match_operand:SVE_FULL_I 0 "register_operand")
6052         (minus:SVE_FULL_I
6053           (match_operand:SVE_FULL_I 3 "register_operand")
6054           (unspec:SVE_FULL_I
6055             [(match_dup 4)
6056              (mult:SVE_FULL_I
6057                (match_operand:SVE_FULL_I 1 "register_operand")
6058                (match_operand:SVE_FULL_I 2 "general_operand"))]
6059             UNSPEC_PRED_X)))]
6060   "TARGET_SVE"
6061   {
6062     if (aarch64_prepare_sve_int_fma (operands, MINUS))
6063       DONE;
6064     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6065   }
6066 )
6067
6068 ;; Predicated integer subtraction of product.
6069 (define_insn "@aarch64_pred_fnma<mode>"
6070   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, w, ?&w")
6071         (minus:SVE_FULL_I
6072           (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w")
6073           (unspec:SVE_FULL_I
6074             [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6075              (mult:SVE_FULL_I
6076                (match_operand:SVE_FULL_I 2 "register_operand" "%0, w, w")
6077                (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w"))]
6078             UNSPEC_PRED_X)))]
6079   "TARGET_SVE"
6080   "@
6081    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6082    mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6083    movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6084   [(set_attr "movprfx" "*,*,yes")]
6085 )
6086
6087 ;; Predicated integer subtraction of product with merging.
6088 (define_expand "cond_fnma<mode>"
6089   [(set (match_operand:SVE_FULL_I 0 "register_operand")
6090    (unspec:SVE_FULL_I
6091         [(match_operand:<VPRED> 1 "register_operand")
6092          (minus:SVE_FULL_I
6093            (match_operand:SVE_FULL_I 4 "register_operand")
6094            (mult:SVE_FULL_I
6095              (match_operand:SVE_FULL_I 2 "register_operand")
6096              (match_operand:SVE_FULL_I 3 "general_operand")))
6097          (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero")]
6098         UNSPEC_SEL))]
6099   "TARGET_SVE"
6100   {
6101     if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
6102       DONE;
6103     /* Swap the multiplication operands if the fallback value is the
6104        second of the two.  */
6105     if (rtx_equal_p (operands[3], operands[5]))
6106       std::swap (operands[2], operands[3]);
6107   }
6108 )
6109
6110 ;; Predicated integer subtraction of product, merging with the first input.
6111 (define_insn "*cond_fnma<mode>_2"
6112   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6113         (unspec:SVE_FULL_I
6114           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6115            (minus:SVE_FULL_I
6116              (match_operand:SVE_FULL_I 4 "register_operand" "w, w")
6117              (mult:SVE_FULL_I
6118                (match_operand:SVE_FULL_I 2 "register_operand" "0, w")
6119                (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6120            (match_dup 2)]
6121           UNSPEC_SEL))]
6122   "TARGET_SVE"
6123   "@
6124    msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6125    movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6126   [(set_attr "movprfx" "*,yes")]
6127 )
6128
6129 ;; Predicated integer subtraction of product, merging with the third input.
6130 (define_insn "*cond_fnma<mode>_4"
6131   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
6132         (unspec:SVE_FULL_I
6133           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6134            (minus:SVE_FULL_I
6135              (match_operand:SVE_FULL_I 4 "register_operand" "0, w")
6136              (mult:SVE_FULL_I
6137                (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6138                (match_operand:SVE_FULL_I 3 "register_operand" "w, w")))
6139            (match_dup 4)]
6140           UNSPEC_SEL))]
6141   "TARGET_SVE"
6142   "@
6143    mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6144    movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6145   [(set_attr "movprfx" "*,yes")]
6146 )
6147
6148 ;; Predicated integer subtraction of product, merging with an
6149 ;; independent value.
6150 (define_insn_and_rewrite "*cond_fnma<mode>_any"
6151   [(set (match_operand:SVE_FULL_I 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6152         (unspec:SVE_FULL_I
6153           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6154            (minus:SVE_FULL_I
6155              (match_operand:SVE_FULL_I 4 "register_operand" "w, 0, w, w, w, w")
6156              (mult:SVE_FULL_I
6157                (match_operand:SVE_FULL_I 2 "register_operand" "w, w, 0, w, w, w")
6158                (match_operand:SVE_FULL_I 3 "register_operand" "w, w, w, 0, w, w")))
6159            (match_operand:SVE_FULL_I 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6160           UNSPEC_SEL))]
6161   "TARGET_SVE
6162    && !rtx_equal_p (operands[2], operands[5])
6163    && !rtx_equal_p (operands[3], operands[5])
6164    && !rtx_equal_p (operands[4], operands[5])"
6165   "@
6166    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6167    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6168    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6169    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6170    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6171    #"
6172   "&& reload_completed
6173    && register_operand (operands[5], <MODE>mode)
6174    && !rtx_equal_p (operands[0], operands[5])"
6175   {
6176     emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6177                                              operands[5], operands[1]));
6178     operands[5] = operands[4] = operands[0];
6179   }
6180   [(set_attr "movprfx" "yes")]
6181 )
6182
6183 ;; -------------------------------------------------------------------------
6184 ;; ---- [INT] Dot product
6185 ;; -------------------------------------------------------------------------
6186 ;; Includes:
6187 ;; - SDOT
6188 ;; - SUDOT   (I8MM)
6189 ;; - UDOT
6190 ;; - USDOT   (I8MM)
6191 ;; -------------------------------------------------------------------------
6192
6193 ;; Four-element integer dot-product with accumulation.
6194 (define_insn "<sur>dot_prod<vsi2qi>"
6195   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6196         (plus:SVE_FULL_SDI
6197           (unspec:SVE_FULL_SDI
6198             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6199              (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6200             DOTPROD)
6201           (match_operand:SVE_FULL_SDI 3 "register_operand" "0, w")))]
6202   "TARGET_SVE"
6203   "@
6204    <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
6205    movprfx\t%0, %3\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>"
6206   [(set_attr "movprfx" "*,yes")]
6207 )
6208
6209 ;; Four-element integer dot-product by selected lanes with accumulation.
6210 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6211   [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w, ?&w")
6212         (plus:SVE_FULL_SDI
6213           (unspec:SVE_FULL_SDI
6214             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6215              (unspec:<VSI2QI>
6216                [(match_operand:<VSI2QI> 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6217                 (match_operand:SI 3 "const_int_operand")]
6218                UNSPEC_SVE_LANE_SELECT)]
6219             DOTPROD)
6220           (match_operand:SVE_FULL_SDI 4 "register_operand" "0, w")))]
6221   "TARGET_SVE"
6222   "@
6223    <sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]
6224    movprfx\t%0, %4\;<sur>dot\\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>[%3]"
6225   [(set_attr "movprfx" "*,yes")]
6226 )
6227
6228 (define_insn "@aarch64_<sur>dot_prod<vsi2qi>"
6229   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6230         (plus:VNx4SI_ONLY
6231           (unspec:VNx4SI_ONLY
6232             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6233              (match_operand:<VSI2QI> 2 "register_operand" "w, w")]
6234             DOTPROD_US_ONLY)
6235           (match_operand:VNx4SI_ONLY 3 "register_operand" "0, w")))]
6236   "TARGET_SVE_I8MM"
6237   "@
6238    <sur>dot\\t%0.s, %1.b, %2.b
6239    movprfx\t%0, %3\;<sur>dot\\t%0.s, %1.b, %2.b"
6240    [(set_attr "movprfx" "*,yes")]
6241 )
6242
6243 (define_insn "@aarch64_<sur>dot_prod_lane<vsi2qi>"
6244   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6245         (plus:VNx4SI_ONLY
6246           (unspec:VNx4SI_ONLY
6247             [(match_operand:<VSI2QI> 1 "register_operand" "w, w")
6248              (unspec:<VSI2QI>
6249                [(match_operand:<VSI2QI> 2 "register_operand" "y, y")
6250                 (match_operand:SI 3 "const_int_operand")]
6251                UNSPEC_SVE_LANE_SELECT)]
6252             DOTPROD_I8MM)
6253           (match_operand:VNx4SI_ONLY 4 "register_operand" "0, w")))]
6254   "TARGET_SVE_I8MM"
6255   "@
6256    <sur>dot\\t%0.s, %1.b, %2.b[%3]
6257    movprfx\t%0, %4\;<sur>dot\\t%0.s, %1.b, %2.b[%3]"
6258   [(set_attr "movprfx" "*,yes")]
6259 )
6260
6261 ;; -------------------------------------------------------------------------
6262 ;; ---- [INT] Sum of absolute differences
6263 ;; -------------------------------------------------------------------------
6264 ;; The patterns in this section are synthetic.
6265 ;; -------------------------------------------------------------------------
6266
6267 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
6268 ;; operands 1 and 2.  The sequence also has to perform a widening reduction of
6269 ;; the difference into a vector and accumulate that into operand 3 before
6270 ;; copying that into the result operand 0.
6271 ;; Perform that with a sequence of:
6272 ;; MOV          ones.b, #1
6273 ;; [SU]ABD      diff.b, p0/m, op1.b, op2.b
6274 ;; MOVPRFX      op0, op3        // If necessary
6275 ;; UDOT         op0.s, diff.b, ones.b
6276 (define_expand "<sur>sad<vsi2qi>"
6277   [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
6278    (unspec:<VSI2QI> [(use (match_operand:<VSI2QI> 1 "register_operand"))
6279                     (use (match_operand:<VSI2QI> 2 "register_operand"))] ABAL)
6280    (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
6281   "TARGET_SVE"
6282   {
6283     rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
6284     rtx diff = gen_reg_rtx (<VSI2QI>mode);
6285     emit_insn (gen_<sur>abd<vsi2qi>_3 (diff, operands[1], operands[2]));
6286     emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
6287     DONE;
6288   }
6289 )
6290
6291 ;; -------------------------------------------------------------------------
6292 ;; ---- [INT] Matrix multiply-accumulate
6293 ;; -------------------------------------------------------------------------
6294 ;; Includes:
6295 ;; - SMMLA (I8MM)
6296 ;; - UMMLA (I8MM)
6297 ;; - USMMLA (I8MM)
6298 ;; -------------------------------------------------------------------------
6299
6300 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
6301   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w, ?&w")
6302         (plus:VNx4SI_ONLY
6303           (unspec:VNx4SI_ONLY
6304             [(match_operand:<VSI2QI> 2 "register_operand" "w, w")
6305              (match_operand:<VSI2QI> 3 "register_operand" "w, w")]
6306             MATMUL)
6307           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
6308   "TARGET_SVE_I8MM"
6309   "@
6310    <sur>mmla\\t%0.s, %2.b, %3.b
6311    movprfx\t%0, %1\;<sur>mmla\\t%0.s, %2.b, %3.b"
6312   [(set_attr "movprfx" "*,yes")]
6313 )
6314
6315 ;; -------------------------------------------------------------------------
6316 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
6317 ;; -------------------------------------------------------------------------
6318 ;; Includes merging patterns for:
6319 ;; - FMAD
6320 ;; - FMLA
6321 ;; - FMLS
6322 ;; - FMSB
6323 ;; - FNMAD
6324 ;; - FNMLA
6325 ;; - FNMLS
6326 ;; - FNMSB
6327 ;; -------------------------------------------------------------------------
6328
6329 ;; Unpredicated floating-point ternary operations.
6330 (define_expand "<optab><mode>4"
6331   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6332         (unspec:SVE_FULL_F
6333           [(match_dup 4)
6334            (const_int SVE_RELAXED_GP)
6335            (match_operand:SVE_FULL_F 1 "register_operand")
6336            (match_operand:SVE_FULL_F 2 "register_operand")
6337            (match_operand:SVE_FULL_F 3 "register_operand")]
6338           SVE_COND_FP_TERNARY))]
6339   "TARGET_SVE"
6340   {
6341     operands[4] = aarch64_ptrue_reg (<VPRED>mode);
6342   }
6343 )
6344
6345 ;; Predicated floating-point ternary operations.
6346 (define_insn "@aarch64_pred_<optab><mode>"
6347   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, w, ?&w")
6348         (unspec:SVE_FULL_F
6349           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
6350            (match_operand:SI 5 "aarch64_sve_gp_strictness")
6351            (match_operand:SVE_FULL_F 2 "register_operand" "%w, 0, w")
6352            (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w")
6353            (match_operand:SVE_FULL_F 4 "register_operand" "0, w, w")]
6354           SVE_COND_FP_TERNARY))]
6355   "TARGET_SVE"
6356   "@
6357    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6358    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6359    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6360   [(set_attr "movprfx" "*,*,yes")]
6361 )
6362
6363 ;; Predicated floating-point ternary operations with merging.
6364 (define_expand "@cond_<optab><mode>"
6365   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6366         (unspec:SVE_FULL_F
6367           [(match_operand:<VPRED> 1 "register_operand")
6368            (unspec:SVE_FULL_F
6369              [(match_dup 1)
6370               (const_int SVE_STRICT_GP)
6371               (match_operand:SVE_FULL_F 2 "register_operand")
6372               (match_operand:SVE_FULL_F 3 "register_operand")
6373               (match_operand:SVE_FULL_F 4 "register_operand")]
6374              SVE_COND_FP_TERNARY)
6375            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6376           UNSPEC_SEL))]
6377   "TARGET_SVE"
6378 {
6379   /* Swap the multiplication operands if the fallback value is the
6380      second of the two.  */
6381   if (rtx_equal_p (operands[3], operands[5]))
6382     std::swap (operands[2], operands[3]);
6383 })
6384
6385 ;; Predicated floating-point ternary operations, merging with the
6386 ;; first input.
6387 (define_insn_and_rewrite "*cond_<optab><mode>_2"
6388   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6389         (unspec:SVE_FULL_F
6390           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6391            (unspec:SVE_FULL_F
6392              [(match_operand 5)
6393               (match_operand:SI 6 "aarch64_sve_gp_strictness")
6394               (match_operand:SVE_FULL_F 2 "register_operand" "0, w")
6395               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6396               (match_operand:SVE_FULL_F 4 "register_operand" "w, w")]
6397              SVE_COND_FP_TERNARY)
6398            (match_dup 2)]
6399           UNSPEC_SEL))]
6400   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6401   "@
6402    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6403    movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
6404   "&& !rtx_equal_p (operands[1], operands[5])"
6405   {
6406     operands[5] = copy_rtx (operands[1]);
6407   }
6408   [(set_attr "movprfx" "*,yes")]
6409 )
6410
6411 ;; Predicated floating-point ternary operations, merging with the
6412 ;; third input.
6413 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6414   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6415         (unspec:SVE_FULL_F
6416           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6417            (unspec:SVE_FULL_F
6418              [(match_operand 5)
6419               (match_operand:SI 6 "aarch64_sve_gp_strictness")
6420               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6421               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6422               (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6423              SVE_COND_FP_TERNARY)
6424            (match_dup 4)]
6425           UNSPEC_SEL))]
6426   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6427   "@
6428    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6429    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
6430   "&& !rtx_equal_p (operands[1], operands[5])"
6431   {
6432     operands[5] = copy_rtx (operands[1]);
6433   }
6434   [(set_attr "movprfx" "*,yes")]
6435 )
6436
6437 ;; Predicated floating-point ternary operations, merging with an
6438 ;; independent value.
6439 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6440   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, &w, &w, ?&w")
6441         (unspec:SVE_FULL_F
6442           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6443            (unspec:SVE_FULL_F
6444              [(match_operand 6)
6445               (match_operand:SI 7 "aarch64_sve_gp_strictness")
6446               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, 0, w, w, w")
6447               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, 0, w, w")
6448               (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w, w, w")]
6449              SVE_COND_FP_TERNARY)
6450            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, Dz, Dz, 0, w")]
6451           UNSPEC_SEL))]
6452   "TARGET_SVE
6453    && !rtx_equal_p (operands[2], operands[5])
6454    && !rtx_equal_p (operands[3], operands[5])
6455    && !rtx_equal_p (operands[4], operands[5])
6456    && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6457   "@
6458    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6459    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6460    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
6461    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
6462    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
6463    #"
6464   "&& 1"
6465   {
6466     if (reload_completed
6467         && register_operand (operands[5], <MODE>mode)
6468         && !rtx_equal_p (operands[0], operands[5]))
6469       {
6470         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6471                                                  operands[5], operands[1]));
6472         operands[5] = operands[4] = operands[0];
6473       }
6474     else if (!rtx_equal_p (operands[1], operands[6]))
6475       operands[6] = copy_rtx (operands[1]);
6476     else
6477       FAIL;
6478   }
6479   [(set_attr "movprfx" "yes")]
6480 )
6481
6482 ;; Unpredicated FMLA and FMLS by selected lanes.  It doesn't seem worth using
6483 ;; (fma ...) since target-independent code won't understand the indexing.
6484 (define_insn "@aarch64_<optab>_lane_<mode>"
6485   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6486         (unspec:SVE_FULL_F
6487           [(match_operand:SVE_FULL_F 1 "register_operand" "w, w")
6488            (unspec:SVE_FULL_F
6489              [(match_operand:SVE_FULL_F 2 "register_operand" "<sve_lane_con>, <sve_lane_con>")
6490               (match_operand:SI 3 "const_int_operand")]
6491              UNSPEC_SVE_LANE_SELECT)
6492            (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6493           SVE_FP_TERNARY_LANE))]
6494   "TARGET_SVE"
6495   "@
6496    <sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
6497    movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
6498   [(set_attr "movprfx" "*,yes")]
6499 )
6500
6501 ;; -------------------------------------------------------------------------
6502 ;; ---- [FP] Complex multiply-add
6503 ;; -------------------------------------------------------------------------
6504 ;; Includes merging patterns for:
6505 ;; - FCMLA
6506 ;; -------------------------------------------------------------------------
6507
6508 ;; Predicated FCMLA.
6509 (define_insn "@aarch64_pred_<optab><mode>"
6510   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6511         (unspec:SVE_FULL_F
6512           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6513            (match_operand:SI 5 "aarch64_sve_gp_strictness")
6514            (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6515            (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6516            (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6517           SVE_COND_FCMLA))]
6518   "TARGET_SVE"
6519   "@
6520    fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6521    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6522   [(set_attr "movprfx" "*,yes")]
6523 )
6524
6525 ;; Predicated FCMLA with merging.
6526 (define_expand "@cond_<optab><mode>"
6527   [(set (match_operand:SVE_FULL_F 0 "register_operand")
6528         (unspec:SVE_FULL_F
6529           [(match_operand:<VPRED> 1 "register_operand")
6530            (unspec:SVE_FULL_F
6531              [(match_dup 1)
6532               (const_int SVE_STRICT_GP)
6533               (match_operand:SVE_FULL_F 2 "register_operand")
6534               (match_operand:SVE_FULL_F 3 "register_operand")
6535               (match_operand:SVE_FULL_F 4 "register_operand")]
6536              SVE_COND_FCMLA)
6537            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
6538           UNSPEC_SEL))]
6539   "TARGET_SVE"
6540 )
6541
6542 ;; Predicated FCMLA, merging with the third input.
6543 (define_insn_and_rewrite "*cond_<optab><mode>_4"
6544   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6545         (unspec:SVE_FULL_F
6546           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6547            (unspec:SVE_FULL_F
6548              [(match_operand 5)
6549               (match_operand:SI 6 "aarch64_sve_gp_strictness")
6550               (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6551               (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
6552               (match_operand:SVE_FULL_F 4 "register_operand" "0, w")]
6553              SVE_COND_FCMLA)
6554            (match_dup 4)]
6555           UNSPEC_SEL))]
6556   "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
6557   "@
6558    fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6559    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>"
6560   "&& !rtx_equal_p (operands[1], operands[5])"
6561   {
6562     operands[5] = copy_rtx (operands[1]);
6563   }
6564   [(set_attr "movprfx" "*,yes")]
6565 )
6566
6567 ;; Predicated FCMLA, merging with an independent value.
6568 (define_insn_and_rewrite "*cond_<optab><mode>_any"
6569   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, &w, ?&w")
6570         (unspec:SVE_FULL_F
6571           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
6572            (unspec:SVE_FULL_F
6573              [(match_operand 6)
6574               (match_operand:SI 7 "aarch64_sve_gp_strictness")
6575               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w, w")
6576               (match_operand:SVE_FULL_F 3 "register_operand" "w, w, w, w")
6577               (match_operand:SVE_FULL_F 4 "register_operand" "w, 0, w, w")]
6578              SVE_COND_FCMLA)
6579            (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero" "Dz, Dz, 0, w")]
6580           UNSPEC_SEL))]
6581   "TARGET_SVE
6582    && !rtx_equal_p (operands[4], operands[5])
6583    && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
6584   "@
6585    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6586    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6587    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
6588    #"
6589   "&& 1"
6590   {
6591     if (reload_completed
6592         && register_operand (operands[5], <MODE>mode)
6593         && !rtx_equal_p (operands[0], operands[5]))
6594       {
6595         emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
6596                                                  operands[5], operands[1]));
6597         operands[5] = operands[4] = operands[0];
6598       }
6599     else if (!rtx_equal_p (operands[1], operands[6]))
6600       operands[6] = copy_rtx (operands[1]);
6601     else
6602       FAIL;
6603   }
6604   [(set_attr "movprfx" "yes")]
6605 )
6606
6607 ;; Unpredicated FCMLA with indexing.
6608 (define_insn "@aarch64_<optab>_lane_<mode>"
6609   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w")
6610         (unspec:SVE_FULL_HSF
6611           [(match_operand:SVE_FULL_HSF 1 "register_operand" "w, w")
6612            (unspec:SVE_FULL_HSF
6613              [(match_operand:SVE_FULL_HSF 2 "register_operand" "<sve_lane_pair_con>, <sve_lane_pair_con>")
6614               (match_operand:SI 3 "const_int_operand")]
6615              UNSPEC_SVE_LANE_SELECT)
6616            (match_operand:SVE_FULL_HSF 4 "register_operand" "0, w")]
6617           FCMLA))]
6618   "TARGET_SVE"
6619   "@
6620    fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
6621    movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>"
6622   [(set_attr "movprfx" "*,yes")]
6623 )
6624
6625 ;; -------------------------------------------------------------------------
6626 ;; ---- [FP] Trigonometric multiply-add
6627 ;; -------------------------------------------------------------------------
6628 ;; Includes:
6629 ;; - FTMAD
6630 ;; -------------------------------------------------------------------------
6631
6632 (define_insn "@aarch64_sve_tmad<mode>"
6633   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w, ?&w")
6634         (unspec:SVE_FULL_F
6635           [(match_operand:SVE_FULL_F 1 "register_operand" "0, w")
6636            (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
6637            (match_operand:DI 3 "const_int_operand")]
6638           UNSPEC_FTMAD))]
6639   "TARGET_SVE"
6640   "@
6641    ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
6642    movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3"
6643   [(set_attr "movprfx" "*,yes")]
6644 )
6645
6646 ;; -------------------------------------------------------------------------
6647 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
6648 ;; -------------------------------------------------------------------------
6649 ;; Includes:
6650 ;; - BFDOT (BF16)
6651 ;; - BFMLALB (BF16)
6652 ;; - BFMLALT (BF16)
6653 ;; - BFMMLA (BF16)
6654 ;; -------------------------------------------------------------------------
6655
6656 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
6657   [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
6658         (unspec:VNx4SF
6659           [(match_operand:VNx4SF 1 "register_operand" "0, w")
6660            (match_operand:VNx8BF 2 "register_operand" "w, w")
6661            (match_operand:VNx8BF 3 "register_operand" "w, w")]
6662           SVE_BFLOAT_TERNARY_LONG))]
6663   "TARGET_SVE_BF16"
6664   "@
6665    <sve_fp_op>\t%0.s, %2.h, %3.h
6666    movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h"
6667   [(set_attr "movprfx" "*,yes")]
6668 )
6669
6670 ;; The immediate range is enforced before generating the instruction.
6671 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
6672   [(set (match_operand:VNx4SF 0 "register_operand" "=w, ?&w")
6673         (unspec:VNx4SF
6674           [(match_operand:VNx4SF 1 "register_operand" "0, w")
6675            (match_operand:VNx8BF 2 "register_operand" "w, w")
6676            (match_operand:VNx8BF 3 "register_operand" "y, y")
6677            (match_operand:SI 4 "const_int_operand")]
6678           SVE_BFLOAT_TERNARY_LONG_LANE))]
6679   "TARGET_SVE_BF16"
6680   "@
6681    <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
6682    movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]"
6683   [(set_attr "movprfx" "*,yes")]
6684 )
6685
6686 ;; -------------------------------------------------------------------------
6687 ;; ---- [FP] Matrix multiply-accumulate
6688 ;; -------------------------------------------------------------------------
6689 ;; Includes:
6690 ;; - FMMLA (F32MM,F64MM)
6691 ;; -------------------------------------------------------------------------
6692
6693 ;; The mode iterator enforces the target requirements.
6694 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
6695   [(set (match_operand:SVE_MATMULF 0 "register_operand" "=w, ?&w")
6696         (unspec:SVE_MATMULF
6697           [(match_operand:SVE_MATMULF 2 "register_operand" "w, w")
6698            (match_operand:SVE_MATMULF 3 "register_operand" "w, w")
6699            (match_operand:SVE_MATMULF 1 "register_operand" "0, w")]
6700           FMMLA))]
6701   "TARGET_SVE"
6702   "@
6703    <sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
6704    movprfx\t%0, %1\;<sve_fp_op>\\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
6705   [(set_attr "movprfx" "*,yes")]
6706 )
6707
6708 ;; =========================================================================
6709 ;; == Comparisons and selects
6710 ;; =========================================================================
6711
6712 ;; -------------------------------------------------------------------------
6713 ;; ---- [INT,FP] Select based on predicates
6714 ;; -------------------------------------------------------------------------
6715 ;; Includes merging patterns for:
6716 ;; - FMOV
6717 ;; - MOV
6718 ;; - SEL
6719 ;; -------------------------------------------------------------------------
6720
6721 ;; vcond_mask operand order: true, false, mask
6722 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
6723 ;; SEL operand order:        mask, true, false
6724 (define_expand "@vcond_mask_<mode><vpred>"
6725   [(set (match_operand:SVE_FULL 0 "register_operand")
6726         (unspec:SVE_FULL
6727           [(match_operand:<VPRED> 3 "register_operand")
6728            (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm")
6729            (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero")]
6730           UNSPEC_SEL))]
6731   "TARGET_SVE"
6732   {
6733     if (register_operand (operands[1], <MODE>mode))
6734       operands[2] = force_reg (<MODE>mode, operands[2]);
6735   }
6736 )
6737
6738 ;; Selects between:
6739 ;; - two registers
6740 ;; - a duplicated immediate and a register
6741 ;; - a duplicated immediate and zero
6742 (define_insn "*vcond_mask_<mode><vpred>"
6743   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w, w, w, ?w, ?&w, ?&w")
6744         (unspec:SVE_FULL
6745           [(match_operand:<VPRED> 3 "register_operand" "Upa, Upa, Upa, Upa, Upl, Upl, Upl")
6746            (match_operand:SVE_FULL 1 "aarch64_sve_reg_or_dup_imm" "w, vss, vss, Ufc, Ufc, vss, Ufc")
6747            (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "w, 0, Dz, 0, Dz, w, w")]
6748           UNSPEC_SEL))]
6749   "TARGET_SVE
6750    && (!register_operand (operands[1], <MODE>mode)
6751        || register_operand (operands[2], <MODE>mode))"
6752   "@
6753    sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
6754    mov\t%0.<Vetype>, %3/m, #%I1
6755    mov\t%0.<Vetype>, %3/z, #%I1
6756    fmov\t%0.<Vetype>, %3/m, #%1
6757    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
6758    movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
6759    movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1"
6760   [(set_attr "movprfx" "*,*,*,*,yes,yes,yes")]
6761 )
6762
6763 ;; Optimize selects between a duplicated scalar variable and another vector,
6764 ;; the latter of which can be a zero constant or a variable.  Treat duplicates
6765 ;; of GPRs as being more expensive than duplicates of FPRs, since they
6766 ;; involve a cross-file move.
6767 (define_insn "@aarch64_sel_dup<mode>"
6768   [(set (match_operand:SVE_FULL 0 "register_operand" "=?w, w, ??w, ?&w, ??&w, ?&w")
6769         (unspec:SVE_FULL
6770           [(match_operand:<VPRED> 3 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
6771            (vec_duplicate:SVE_FULL
6772              (match_operand:<VEL> 1 "register_operand" "r, w, r, w, r, w"))
6773            (match_operand:SVE_FULL 2 "aarch64_simd_reg_or_zero" "0, 0, Dz, Dz, w, w")]
6774           UNSPEC_SEL))]
6775   "TARGET_SVE"
6776   "@
6777    mov\t%0.<Vetype>, %3/m, %<vwcore>1
6778    mov\t%0.<Vetype>, %3/m, %<Vetype>1
6779    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6780    movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
6781    movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
6782    movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1"
6783   [(set_attr "movprfx" "*,*,yes,yes,yes,yes")]
6784 )
6785
6786 ;; -------------------------------------------------------------------------
6787 ;; ---- [INT,FP] Compare and select
6788 ;; -------------------------------------------------------------------------
6789 ;; The patterns in this section are synthetic.
6790 ;; -------------------------------------------------------------------------
6791
6792 ;; Integer (signed) vcond.  Don't enforce an immediate range here, since it
6793 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
6794 (define_expand "vcond<mode><v_int_equiv>"
6795   [(set (match_operand:SVE_FULL 0 "register_operand")
6796         (if_then_else:SVE_FULL
6797           (match_operator 3 "comparison_operator"
6798             [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6799              (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6800           (match_operand:SVE_FULL 1 "nonmemory_operand")
6801           (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6802   "TARGET_SVE"
6803   {
6804     aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6805     DONE;
6806   }
6807 )
6808
6809 ;; Integer vcondu.  Don't enforce an immediate range here, since it
6810 ;; depends on the comparison; leave it to aarch64_expand_sve_vcond instead.
6811 (define_expand "vcondu<mode><v_int_equiv>"
6812   [(set (match_operand:SVE_FULL 0 "register_operand")
6813         (if_then_else:SVE_FULL
6814           (match_operator 3 "comparison_operator"
6815             [(match_operand:<V_INT_EQUIV> 4 "register_operand")
6816              (match_operand:<V_INT_EQUIV> 5 "nonmemory_operand")])
6817           (match_operand:SVE_FULL 1 "nonmemory_operand")
6818           (match_operand:SVE_FULL 2 "nonmemory_operand")))]
6819   "TARGET_SVE"
6820   {
6821     aarch64_expand_sve_vcond (<MODE>mode, <V_INT_EQUIV>mode, operands);
6822     DONE;
6823   }
6824 )
6825
6826 ;; Floating-point vcond.  All comparisons except FCMUO allow a zero operand;
6827 ;; aarch64_expand_sve_vcond handles the case of an FCMUO with zero.
6828 (define_expand "vcond<mode><v_fp_equiv>"
6829   [(set (match_operand:SVE_FULL_HSD 0 "register_operand")
6830         (if_then_else:SVE_FULL_HSD
6831           (match_operator 3 "comparison_operator"
6832             [(match_operand:<V_FP_EQUIV> 4 "register_operand")
6833              (match_operand:<V_FP_EQUIV> 5 "aarch64_simd_reg_or_zero")])
6834           (match_operand:SVE_FULL_HSD 1 "nonmemory_operand")
6835           (match_operand:SVE_FULL_HSD 2 "nonmemory_operand")))]
6836   "TARGET_SVE"
6837   {
6838     aarch64_expand_sve_vcond (<MODE>mode, <V_FP_EQUIV>mode, operands);
6839     DONE;
6840   }
6841 )
6842
6843 ;; -------------------------------------------------------------------------
6844 ;; ---- [INT] Comparisons
6845 ;; -------------------------------------------------------------------------
6846 ;; Includes:
6847 ;; - CMPEQ
6848 ;; - CMPGE
6849 ;; - CMPGT
6850 ;; - CMPHI
6851 ;; - CMPHS
6852 ;; - CMPLE
6853 ;; - CMPLO
6854 ;; - CMPLS
6855 ;; - CMPLT
6856 ;; - CMPNE
6857 ;; -------------------------------------------------------------------------
6858
6859 ;; Signed integer comparisons.  Don't enforce an immediate range here, since
6860 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
6861 ;; instead.
6862 (define_expand "vec_cmp<mode><vpred>"
6863   [(parallel
6864     [(set (match_operand:<VPRED> 0 "register_operand")
6865           (match_operator:<VPRED> 1 "comparison_operator"
6866             [(match_operand:SVE_FULL_I 2 "register_operand")
6867              (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6868      (clobber (reg:CC_NZC CC_REGNUM))])]
6869   "TARGET_SVE"
6870   {
6871     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6872                                     operands[2], operands[3]);
6873     DONE;
6874   }
6875 )
6876
6877 ;; Unsigned integer comparisons.  Don't enforce an immediate range here, since
6878 ;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
6879 ;; instead.
6880 (define_expand "vec_cmpu<mode><vpred>"
6881   [(parallel
6882     [(set (match_operand:<VPRED> 0 "register_operand")
6883           (match_operator:<VPRED> 1 "comparison_operator"
6884             [(match_operand:SVE_FULL_I 2 "register_operand")
6885              (match_operand:SVE_FULL_I 3 "nonmemory_operand")]))
6886      (clobber (reg:CC_NZC CC_REGNUM))])]
6887   "TARGET_SVE"
6888   {
6889     aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
6890                                     operands[2], operands[3]);
6891     DONE;
6892   }
6893 )
6894
6895 ;; Predicated integer comparisons.
6896 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
6897   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6898         (unspec:<VPRED>
6899           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
6900            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
6901            (SVE_INT_CMP:<VPRED>
6902              (match_operand:SVE_FULL_I 3 "register_operand" "w, w")
6903              (match_operand:SVE_FULL_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6904           UNSPEC_PRED_Z))
6905    (clobber (reg:CC_NZC CC_REGNUM))]
6906   "TARGET_SVE"
6907   "@
6908    cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
6909    cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
6910 )
6911
6912 ;; Predicated integer comparisons in which both the flag and predicate
6913 ;; results are interesting.
6914 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
6915   [(set (reg:CC_NZC CC_REGNUM)
6916         (unspec:CC_NZC
6917           [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6918            (match_operand 4)
6919            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6920            (unspec:<VPRED>
6921              [(match_operand 6)
6922               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6923               (SVE_INT_CMP:<VPRED>
6924                 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6925                 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6926              UNSPEC_PRED_Z)]
6927           UNSPEC_PTEST))
6928    (set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6929         (unspec:<VPRED>
6930           [(match_dup 6)
6931            (match_dup 7)
6932            (SVE_INT_CMP:<VPRED>
6933              (match_dup 2)
6934              (match_dup 3))]
6935           UNSPEC_PRED_Z))]
6936   "TARGET_SVE
6937    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6938   "@
6939    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6940    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6941   "&& !rtx_equal_p (operands[4], operands[6])"
6942   {
6943     operands[6] = copy_rtx (operands[4]);
6944     operands[7] = operands[5];
6945   }
6946 )
6947
6948 ;; Predicated integer comparisons in which only the flags result is
6949 ;; interesting.
6950 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
6951   [(set (reg:CC_NZC CC_REGNUM)
6952         (unspec:CC_NZC
6953           [(match_operand:VNx16BI 1 "register_operand" "Upl, Upl")
6954            (match_operand 4)
6955            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6956            (unspec:<VPRED>
6957              [(match_operand 6)
6958               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
6959               (SVE_INT_CMP:<VPRED>
6960                 (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6961                 (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6962              UNSPEC_PRED_Z)]
6963           UNSPEC_PTEST))
6964    (clobber (match_scratch:<VPRED> 0 "=Upa, Upa"))]
6965   "TARGET_SVE
6966    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
6967   "@
6968    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
6969    cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
6970   "&& !rtx_equal_p (operands[4], operands[6])"
6971   {
6972     operands[6] = copy_rtx (operands[4]);
6973     operands[7] = operands[5];
6974   }
6975 )
6976
6977 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
6978 ;; comparison with an AND.  Split the instruction into its preferred form
6979 ;; at the earliest opportunity, in order to get rid of the redundant
6980 ;; operand 4.
6981 (define_insn_and_split "*cmp<cmp_op><mode>_and"
6982   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
6983         (and:<VPRED>
6984           (unspec:<VPRED>
6985             [(match_operand 4)
6986              (const_int SVE_KNOWN_PTRUE)
6987              (SVE_INT_CMP:<VPRED>
6988                (match_operand:SVE_FULL_I 2 "register_operand" "w, w")
6989                (match_operand:SVE_FULL_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
6990             UNSPEC_PRED_Z)
6991           (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
6992    (clobber (reg:CC_NZC CC_REGNUM))]
6993   "TARGET_SVE"
6994   "#"
6995   "&& 1"
6996   [(parallel
6997      [(set (match_dup 0)
6998            (unspec:<VPRED>
6999              [(match_dup 1)
7000               (const_int SVE_MAYBE_NOT_PTRUE)
7001               (SVE_INT_CMP:<VPRED>
7002                 (match_dup 2)
7003                 (match_dup 3))]
7004              UNSPEC_PRED_Z))
7005       (clobber (reg:CC_NZC CC_REGNUM))])]
7006 )
7007
7008 ;; Predicated integer wide comparisons.
7009 (define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
7010   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7011         (unspec:<VPRED>
7012           [(match_operand:VNx16BI 1 "register_operand" "Upl")
7013            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7014            (unspec:<VPRED>
7015              [(match_operand:SVE_FULL_BHSI 3 "register_operand" "w")
7016               (match_operand:VNx2DI 4 "register_operand" "w")]
7017              SVE_COND_INT_CMP_WIDE)]
7018           UNSPEC_PRED_Z))
7019    (clobber (reg:CC_NZC CC_REGNUM))]
7020   "TARGET_SVE"
7021   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d"
7022 )
7023
7024 ;; Predicated integer wide comparisons in which both the flag and
7025 ;; predicate results are interesting.
7026 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
7027   [(set (reg:CC_NZC CC_REGNUM)
7028         (unspec:CC_NZC
7029           [(match_operand:VNx16BI 1 "register_operand" "Upl")
7030            (match_operand 4)
7031            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7032            (unspec:<VPRED>
7033              [(match_operand:VNx16BI 6 "register_operand" "Upl")
7034               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7035               (unspec:<VPRED>
7036                 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7037                  (match_operand:VNx2DI 3 "register_operand" "w")]
7038                 SVE_COND_INT_CMP_WIDE)]
7039              UNSPEC_PRED_Z)]
7040           UNSPEC_PTEST))
7041    (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7042         (unspec:<VPRED>
7043           [(match_dup 6)
7044            (match_dup 7)
7045            (unspec:<VPRED>
7046              [(match_dup 2)
7047               (match_dup 3)]
7048              SVE_COND_INT_CMP_WIDE)]
7049           UNSPEC_PRED_Z))]
7050   "TARGET_SVE
7051    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7052   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7053 )
7054
7055 ;; Predicated integer wide comparisons in which only the flags result
7056 ;; is interesting.
7057 (define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
7058   [(set (reg:CC_NZC CC_REGNUM)
7059         (unspec:CC_NZC
7060           [(match_operand:VNx16BI 1 "register_operand" "Upl")
7061            (match_operand 4)
7062            (match_operand:SI 5 "aarch64_sve_ptrue_flag")
7063            (unspec:<VPRED>
7064              [(match_operand:VNx16BI 6 "register_operand" "Upl")
7065               (match_operand:SI 7 "aarch64_sve_ptrue_flag")
7066               (unspec:<VPRED>
7067                 [(match_operand:SVE_FULL_BHSI 2 "register_operand" "w")
7068                  (match_operand:VNx2DI 3 "register_operand" "w")]
7069                 SVE_COND_INT_CMP_WIDE)]
7070              UNSPEC_PRED_Z)]
7071           UNSPEC_PTEST))
7072    (clobber (match_scratch:<VPRED> 0 "=Upa"))]
7073   "TARGET_SVE
7074    && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
7075   "cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d"
7076 )
7077
7078 ;; -------------------------------------------------------------------------
7079 ;; ---- [INT] While tests
7080 ;; -------------------------------------------------------------------------
7081 ;; Includes:
7082 ;; - WHILEGE (SVE2)
7083 ;; - WHILEGT (SVE2)
7084 ;; - WHILEHI (SVE2)
7085 ;; - WHILEHS (SVE2)
7086 ;; - WHILELE
7087 ;; - WHILELO
7088 ;; - WHILELS
7089 ;; - WHILELT
7090 ;; - WHILERW (SVE2)
7091 ;; - WHILEWR (SVE2)
7092 ;; -------------------------------------------------------------------------
7093
7094 ;; Set element I of the result if (cmp (plus operand1 J) operand2) is
7095 ;; true for all J in [0, I].
7096 (define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
7097   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7098         (unspec:PRED_ALL [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7099                           (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7100                          SVE_WHILE))
7101    (clobber (reg:CC_NZC CC_REGNUM))]
7102   "TARGET_SVE"
7103   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7104 )
7105
7106 ;; The WHILE instructions set the flags in the same way as a PTEST with
7107 ;; a PTRUE GP.  Handle the case in which both results are useful.  The GP
7108 ;; operands to the PTEST aren't needed, so we allow them to be anything.
7109 (define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
7110   [(set (reg:CC_NZC CC_REGNUM)
7111         (unspec:CC_NZC
7112           [(match_operand 3)
7113            (match_operand 4)
7114            (const_int SVE_KNOWN_PTRUE)
7115            (unspec:PRED_ALL
7116              [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7117               (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7118              SVE_WHILE)]
7119           UNSPEC_PTEST))
7120    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7121         (unspec:PRED_ALL [(match_dup 1)
7122                           (match_dup 2)]
7123                          SVE_WHILE))]
7124   "TARGET_SVE"
7125   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7126   ;; Force the compiler to drop the unused predicate operand, so that we
7127   ;; don't have an unnecessary PTRUE.
7128   "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
7129   {
7130     operands[3] = CONSTM1_RTX (VNx16BImode);
7131     operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
7132   }
7133 )
7134
7135 ;; Same, but handle the case in which only the flags result is useful.
7136 (define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
7137   [(set (reg:CC_NZC CC_REGNUM)
7138         (unspec:CC_NZC
7139           [(match_operand 3)
7140            (match_operand 4)
7141            (const_int SVE_KNOWN_PTRUE)
7142            (unspec:PRED_ALL
7143              [(match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
7144               (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
7145              SVE_WHILE)]
7146           UNSPEC_PTEST))
7147    (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
7148   "TARGET_SVE"
7149   "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
7150   ;; Force the compiler to drop the unused predicate operand, so that we
7151   ;; don't have an unnecessary PTRUE.
7152   "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
7153   {
7154     operands[3] = CONSTM1_RTX (VNx16BImode);
7155     operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
7156   }
7157 )
7158
7159 ;; -------------------------------------------------------------------------
7160 ;; ---- [FP] Direct comparisons
7161 ;; -------------------------------------------------------------------------
7162 ;; Includes:
7163 ;; - FCMEQ
7164 ;; - FCMGE
7165 ;; - FCMGT
7166 ;; - FCMLE
7167 ;; - FCMLT
7168 ;; - FCMNE
7169 ;; - FCMUO
7170 ;; -------------------------------------------------------------------------
7171
7172 ;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
7173 ;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
7174 ;; with zero.
7175 (define_expand "vec_cmp<mode><vpred>"
7176   [(set (match_operand:<VPRED> 0 "register_operand")
7177         (match_operator:<VPRED> 1 "comparison_operator"
7178           [(match_operand:SVE_FULL_F 2 "register_operand")
7179            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
7180   "TARGET_SVE"
7181   {
7182     aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
7183                                       operands[2], operands[3], false);
7184     DONE;
7185   }
7186 )
7187
7188 ;; Predicated floating-point comparisons.
7189 (define_insn "@aarch64_pred_fcm<cmp_op><mode>"
7190   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7191         (unspec:<VPRED>
7192           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7193            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7194            (match_operand:SVE_FULL_F 3 "register_operand" "w, w")
7195            (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero" "Dz, w")]
7196           SVE_COND_FP_CMP_I0))]
7197   "TARGET_SVE"
7198   "@
7199    fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
7200    fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7201 )
7202
7203 ;; Same for unordered comparisons.
7204 (define_insn "@aarch64_pred_fcmuo<mode>"
7205   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7206         (unspec:<VPRED>
7207           [(match_operand:<VPRED> 1 "register_operand" "Upl")
7208            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7209            (match_operand:SVE_FULL_F 3 "register_operand" "w")
7210            (match_operand:SVE_FULL_F 4 "register_operand" "w")]
7211           UNSPEC_COND_FCMUO))]
7212   "TARGET_SVE"
7213   "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
7214 )
7215
7216 ;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
7217 ;; with another predicate P.  This does not have the same trapping behavior
7218 ;; as predicating the comparison itself on P, but it's a legitimate fold,
7219 ;; since we can drop any potentially-trapping operations whose results
7220 ;; are not needed.
7221 ;;
7222 ;; Split the instruction into its preferred form (below) at the earliest
7223 ;; opportunity, in order to get rid of the redundant operand 1.
7224 (define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
7225   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
7226         (and:<VPRED>
7227           (unspec:<VPRED>
7228             [(match_operand:<VPRED> 1)
7229              (const_int SVE_KNOWN_PTRUE)
7230              (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
7231              (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
7232             SVE_COND_FP_CMP_I0)
7233           (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
7234   "TARGET_SVE"
7235   "#"
7236   "&& 1"
7237   [(set (match_dup 0)
7238         (unspec:<VPRED>
7239           [(match_dup 4)
7240            (const_int SVE_MAYBE_NOT_PTRUE)
7241            (match_dup 2)
7242            (match_dup 3)]
7243           SVE_COND_FP_CMP_I0))]
7244 )
7245
7246 ;; Same for unordered comparisons.
7247 (define_insn_and_split "*fcmuo<mode>_and_combine"
7248   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7249         (and:<VPRED>
7250           (unspec:<VPRED>
7251             [(match_operand:<VPRED> 1)
7252              (const_int SVE_KNOWN_PTRUE)
7253              (match_operand:SVE_FULL_F 2 "register_operand" "w")
7254              (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7255             UNSPEC_COND_FCMUO)
7256           (match_operand:<VPRED> 4 "register_operand" "Upl")))]
7257   "TARGET_SVE"
7258   "#"
7259   "&& 1"
7260   [(set (match_dup 0)
7261         (unspec:<VPRED>
7262           [(match_dup 4)
7263            (const_int SVE_MAYBE_NOT_PTRUE)
7264            (match_dup 2)
7265            (match_dup 3)]
7266           UNSPEC_COND_FCMUO))]
7267 )
7268
7269 ;; -------------------------------------------------------------------------
7270 ;; ---- [FP] Absolute comparisons
7271 ;; -------------------------------------------------------------------------
7272 ;; Includes:
7273 ;; - FACGE
7274 ;; - FACGT
7275 ;; - FACLE
7276 ;; - FACLT
7277 ;; -------------------------------------------------------------------------
7278
7279 ;; Predicated floating-point absolute comparisons.
7280 (define_expand "@aarch64_pred_fac<cmp_op><mode>"
7281   [(set (match_operand:<VPRED> 0 "register_operand")
7282         (unspec:<VPRED>
7283           [(match_operand:<VPRED> 1 "register_operand")
7284            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7285            (unspec:SVE_FULL_F
7286              [(match_dup 1)
7287               (match_dup 2)
7288               (match_operand:SVE_FULL_F 3 "register_operand")]
7289              UNSPEC_COND_FABS)
7290            (unspec:SVE_FULL_F
7291              [(match_dup 1)
7292               (match_dup 2)
7293               (match_operand:SVE_FULL_F 4 "register_operand")]
7294              UNSPEC_COND_FABS)]
7295           SVE_COND_FP_ABS_CMP))]
7296   "TARGET_SVE"
7297 )
7298
7299 (define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>"
7300   [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
7301         (unspec:<VPRED>
7302           [(match_operand:<VPRED> 1 "register_operand" "Upl")
7303            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
7304            (unspec:SVE_FULL_F
7305              [(match_operand 5)
7306               (match_operand:SI 6 "aarch64_sve_gp_strictness")
7307               (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7308              UNSPEC_COND_FABS)
7309            (unspec:SVE_FULL_F
7310              [(match_operand 7)
7311               (match_operand:SI 8 "aarch64_sve_gp_strictness")
7312               (match_operand:SVE_FULL_F 3 "register_operand" "w")]
7313              UNSPEC_COND_FABS)]
7314           SVE_COND_FP_ABS_CMP))]
7315   "TARGET_SVE
7316    && aarch64_sve_pred_dominates_p (&operands[5], operands[1])
7317    && aarch64_sve_pred_dominates_p (&operands[7], operands[1])"
7318   "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
7319   "&& (!rtx_equal_p (operands[1], operands[5])
7320        || !rtx_equal_p (operands[1], operands[7]))"
7321   {
7322     operands[5] = copy_rtx (operands[1]);
7323     operands[7] = copy_rtx (operands[1]);
7324   }
7325 )
7326
7327 ;; -------------------------------------------------------------------------
7328 ;; ---- [PRED] Select
7329 ;; -------------------------------------------------------------------------
7330 ;; Includes:
7331 ;; - SEL
7332 ;; -------------------------------------------------------------------------
7333
7334 (define_insn "@vcond_mask_<mode><mode>"
7335   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7336         (ior:PRED_ALL
7337           (and:PRED_ALL
7338             (match_operand:PRED_ALL 3 "register_operand" "Upa")
7339             (match_operand:PRED_ALL 1 "register_operand" "Upa"))
7340           (and:PRED_ALL
7341             (not (match_dup 3))
7342             (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
7343   "TARGET_SVE"
7344   "sel\t%0.b, %3, %1.b, %2.b"
7345 )
7346
7347 ;; -------------------------------------------------------------------------
7348 ;; ---- [PRED] Test bits
7349 ;; -------------------------------------------------------------------------
7350 ;; Includes:
7351 ;; - PTEST
7352 ;; -------------------------------------------------------------------------
7353
7354 ;; Branch based on predicate equality or inequality.
7355 (define_expand "cbranch<mode>4"
7356   [(set (pc)
7357         (if_then_else
7358           (match_operator 0 "aarch64_equality_operator"
7359             [(match_operand:PRED_ALL 1 "register_operand")
7360              (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
7361           (label_ref (match_operand 3 ""))
7362           (pc)))]
7363   ""
7364   {
7365     rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
7366     rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
7367     rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
7368     rtx pred;
7369     if (operands[2] == CONST0_RTX (<MODE>mode))
7370       pred = operands[1];
7371     else
7372       {
7373         pred = gen_reg_rtx (<MODE>mode);
7374         emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
7375                                                  operands[2]));
7376       }
7377     emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
7378     operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
7379     operands[2] = const0_rtx;
7380   }
7381 )
7382
7383 ;; See "Description of UNSPEC_PTEST" above for details.
7384 (define_insn "aarch64_ptest<mode>"
7385   [(set (reg:CC_NZC CC_REGNUM)
7386         (unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
7387                         (match_operand 1)
7388                         (match_operand:SI 2 "aarch64_sve_ptrue_flag")
7389                         (match_operand:PRED_ALL 3 "register_operand" "Upa")]
7390                        UNSPEC_PTEST))]
7391   "TARGET_SVE"
7392   "ptest\t%0, %3.b"
7393 )
7394
7395 ;; =========================================================================
7396 ;; == Reductions
7397 ;; =========================================================================
7398
7399 ;; -------------------------------------------------------------------------
7400 ;; ---- [INT,FP] Conditional reductions
7401 ;; -------------------------------------------------------------------------
7402 ;; Includes:
7403 ;; - CLASTA
7404 ;; - CLASTB
7405 ;; -------------------------------------------------------------------------
7406
7407 ;; Set operand 0 to the last active element in operand 3, or to tied
7408 ;; operand 1 if no elements are active.
7409 (define_insn "@fold_extract_<last_op>_<mode>"
7410   [(set (match_operand:<VEL> 0 "register_operand" "=?r, w")
7411         (unspec:<VEL>
7412           [(match_operand:<VEL> 1 "register_operand" "0, 0")
7413            (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7414            (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7415           CLAST))]
7416   "TARGET_SVE"
7417   "@
7418    clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
7419    clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>"
7420 )
7421
7422 (define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
7423   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7424         (unspec:SVE_FULL
7425           [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7426            (match_operand:<VPRED> 2 "register_operand" "Upl, Upl")
7427            (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7428           CLAST))]
7429   "TARGET_SVE"
7430   "@
7431    clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
7432    movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>"
7433 )
7434
7435 ;; -------------------------------------------------------------------------
7436 ;; ---- [INT] Tree reductions
7437 ;; -------------------------------------------------------------------------
7438 ;; Includes:
7439 ;; - ANDV
7440 ;; - EORV
7441 ;; - ORV
7442 ;; - SADDV
7443 ;; - SMAXV
7444 ;; - SMINV
7445 ;; - UADDV
7446 ;; - UMAXV
7447 ;; - UMINV
7448 ;; -------------------------------------------------------------------------
7449
7450 ;; Unpredicated integer add reduction.
7451 (define_expand "reduc_plus_scal_<mode>"
7452   [(match_operand:<VEL> 0 "register_operand")
7453    (match_operand:SVE_FULL_I 1 "register_operand")]
7454   "TARGET_SVE"
7455   {
7456     rtx pred = aarch64_ptrue_reg (<VPRED>mode);
7457     rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
7458     emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
7459     if (tmp != operands[0])
7460       emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
7461     DONE;
7462   }
7463 )
7464
7465 ;; Predicated integer add reduction.  The result is always 64-bits.
7466 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7467   [(set (match_operand:DI 0 "register_operand" "=w")
7468         (unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
7469                     (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7470                    SVE_INT_ADDV))]
7471   "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
7472   "<su>addv\t%d0, %1, %2.<Vetype>"
7473 )
7474
7475 ;; Unpredicated integer reductions.
7476 (define_expand "reduc_<optab>_scal_<mode>"
7477   [(set (match_operand:<VEL> 0 "register_operand")
7478         (unspec:<VEL> [(match_dup 2)
7479                        (match_operand:SVE_FULL_I 1 "register_operand")]
7480                       SVE_INT_REDUCTION))]
7481   "TARGET_SVE"
7482   {
7483     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7484   }
7485 )
7486
7487 ;; Predicated integer reductions.
7488 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7489   [(set (match_operand:<VEL> 0 "register_operand" "=w")
7490         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7491                        (match_operand:SVE_FULL_I 2 "register_operand" "w")]
7492                       SVE_INT_REDUCTION))]
7493   "TARGET_SVE"
7494   "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
7495 )
7496
7497 ;; -------------------------------------------------------------------------
7498 ;; ---- [FP] Tree reductions
7499 ;; -------------------------------------------------------------------------
7500 ;; Includes:
7501 ;; - FADDV
7502 ;; - FMAXNMV
7503 ;; - FMAXV
7504 ;; - FMINNMV
7505 ;; - FMINV
7506 ;; -------------------------------------------------------------------------
7507
7508 ;; Unpredicated floating-point tree reductions.
7509 (define_expand "reduc_<optab>_scal_<mode>"
7510   [(set (match_operand:<VEL> 0 "register_operand")
7511         (unspec:<VEL> [(match_dup 2)
7512                        (match_operand:SVE_FULL_F 1 "register_operand")]
7513                       SVE_FP_REDUCTION))]
7514   "TARGET_SVE"
7515   {
7516     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7517   }
7518 )
7519
7520 ;; Predicated floating-point tree reductions.
7521 (define_insn "@aarch64_pred_reduc_<optab>_<mode>"
7522   [(set (match_operand:<VEL> 0 "register_operand" "=w")
7523         (unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
7524                        (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7525                       SVE_FP_REDUCTION))]
7526   "TARGET_SVE"
7527   "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
7528 )
7529
7530 ;; -------------------------------------------------------------------------
7531 ;; ---- [FP] Left-to-right reductions
7532 ;; -------------------------------------------------------------------------
7533 ;; Includes:
7534 ;; - FADDA
7535 ;; -------------------------------------------------------------------------
7536
7537 ;; Unpredicated in-order FP reductions.
7538 (define_expand "fold_left_plus_<mode>"
7539   [(set (match_operand:<VEL> 0 "register_operand")
7540         (unspec:<VEL> [(match_dup 3)
7541                        (match_operand:<VEL> 1 "register_operand")
7542                        (match_operand:SVE_FULL_F 2 "register_operand")]
7543                       UNSPEC_FADDA))]
7544   "TARGET_SVE"
7545   {
7546     operands[3] = aarch64_ptrue_reg (<VPRED>mode);
7547   }
7548 )
7549
7550 ;; Predicated in-order FP reductions.
7551 (define_insn "mask_fold_left_plus_<mode>"
7552   [(set (match_operand:<VEL> 0 "register_operand" "=w")
7553         (unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
7554                        (match_operand:<VEL> 1 "register_operand" "0")
7555                        (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7556                       UNSPEC_FADDA))]
7557   "TARGET_SVE"
7558   "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
7559 )
7560
7561 ;; =========================================================================
7562 ;; == Permutes
7563 ;; =========================================================================
7564
7565 ;; -------------------------------------------------------------------------
7566 ;; ---- [INT,FP] General permutes
7567 ;; -------------------------------------------------------------------------
7568 ;; Includes:
7569 ;; - TBL
7570 ;; -------------------------------------------------------------------------
7571
7572 (define_expand "vec_perm<mode>"
7573   [(match_operand:SVE_FULL 0 "register_operand")
7574    (match_operand:SVE_FULL 1 "register_operand")
7575    (match_operand:SVE_FULL 2 "register_operand")
7576    (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
7577   "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
7578   {
7579     aarch64_expand_sve_vec_perm (operands[0], operands[1],
7580                                  operands[2], operands[3]);
7581     DONE;
7582   }
7583 )
7584
7585 (define_insn "@aarch64_sve_tbl<mode>"
7586   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7587         (unspec:SVE_FULL
7588           [(match_operand:SVE_FULL 1 "register_operand" "w")
7589            (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
7590           UNSPEC_TBL))]
7591   "TARGET_SVE"
7592   "tbl\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7593 )
7594
7595 ;; -------------------------------------------------------------------------
7596 ;; ---- [INT,FP] Special-purpose unary permutes
7597 ;; -------------------------------------------------------------------------
7598 ;; Includes:
7599 ;; - COMPACT
7600 ;; - DUP
7601 ;; - REV
7602 ;; -------------------------------------------------------------------------
7603
7604 ;; Compact active elements and pad with zeros.
7605 (define_insn "@aarch64_sve_compact<mode>"
7606   [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
7607         (unspec:SVE_FULL_SD
7608           [(match_operand:<VPRED> 1 "register_operand" "Upl")
7609            (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
7610           UNSPEC_SVE_COMPACT))]
7611   "TARGET_SVE"
7612   "compact\t%0.<Vetype>, %1, %2.<Vetype>"
7613 )
7614
7615 ;; Duplicate one element of a vector.
7616 (define_insn "@aarch64_sve_dup_lane<mode>"
7617   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7618         (vec_duplicate:SVE_FULL
7619           (vec_select:<VEL>
7620             (match_operand:SVE_FULL 1 "register_operand" "w")
7621             (parallel [(match_operand:SI 2 "const_int_operand")]))))]
7622   "TARGET_SVE
7623    && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7624   "dup\t%0.<Vetype>, %1.<Vetype>[%2]"
7625 )
7626
7627 ;; Use DUP.Q to duplicate a 128-bit segment of a register.
7628 ;;
7629 ;; The vec_select:<V128> sets memory lane number N of the V128 to lane
7630 ;; number op2 + N of op1.  (We don't need to distinguish between memory
7631 ;; and architectural register lane numbering for op1 or op0, since the
7632 ;; two numbering schemes are the same for SVE.)
7633 ;;
7634 ;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
7635 ;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
7636 ;; of op0.  We therefore get the correct result for both endiannesses.
7637 ;;
7638 ;; The wrinkle is that for big-endian V128 registers, memory lane numbering
7639 ;; is in the opposite order to architectural register lane numbering.
7640 ;; Thus if we were to do this operation via a V128 temporary register,
7641 ;; the vec_select and vec_duplicate would both involve a reverse operation
7642 ;; for big-endian targets.  In this fused pattern the two reverses cancel
7643 ;; each other out.
7644 (define_insn "@aarch64_sve_dupq_lane<mode>"
7645   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7646         (vec_duplicate:SVE_FULL
7647           (vec_select:<V128>
7648             (match_operand:SVE_FULL 1 "register_operand" "w")
7649             (match_operand 2 "ascending_int_parallel"))))]
7650   "TARGET_SVE
7651    && (INTVAL (XVECEXP (operands[2], 0, 0))
7652        * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
7653    && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
7654                 * GET_MODE_SIZE (<VEL>mode), 0, 63)"
7655   {
7656     unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
7657                          * GET_MODE_SIZE (<VEL>mode));
7658     operands[2] = gen_int_mode (byte / 16, DImode);
7659     return "dup\t%0.q, %1.q[%2]";
7660   }
7661 )
7662
7663 ;; Reverse the order of elements within a full vector.
7664 (define_insn "@aarch64_sve_rev<mode>"
7665   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7666         (unspec:SVE_FULL
7667           [(match_operand:SVE_FULL 1 "register_operand" "w")]
7668           UNSPEC_REV))]
7669   "TARGET_SVE"
7670   "rev\t%0.<Vetype>, %1.<Vetype>")
7671
7672 ;; -------------------------------------------------------------------------
7673 ;; ---- [INT,FP] Special-purpose binary permutes
7674 ;; -------------------------------------------------------------------------
7675 ;; Includes:
7676 ;; - SPLICE
7677 ;; - TRN1
7678 ;; - TRN2
7679 ;; - UZP1
7680 ;; - UZP2
7681 ;; - ZIP1
7682 ;; - ZIP2
7683 ;; -------------------------------------------------------------------------
7684
7685 ;; Like EXT, but start at the first active element.
7686 (define_insn "@aarch64_sve_splice<mode>"
7687   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7688         (unspec:SVE_FULL
7689           [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
7690            (match_operand:SVE_FULL 2 "register_operand" "0, w")
7691            (match_operand:SVE_FULL 3 "register_operand" "w, w")]
7692           UNSPEC_SVE_SPLICE))]
7693   "TARGET_SVE"
7694   "@
7695    splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
7696    movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>"
7697   [(set_attr "movprfx" "*, yes")]
7698 )
7699
7700 ;; Permutes that take half the elements from one vector and half the
7701 ;; elements from the other.
7702 (define_insn "@aarch64_sve_<perm_insn><mode>"
7703   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7704         (unspec:SVE_FULL
7705           [(match_operand:SVE_FULL 1 "register_operand" "w")
7706            (match_operand:SVE_FULL 2 "register_operand" "w")]
7707           PERMUTE))]
7708   "TARGET_SVE"
7709   "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7710 )
7711
7712 ;; Apply PERMUTE to 128-bit sequences.  The behavior of these patterns
7713 ;; doesn't depend on the mode.
7714 (define_insn "@aarch64_sve_<optab><mode>"
7715   [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
7716         (unspec:SVE_FULL
7717           [(match_operand:SVE_FULL 1 "register_operand" "w")
7718            (match_operand:SVE_FULL 2 "register_operand" "w")]
7719           PERMUTEQ))]
7720   "TARGET_SVE_F64MM"
7721   "<perm_insn>\t%0.q, %1.q, %2.q"
7722 )
7723
7724 ;; Concatenate two vectors and extract a subvector.  Note that the
7725 ;; immediate (third) operand is the lane index not the byte index.
7726 (define_insn "@aarch64_sve_ext<mode>"
7727   [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
7728         (unspec:SVE_FULL
7729           [(match_operand:SVE_FULL 1 "register_operand" "0, w")
7730            (match_operand:SVE_FULL 2 "register_operand" "w, w")
7731            (match_operand:SI 3 "const_int_operand")]
7732           UNSPEC_EXT))]
7733   "TARGET_SVE
7734    && IN_RANGE (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode), 0, 255)"
7735   {
7736     operands[3] = GEN_INT (INTVAL (operands[3]) * GET_MODE_SIZE (<VEL>mode));
7737     return (which_alternative == 0
7738             ? "ext\\t%0.b, %0.b, %2.b, #%3"
7739             : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
7740   }
7741   [(set_attr "movprfx" "*,yes")]
7742 )
7743
7744 ;; -------------------------------------------------------------------------
7745 ;; ---- [PRED] Special-purpose unary permutes
7746 ;; -------------------------------------------------------------------------
7747 ;; Includes:
7748 ;; - REV
7749 ;; -------------------------------------------------------------------------
7750
7751 (define_insn "@aarch64_sve_rev<mode>"
7752   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7753         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
7754                          UNSPEC_REV))]
7755   "TARGET_SVE"
7756   "rev\t%0.<Vetype>, %1.<Vetype>")
7757
7758 ;; -------------------------------------------------------------------------
7759 ;; ---- [PRED] Special-purpose binary permutes
7760 ;; -------------------------------------------------------------------------
7761 ;; Includes:
7762 ;; - TRN1
7763 ;; - TRN2
7764 ;; - UZP1
7765 ;; - UZP2
7766 ;; - ZIP1
7767 ;; - ZIP2
7768 ;; -------------------------------------------------------------------------
7769
7770 ;; Permutes that take half the elements from one vector and half the
7771 ;; elements from the other.
7772 (define_insn "@aarch64_sve_<perm_insn><mode>"
7773   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
7774         (unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
7775                           (match_operand:PRED_ALL 2 "register_operand" "Upa")]
7776                          PERMUTE))]
7777   "TARGET_SVE"
7778   "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7779 )
7780
7781 ;; =========================================================================
7782 ;; == Conversions
7783 ;; =========================================================================
7784
7785 ;; -------------------------------------------------------------------------
7786 ;; ---- [INT<-INT] Packs
7787 ;; -------------------------------------------------------------------------
7788 ;; Includes:
7789 ;; - UZP1
7790 ;; -------------------------------------------------------------------------
7791
7792 ;; Integer pack.  Use UZP1 on the narrower type, which discards
7793 ;; the high part of each wide element.
7794 (define_insn "vec_pack_trunc_<Vwide>"
7795   [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
7796         (unspec:SVE_FULL_BHSI
7797           [(match_operand:<VWIDE> 1 "register_operand" "w")
7798            (match_operand:<VWIDE> 2 "register_operand" "w")]
7799           UNSPEC_PACK))]
7800   "TARGET_SVE"
7801   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
7802 )
7803
7804 ;; -------------------------------------------------------------------------
7805 ;; ---- [INT<-INT] Unpacks
7806 ;; -------------------------------------------------------------------------
7807 ;; Includes:
7808 ;; - SUNPKHI
7809 ;; - SUNPKLO
7810 ;; - UUNPKHI
7811 ;; - UUNPKLO
7812 ;; -------------------------------------------------------------------------
7813
7814 ;; Unpack the low or high half of a vector, where "high" refers to
7815 ;; the low-numbered lanes for big-endian and the high-numbered lanes
7816 ;; for little-endian.
7817 (define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
7818   [(match_operand:<VWIDE> 0 "register_operand")
7819    (unspec:<VWIDE>
7820      [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
7821   "TARGET_SVE"
7822   {
7823     emit_insn ((<hi_lanes_optab>
7824                 ? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
7825                 : gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
7826                (operands[0], operands[1]));
7827     DONE;
7828   }
7829 )
7830
7831 (define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
7832   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
7833         (unspec:<VWIDE>
7834           [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
7835           UNPACK))]
7836   "TARGET_SVE"
7837   "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
7838 )
7839
7840 ;; -------------------------------------------------------------------------
7841 ;; ---- [INT<-FP] Conversions
7842 ;; -------------------------------------------------------------------------
7843 ;; Includes:
7844 ;; - FCVTZS
7845 ;; - FCVTZU
7846 ;; -------------------------------------------------------------------------
7847
7848 ;; Unpredicated conversion of floats to integers of the same size (HF to HI,
7849 ;; SF to SI or DF to DI).
7850 (define_expand "<optab><mode><v_int_equiv>2"
7851   [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
7852         (unspec:<V_INT_EQUIV>
7853           [(match_dup 2)
7854            (const_int SVE_RELAXED_GP)
7855            (match_operand:SVE_FULL_F 1 "register_operand")]
7856           SVE_COND_FCVTI))]
7857   "TARGET_SVE"
7858   {
7859     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
7860   }
7861 )
7862
7863 ;; Predicated float-to-integer conversion, either to the same width or wider.
7864 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7865   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
7866         (unspec:SVE_FULL_HSDI
7867           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
7868            (match_operand:SI 3 "aarch64_sve_gp_strictness")
7869            (match_operand:SVE_FULL_F 2 "register_operand" "w")]
7870           SVE_COND_FCVTI))]
7871   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7872   "fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7873 )
7874
7875 ;; Predicated narrowing float-to-integer conversion.
7876 (define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7877   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=w")
7878         (unspec:VNx4SI_ONLY
7879           [(match_operand:VNx2BI 1 "register_operand" "Upl")
7880            (match_operand:SI 3 "aarch64_sve_gp_strictness")
7881            (match_operand:VNx2DF_ONLY 2 "register_operand" "w")]
7882           SVE_COND_FCVTI))]
7883   "TARGET_SVE"
7884   "fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7885 )
7886
7887 ;; Predicated float-to-integer conversion with merging, either to the same
7888 ;; width or wider.
7889 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7890   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
7891         (unspec:SVE_FULL_HSDI
7892           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
7893            (unspec:SVE_FULL_HSDI
7894              [(match_dup 1)
7895               (const_int SVE_STRICT_GP)
7896               (match_operand:SVE_FULL_F 2 "register_operand")]
7897              SVE_COND_FCVTI)
7898            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
7899           UNSPEC_SEL))]
7900   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
7901 )
7902
7903 ;; The first alternative doesn't need the earlyclobber, but the only case
7904 ;; it would help is the uninteresting one in which operands 2 and 3 are
7905 ;; the same register (despite having different modes).  Making all the
7906 ;; alternatives earlyclobber makes things more consistent for the
7907 ;; register allocator.
7908 (define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
7909   [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=&w, &w, ?&w")
7910         (unspec:SVE_FULL_HSDI
7911           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
7912            (unspec:SVE_FULL_HSDI
7913              [(match_operand 4)
7914               (match_operand:SI 5 "aarch64_sve_gp_strictness")
7915               (match_operand:SVE_FULL_F 2 "register_operand" "w, w, w")]
7916              SVE_COND_FCVTI)
7917            (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7918           UNSPEC_SEL))]
7919   "TARGET_SVE
7920    && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
7921    && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
7922   "@
7923    fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7924    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
7925    movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>"
7926   "&& !rtx_equal_p (operands[1], operands[4])"
7927   {
7928     operands[4] = copy_rtx (operands[1]);
7929   }
7930   [(set_attr "movprfx" "*,yes,yes")]
7931 )
7932
7933 ;; Predicated narrowing float-to-integer conversion with merging.
7934 (define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7935   [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7936         (unspec:VNx4SI_ONLY
7937           [(match_operand:VNx2BI 1 "register_operand")
7938            (unspec:VNx4SI_ONLY
7939              [(match_dup 1)
7940               (const_int SVE_STRICT_GP)
7941               (match_operand:VNx2DF_ONLY 2 "register_operand")]
7942              SVE_COND_FCVTI)
7943            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
7944           UNSPEC_SEL))]
7945   "TARGET_SVE"
7946 )
7947
7948 (define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
7949   [(set (match_operand:VNx4SI_ONLY 0 "register_operand" "=&w, &w, ?&w")
7950         (unspec:VNx4SI_ONLY
7951           [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
7952            (unspec:VNx4SI_ONLY
7953              [(match_dup 1)
7954               (match_operand:SI 4 "aarch64_sve_gp_strictness")
7955               (match_operand:VNx2DF_ONLY 2 "register_operand" "w, w, w")]
7956              SVE_COND_FCVTI)
7957            (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
7958           UNSPEC_SEL))]
7959   "TARGET_SVE"
7960   "@
7961    fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7962    movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
7963    movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>"
7964   [(set_attr "movprfx" "*,yes,yes")]
7965 )
7966
7967 ;; -------------------------------------------------------------------------
7968 ;; ---- [INT<-FP] Packs
7969 ;; -------------------------------------------------------------------------
7970 ;; The patterns in this section are synthetic.
7971 ;; -------------------------------------------------------------------------
7972
7973 ;; Convert two vectors of DF to SI and pack the results into a single vector.
7974 (define_expand "vec_pack_<su>fix_trunc_vnx2df"
7975   [(set (match_dup 4)
7976         (unspec:VNx4SI
7977           [(match_dup 3)
7978            (const_int SVE_RELAXED_GP)
7979            (match_operand:VNx2DF 1 "register_operand")]
7980           SVE_COND_FCVTI))
7981    (set (match_dup 5)
7982         (unspec:VNx4SI
7983           [(match_dup 3)
7984            (const_int SVE_RELAXED_GP)
7985            (match_operand:VNx2DF 2 "register_operand")]
7986           SVE_COND_FCVTI))
7987    (set (match_operand:VNx4SI 0 "register_operand")
7988         (unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
7989   "TARGET_SVE"
7990   {
7991     operands[3] = aarch64_ptrue_reg (VNx2BImode);
7992     operands[4] = gen_reg_rtx (VNx4SImode);
7993     operands[5] = gen_reg_rtx (VNx4SImode);
7994   }
7995 )
7996
7997 ;; -------------------------------------------------------------------------
7998 ;; ---- [INT<-FP] Unpacks
7999 ;; -------------------------------------------------------------------------
8000 ;; No patterns here yet!
8001 ;; -------------------------------------------------------------------------
8002
8003 ;; -------------------------------------------------------------------------
8004 ;; ---- [FP<-INT] Conversions
8005 ;; -------------------------------------------------------------------------
8006 ;; Includes:
8007 ;; - SCVTF
8008 ;; - UCVTF
8009 ;; -------------------------------------------------------------------------
8010
8011 ;; Unpredicated conversion of integers to floats of the same size
8012 ;; (HI to HF, SI to SF or DI to DF).
8013 (define_expand "<optab><v_int_equiv><mode>2"
8014   [(set (match_operand:SVE_FULL_F 0 "register_operand")
8015         (unspec:SVE_FULL_F
8016           [(match_dup 2)
8017            (const_int SVE_RELAXED_GP)
8018            (match_operand:<V_INT_EQUIV> 1 "register_operand")]
8019           SVE_COND_ICVTF))]
8020   "TARGET_SVE"
8021   {
8022     operands[2] = aarch64_ptrue_reg (<VPRED>mode);
8023   }
8024 )
8025
8026 ;; Predicated integer-to-float conversion, either to the same width or
8027 ;; narrower.
8028 (define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8029   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
8030         (unspec:SVE_FULL_F
8031           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl")
8032            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8033            (match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
8034           SVE_COND_ICVTF))]
8035   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8036   "<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
8037 )
8038
8039 ;; Predicated widening integer-to-float conversion.
8040 (define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8041   [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w")
8042         (unspec:VNx2DF_ONLY
8043           [(match_operand:VNx2BI 1 "register_operand" "Upl")
8044            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8045            (match_operand:VNx4SI_ONLY 2 "register_operand" "w")]
8046           SVE_COND_ICVTF))]
8047   "TARGET_SVE"
8048   "<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
8049 )
8050
8051 ;; Predicated integer-to-float conversion with merging, either to the same
8052 ;; width or narrower.
8053 (define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8054   [(set (match_operand:SVE_FULL_F 0 "register_operand")
8055         (unspec:SVE_FULL_F
8056           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
8057            (unspec:SVE_FULL_F
8058              [(match_dup 1)
8059               (const_int SVE_STRICT_GP)
8060               (match_operand:SVE_FULL_HSDI 2 "register_operand")]
8061              SVE_COND_ICVTF)
8062            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
8063           UNSPEC_SEL))]
8064   "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
8065 )
8066
8067 ;; The first alternative doesn't need the earlyclobber, but the only case
8068 ;; it would help is the uninteresting one in which operands 2 and 3 are
8069 ;; the same register (despite having different modes).  Making all the
8070 ;; alternatives earlyclobber makes things more consistent for the
8071 ;; register allocator.
8072 (define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
8073   [(set (match_operand:SVE_FULL_F 0 "register_operand" "=&w, &w, ?&w")
8074         (unspec:SVE_FULL_F
8075           [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8076            (unspec:SVE_FULL_F
8077              [(match_operand 4)
8078               (match_operand:SI 5 "aarch64_sve_gp_strictness")
8079               (match_operand:SVE_FULL_HSDI 2 "register_operand" "w, w, w")]
8080              SVE_COND_ICVTF)
8081            (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8082           UNSPEC_SEL))]
8083   "TARGET_SVE
8084    && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>
8085    && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
8086   "@
8087    <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
8088    movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
8089    movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>"
8090   "&& !rtx_equal_p (operands[1], operands[4])"
8091   {
8092     operands[4] = copy_rtx (operands[1]);
8093   }
8094   [(set_attr "movprfx" "*,yes,yes")]
8095 )
8096
8097 ;; Predicated widening integer-to-float conversion with merging.
8098 (define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8099   [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
8100         (unspec:VNx2DF_ONLY
8101           [(match_operand:VNx2BI 1 "register_operand")
8102            (unspec:VNx2DF_ONLY
8103              [(match_dup 1)
8104               (const_int SVE_STRICT_GP)
8105               (match_operand:VNx4SI_ONLY 2 "register_operand")]
8106              SVE_COND_ICVTF)
8107            (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
8108           UNSPEC_SEL))]
8109   "TARGET_SVE"
8110 )
8111
8112 (define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
8113   [(set (match_operand:VNx2DF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
8114         (unspec:VNx2DF_ONLY
8115           [(match_operand:VNx2BI 1 "register_operand" "Upl, Upl, Upl")
8116            (unspec:VNx2DF_ONLY
8117              [(match_dup 1)
8118               (match_operand:SI 4 "aarch64_sve_gp_strictness")
8119               (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w")]
8120              SVE_COND_ICVTF)
8121            (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8122           UNSPEC_SEL))]
8123   "TARGET_SVE"
8124   "@
8125    <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
8126    movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
8127    movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>"
8128   [(set_attr "movprfx" "*,yes,yes")]
8129 )
8130
8131 ;; -------------------------------------------------------------------------
8132 ;; ---- [FP<-INT] Packs
8133 ;; -------------------------------------------------------------------------
8134 ;; No patterns here yet!
8135 ;; -------------------------------------------------------------------------
8136
8137 ;; -------------------------------------------------------------------------
8138 ;; ---- [FP<-INT] Unpacks
8139 ;; -------------------------------------------------------------------------
8140 ;; The patterns in this section are synthetic.
8141 ;; -------------------------------------------------------------------------
8142
8143 ;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
8144 ;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
8145 ;; unpacked VNx4SI to VNx2DF.
8146 (define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
8147   [(match_operand:VNx2DF 0 "register_operand")
8148    (FLOATUORS:VNx2DF
8149      (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
8150                     UNPACK_UNSIGNED))]
8151   "TARGET_SVE"
8152   {
8153     /* Use ZIP to do the unpack, since we don't care about the upper halves
8154        and since it has the nice property of not needing any subregs.
8155        If using UUNPK* turns out to be preferable, we could model it as
8156        a ZIP whose first operand is zero.  */
8157     rtx temp = gen_reg_rtx (VNx4SImode);
8158     emit_insn ((<hi_lanes_optab>
8159                 ? gen_aarch64_sve_zip2vnx4si
8160                 : gen_aarch64_sve_zip1vnx4si)
8161                (temp, operands[1], operands[1]));
8162     rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
8163     rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8164     emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
8165                (operands[0], ptrue, temp, strictness));
8166     DONE;
8167   }
8168 )
8169
8170 ;; -------------------------------------------------------------------------
8171 ;; ---- [FP<-FP] Packs
8172 ;; -------------------------------------------------------------------------
8173 ;; Includes:
8174 ;; - FCVT
8175 ;; -------------------------------------------------------------------------
8176
8177 ;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
8178 ;; the results into a single vector.
8179 (define_expand "vec_pack_trunc_<Vwide>"
8180   [(set (match_dup 4)
8181         (unspec:SVE_FULL_HSF
8182           [(match_dup 3)
8183            (const_int SVE_RELAXED_GP)
8184            (match_operand:<VWIDE> 1 "register_operand")]
8185           UNSPEC_COND_FCVT))
8186    (set (match_dup 5)
8187         (unspec:SVE_FULL_HSF
8188           [(match_dup 3)
8189            (const_int SVE_RELAXED_GP)
8190            (match_operand:<VWIDE> 2 "register_operand")]
8191           UNSPEC_COND_FCVT))
8192    (set (match_operand:SVE_FULL_HSF 0 "register_operand")
8193         (unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
8194   "TARGET_SVE"
8195   {
8196     operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8197     operands[4] = gen_reg_rtx (<MODE>mode);
8198     operands[5] = gen_reg_rtx (<MODE>mode);
8199   }
8200 )
8201
8202 ;; Predicated float-to-float truncation.
8203 (define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8204   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
8205         (unspec:SVE_FULL_HSF
8206           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8207            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8208            (match_operand:SVE_FULL_SDF 2 "register_operand" "w")]
8209           SVE_COND_FCVT))]
8210   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8211   "fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8212 )
8213
8214 ;; Predicated float-to-float truncation with merging.
8215 (define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8216   [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
8217         (unspec:SVE_FULL_HSF
8218           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8219            (unspec:SVE_FULL_HSF
8220              [(match_dup 1)
8221               (const_int SVE_STRICT_GP)
8222               (match_operand:SVE_FULL_SDF 2 "register_operand")]
8223              SVE_COND_FCVT)
8224            (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
8225           UNSPEC_SEL))]
8226   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8227 )
8228
8229 (define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
8230   [(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w, ?&w, ?&w")
8231         (unspec:SVE_FULL_HSF
8232           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8233            (unspec:SVE_FULL_HSF
8234              [(match_dup 1)
8235               (match_operand:SI 4 "aarch64_sve_gp_strictness")
8236               (match_operand:SVE_FULL_SDF 2 "register_operand" "w, w, w")]
8237              SVE_COND_FCVT)
8238            (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8239           UNSPEC_SEL))]
8240   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8241   "@
8242    fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8243    movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
8244    movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>"
8245   [(set_attr "movprfx" "*,yes,yes")]
8246 )
8247
8248 ;; -------------------------------------------------------------------------
8249 ;; ---- [FP<-FP] Packs (bfloat16)
8250 ;; -------------------------------------------------------------------------
8251 ;; Includes:
8252 ;; - BFCVT (BF16)
8253 ;; - BFCVTNT (BF16)
8254 ;; -------------------------------------------------------------------------
8255
8256 ;; Predicated BFCVT.
8257 (define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8258   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
8259         (unspec:VNx8BF_ONLY
8260           [(match_operand:VNx4BI 1 "register_operand" "Upl")
8261            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8262            (match_operand:VNx4SF_ONLY 2 "register_operand" "w")]
8263           SVE_COND_FCVT))]
8264   "TARGET_SVE_BF16"
8265   "bfcvt\t%0.h, %1/m, %2.s"
8266 )
8267
8268 ;; Predicated BFCVT with merging.
8269 (define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8270   [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
8271         (unspec:VNx8BF_ONLY
8272           [(match_operand:VNx4BI 1 "register_operand")
8273            (unspec:VNx8BF_ONLY
8274              [(match_dup 1)
8275               (const_int SVE_STRICT_GP)
8276               (match_operand:VNx4SF_ONLY 2 "register_operand")]
8277              SVE_COND_FCVT)
8278            (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
8279           UNSPEC_SEL))]
8280   "TARGET_SVE_BF16"
8281 )
8282
8283 (define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
8284   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w, ?&w, ?&w")
8285         (unspec:VNx8BF_ONLY
8286           [(match_operand:VNx4BI 1 "register_operand" "Upl, Upl, Upl")
8287            (unspec:VNx8BF_ONLY
8288              [(match_dup 1)
8289               (match_operand:SI 4 "aarch64_sve_gp_strictness")
8290               (match_operand:VNx4SF_ONLY 2 "register_operand" "w, w, w")]
8291              SVE_COND_FCVT)
8292            (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8293           UNSPEC_SEL))]
8294   "TARGET_SVE_BF16"
8295   "@
8296    bfcvt\t%0.h, %1/m, %2.s
8297    movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
8298    movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s"
8299   [(set_attr "movprfx" "*,yes,yes")]
8300 )
8301
8302 ;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
8303 ;; pair because the even elements always have to be supplied for active
8304 ;; elements, even if the inactive elements don't matter.
8305 ;;
8306 ;; This instructions does not take MOVPRFX.
8307 (define_insn "@aarch64_sve_cvtnt<mode>"
8308   [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
8309         (unspec:VNx8BF_ONLY
8310           [(match_operand:VNx4BI 2 "register_operand" "Upl")
8311            (const_int SVE_STRICT_GP)
8312            (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
8313            (match_operand:VNx4SF 3 "register_operand" "w")]
8314           UNSPEC_COND_FCVTNT))]
8315   "TARGET_SVE_BF16"
8316   "bfcvtnt\t%0.h, %2/m, %3.s"
8317 )
8318
8319 ;; -------------------------------------------------------------------------
8320 ;; ---- [FP<-FP] Unpacks
8321 ;; -------------------------------------------------------------------------
8322 ;; Includes:
8323 ;; - FCVT
8324 ;; -------------------------------------------------------------------------
8325
8326 ;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
8327 ;; First unpack the source without conversion, then float-convert the
8328 ;; unpacked source.
8329 (define_expand "vec_unpacks_<perm_hilo>_<mode>"
8330   [(match_operand:<VWIDE> 0 "register_operand")
8331    (unspec:SVE_FULL_HSF
8332      [(match_operand:SVE_FULL_HSF 1 "register_operand")]
8333      UNPACK_UNSIGNED)]
8334   "TARGET_SVE"
8335   {
8336     /* Use ZIP to do the unpack, since we don't care about the upper halves
8337        and since it has the nice property of not needing any subregs.
8338        If using UUNPK* turns out to be preferable, we could model it as
8339        a ZIP whose first operand is zero.  */
8340     rtx temp = gen_reg_rtx (<MODE>mode);
8341     emit_insn ((<hi_lanes_optab>
8342                 ? gen_aarch64_sve_zip2<mode>
8343                 : gen_aarch64_sve_zip1<mode>)
8344                 (temp, operands[1], operands[1]));
8345     rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
8346     rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
8347     emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
8348                (operands[0], ptrue, temp, strictness));
8349     DONE;
8350   }
8351 )
8352
8353 ;; Predicated float-to-float extension.
8354 (define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8355   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
8356         (unspec:SVE_FULL_SDF
8357           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl")
8358            (match_operand:SI 3 "aarch64_sve_gp_strictness")
8359            (match_operand:SVE_FULL_HSF 2 "register_operand" "w")]
8360           SVE_COND_FCVT))]
8361   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8362   "fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8363 )
8364
8365 ;; Predicated float-to-float extension with merging.
8366 (define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8367   [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
8368         (unspec:SVE_FULL_SDF
8369           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
8370            (unspec:SVE_FULL_SDF
8371              [(match_dup 1)
8372               (const_int SVE_STRICT_GP)
8373               (match_operand:SVE_FULL_HSF 2 "register_operand")]
8374              SVE_COND_FCVT)
8375            (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
8376           UNSPEC_SEL))]
8377   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8378 )
8379
8380 (define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
8381   [(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w, ?&w, ?&w")
8382         (unspec:SVE_FULL_SDF
8383           [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand" "Upl, Upl, Upl")
8384            (unspec:SVE_FULL_SDF
8385              [(match_dup 1)
8386               (match_operand:SI 4 "aarch64_sve_gp_strictness")
8387               (match_operand:SVE_FULL_HSF 2 "register_operand" "w, w, w")]
8388              SVE_COND_FCVT)
8389            (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero" "0, Dz, w")]
8390           UNSPEC_SEL))]
8391   "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
8392   "@
8393    fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8394    movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
8395    movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>"
8396   [(set_attr "movprfx" "*,yes,yes")]
8397 )
8398
8399 ;; -------------------------------------------------------------------------
8400 ;; ---- [PRED<-PRED] Packs
8401 ;; -------------------------------------------------------------------------
8402 ;; Includes:
8403 ;; - UZP1
8404 ;; -------------------------------------------------------------------------
8405
8406 ;; Predicate pack.  Use UZP1 on the narrower type, which discards
8407 ;; the high part of each wide element.
8408 (define_insn "vec_pack_trunc_<Vwide>"
8409   [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
8410         (unspec:PRED_BHS
8411           [(match_operand:<VWIDE> 1 "register_operand" "Upa")
8412            (match_operand:<VWIDE> 2 "register_operand" "Upa")]
8413           UNSPEC_PACK))]
8414   "TARGET_SVE"
8415   "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
8416 )
8417
8418 ;; -------------------------------------------------------------------------
8419 ;; ---- [PRED<-PRED] Unpacks
8420 ;; -------------------------------------------------------------------------
8421 ;; Includes:
8422 ;; - PUNPKHI
8423 ;; - PUNPKLO
8424 ;; -------------------------------------------------------------------------
8425
8426 ;; Unpack the low or high half of a predicate, where "high" refers to
8427 ;; the low-numbered lanes for big-endian and the high-numbered lanes
8428 ;; for little-endian.
8429 (define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
8430   [(match_operand:<VWIDE> 0 "register_operand")
8431    (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
8432                    UNPACK)]
8433   "TARGET_SVE"
8434   {
8435     emit_insn ((<hi_lanes_optab>
8436                 ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
8437                 : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
8438                (operands[0], operands[1]));
8439     DONE;
8440   }
8441 )
8442
8443 (define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
8444   [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
8445         (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
8446                         UNPACK_UNSIGNED))]
8447   "TARGET_SVE"
8448   "punpk<perm_hilo>\t%0.h, %1.b"
8449 )
8450
8451 ;; =========================================================================
8452 ;; == Vector partitioning
8453 ;; =========================================================================
8454
8455 ;; -------------------------------------------------------------------------
8456 ;; ---- [PRED] Unary partitioning
8457 ;; -------------------------------------------------------------------------
8458 ;; Includes:
8459 ;; - BRKA
8460 ;; - BRKAS
8461 ;; - BRKB
8462 ;; - BRKBS
8463 ;; -------------------------------------------------------------------------
8464
8465 ;; Note that unlike most other instructions that have both merging and
8466 ;; zeroing forms, these instructions don't operate elementwise and so
8467 ;; don't fit the IFN_COND model.
8468 (define_insn "@aarch64_brk<brk_op>"
8469   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8470         (unspec:VNx16BI
8471           [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8472            (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8473            (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8474           SVE_BRK_UNARY))]
8475   "TARGET_SVE"
8476   "@
8477    brk<brk_op>\t%0.b, %1/z, %2.b
8478    brk<brk_op>\t%0.b, %1/m, %2.b"
8479 )
8480
8481 ;; Same, but also producing a flags result.
8482 (define_insn "*aarch64_brk<brk_op>_cc"
8483   [(set (reg:CC_NZC CC_REGNUM)
8484         (unspec:CC_NZC
8485           [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8486            (match_dup 1)
8487            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8488            (unspec:VNx16BI
8489              [(match_dup 1)
8490               (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8491               (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8492              SVE_BRK_UNARY)]
8493           UNSPEC_PTEST))
8494    (set (match_operand:VNx16BI 0 "register_operand" "=Upa, Upa")
8495         (unspec:VNx16BI
8496           [(match_dup 1)
8497            (match_dup 2)
8498            (match_dup 3)]
8499           SVE_BRK_UNARY))]
8500   "TARGET_SVE"
8501   "@
8502    brk<brk_op>s\t%0.b, %1/z, %2.b
8503    brk<brk_op>s\t%0.b, %1/m, %2.b"
8504 )
8505
8506 ;; Same, but with only the flags result being interesting.
8507 (define_insn "*aarch64_brk<brk_op>_ptest"
8508   [(set (reg:CC_NZC CC_REGNUM)
8509         (unspec:CC_NZC
8510           [(match_operand:VNx16BI 1 "register_operand" "Upa, Upa")
8511            (match_dup 1)
8512            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8513            (unspec:VNx16BI
8514              [(match_dup 1)
8515               (match_operand:VNx16BI 2 "register_operand" "Upa, Upa")
8516               (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero" "Dz, 0")]
8517              SVE_BRK_UNARY)]
8518           UNSPEC_PTEST))
8519    (clobber (match_scratch:VNx16BI 0 "=Upa, Upa"))]
8520   "TARGET_SVE"
8521   "@
8522    brk<brk_op>s\t%0.b, %1/z, %2.b
8523    brk<brk_op>s\t%0.b, %1/m, %2.b"
8524 )
8525
8526 ;; -------------------------------------------------------------------------
8527 ;; ---- [PRED] Binary partitioning
8528 ;; -------------------------------------------------------------------------
8529 ;; Includes:
8530 ;; - BRKN
8531 ;; - BRKNS
8532 ;; - BRKPA
8533 ;; - BRKPAS
8534 ;; - BRKPB
8535 ;; - BRKPBS
8536 ;; -------------------------------------------------------------------------
8537
8538 ;; Binary BRKs (BRKN, BRKPA, BRKPB).
8539 (define_insn "@aarch64_brk<brk_op>"
8540   [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8541         (unspec:VNx16BI
8542           [(match_operand:VNx16BI 1 "register_operand" "Upa")
8543            (match_operand:VNx16BI 2 "register_operand" "Upa")
8544            (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8545           SVE_BRK_BINARY))]
8546   "TARGET_SVE"
8547   "brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8548 )
8549
8550 ;; Same, but also producing a flags result.
8551 (define_insn "*aarch64_brk<brk_op>_cc"
8552   [(set (reg:CC_NZC CC_REGNUM)
8553         (unspec:CC_NZC
8554           [(match_operand:VNx16BI 1 "register_operand" "Upa")
8555            (match_dup 1)
8556            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8557            (unspec:VNx16BI
8558              [(match_dup 1)
8559               (match_operand:VNx16BI 2 "register_operand" "Upa")
8560               (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8561              SVE_BRK_BINARY)]
8562           UNSPEC_PTEST))
8563    (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
8564         (unspec:VNx16BI
8565           [(match_dup 1)
8566            (match_dup 2)
8567            (match_dup 3)]
8568           SVE_BRK_BINARY))]
8569   "TARGET_SVE"
8570   "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8571 )
8572
8573 ;; Same, but with only the flags result being interesting.
8574 (define_insn "*aarch64_brk<brk_op>_ptest"
8575   [(set (reg:CC_NZC CC_REGNUM)
8576         (unspec:CC_NZC
8577           [(match_operand:VNx16BI 1 "register_operand" "Upa")
8578            (match_dup 1)
8579            (match_operand:SI 4 "aarch64_sve_ptrue_flag")
8580            (unspec:VNx16BI
8581              [(match_dup 1)
8582               (match_operand:VNx16BI 2 "register_operand" "Upa")
8583               (match_operand:VNx16BI 3 "register_operand" "<brk_reg_con>")]
8584              SVE_BRK_BINARY)]
8585           UNSPEC_PTEST))
8586    (clobber (match_scratch:VNx16BI 0 "=Upa"))]
8587   "TARGET_SVE"
8588   "brk<brk_op>s\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b"
8589 )
8590
8591 ;; -------------------------------------------------------------------------
8592 ;; ---- [PRED] Scalarization
8593 ;; -------------------------------------------------------------------------
8594 ;; Includes:
8595 ;; - PFIRST
8596 ;; - PNEXT
8597 ;; -------------------------------------------------------------------------
8598
8599 (define_insn "@aarch64_sve_<sve_pred_op><mode>"
8600   [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8601         (unspec:PRED_ALL
8602           [(match_operand:PRED_ALL 1 "register_operand" "Upa")
8603            (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8604            (match_operand:PRED_ALL 3 "register_operand" "0")]
8605           SVE_PITER))
8606    (clobber (reg:CC_NZC CC_REGNUM))]
8607   "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
8608   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8609 )
8610
8611 ;; Same, but also producing a flags result.
8612 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
8613   [(set (reg:CC_NZC CC_REGNUM)
8614         (unspec:CC_NZC
8615           [(match_operand:VNx16BI 1 "register_operand" "Upa")
8616            (match_operand 2)
8617            (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8618            (unspec:PRED_ALL
8619              [(match_operand 4)
8620               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8621               (match_operand:PRED_ALL 6 "register_operand" "0")]
8622              SVE_PITER)]
8623           UNSPEC_PTEST))
8624    (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
8625         (unspec:PRED_ALL
8626           [(match_dup 4)
8627            (match_dup 5)
8628            (match_dup 6)]
8629           SVE_PITER))]
8630   "TARGET_SVE
8631    && <max_elem_bits> >= <elem_bits>
8632    && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8633   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8634   "&& !rtx_equal_p (operands[2], operands[4])"
8635   {
8636     operands[4] = operands[2];
8637     operands[5] = operands[3];
8638   }
8639 )
8640
8641 ;; Same, but with only the flags result being interesting.
8642 (define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
8643   [(set (reg:CC_NZC CC_REGNUM)
8644         (unspec:CC_NZC
8645           [(match_operand:VNx16BI 1 "register_operand" "Upa")
8646            (match_operand 2)
8647            (match_operand:SI 3 "aarch64_sve_ptrue_flag")
8648            (unspec:PRED_ALL
8649              [(match_operand 4)
8650               (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8651               (match_operand:PRED_ALL 6 "register_operand" "0")]
8652              SVE_PITER)]
8653           UNSPEC_PTEST))
8654    (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
8655   "TARGET_SVE
8656    && <max_elem_bits> >= <elem_bits>
8657    && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
8658   "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
8659   "&& !rtx_equal_p (operands[2], operands[4])"
8660   {
8661     operands[4] = operands[2];
8662     operands[5] = operands[3];
8663   }
8664 )
8665
8666 ;; =========================================================================
8667 ;; == Counting elements
8668 ;; =========================================================================
8669
8670 ;; -------------------------------------------------------------------------
8671 ;; ---- [INT] Count elements in a pattern (scalar)
8672 ;; -------------------------------------------------------------------------
8673 ;; Includes:
8674 ;; - CNTB
8675 ;; - CNTD
8676 ;; - CNTH
8677 ;; - CNTW
8678 ;; -------------------------------------------------------------------------
8679
8680 ;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
8681 ;; operand 2 is the number of elements that fit in a 128-bit block, and
8682 ;; operand 3 is a multiplier in the range [1, 16].
8683 ;;
8684 ;; Note that this pattern isn't used for SV_ALL (but would work for that too).
8685 (define_insn "aarch64_sve_cnt_pat"
8686   [(set (match_operand:DI 0 "register_operand" "=r")
8687         (zero_extend:DI
8688           (unspec:SI [(match_operand:DI 1 "const_int_operand")
8689                       (match_operand:DI 2 "const_int_operand")
8690                       (match_operand:DI 3 "const_int_operand")]
8691                      UNSPEC_SVE_CNT_PAT)))]
8692   "TARGET_SVE"
8693   {
8694     return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
8695   }
8696 )
8697
8698 ;; -------------------------------------------------------------------------
8699 ;; ---- [INT] Increment by the number of elements in a pattern (scalar)
8700 ;; -------------------------------------------------------------------------
8701 ;; Includes:
8702 ;; - INC
8703 ;; - SQINC
8704 ;; - UQINC
8705 ;; -------------------------------------------------------------------------
8706
8707 ;; Increment a DImode register by the number of elements in an svpattern.
8708 ;; See aarch64_sve_cnt_pat for the counting behavior.
8709 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8710   [(set (match_operand:DI 0 "register_operand" "=r")
8711         (ANY_PLUS:DI (zero_extend:DI
8712                        (unspec:SI [(match_operand:DI 2 "const_int_operand")
8713                                    (match_operand:DI 3 "const_int_operand")
8714                                    (match_operand:DI 4 "const_int_operand")]
8715                                   UNSPEC_SVE_CNT_PAT))
8716                      (match_operand:DI_ONLY 1 "register_operand" "0")))]
8717   "TARGET_SVE"
8718   {
8719     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8720                                                  operands + 2);
8721   }
8722 )
8723
8724 ;; Increment an SImode register by the number of elements in an svpattern
8725 ;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
8726 ;; behavior.
8727 (define_insn "*aarch64_sve_incsi_pat"
8728   [(set (match_operand:SI 0 "register_operand" "=r")
8729         (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
8730                              (match_operand:DI 3 "const_int_operand")
8731                              (match_operand:DI 4 "const_int_operand")]
8732                             UNSPEC_SVE_CNT_PAT)
8733                  (match_operand:SI 1 "register_operand" "0")))]
8734   "TARGET_SVE"
8735   {
8736     return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
8737   }
8738 )
8739
8740 ;; Increment an SImode register by the number of elements in an svpattern
8741 ;; using saturating arithmetic, extending the result to 64 bits.
8742 ;;
8743 ;; See aarch64_sve_cnt_pat for the counting behavior.
8744 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8745   [(set (match_operand:DI 0 "register_operand" "=r")
8746         (<paired_extend>:DI
8747           (SAT_PLUS:SI
8748             (unspec:SI [(match_operand:DI 2 "const_int_operand")
8749                         (match_operand:DI 3 "const_int_operand")
8750                         (match_operand:DI 4 "const_int_operand")]
8751                        UNSPEC_SVE_CNT_PAT)
8752             (match_operand:SI_ONLY 1 "register_operand" "0"))))]
8753   "TARGET_SVE"
8754   {
8755     const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
8756     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8757                                                  operands + 2);
8758   }
8759 )
8760
8761 ;; -------------------------------------------------------------------------
8762 ;; ---- [INT] Increment by the number of elements in a pattern (vector)
8763 ;; -------------------------------------------------------------------------
8764 ;; Includes:
8765 ;; - INC
8766 ;; - SQINC
8767 ;; - UQINC
8768 ;; -------------------------------------------------------------------------
8769
8770 ;; Increment a vector of DIs by the number of elements in an svpattern.
8771 ;; See aarch64_sve_cnt_pat for the counting behavior.
8772 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8773   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8774         (ANY_PLUS:VNx2DI
8775           (vec_duplicate:VNx2DI
8776             (zero_extend:DI
8777               (unspec:SI [(match_operand:DI 2 "const_int_operand")
8778                           (match_operand:DI 3 "const_int_operand")
8779                           (match_operand:DI 4 "const_int_operand")]
8780                          UNSPEC_SVE_CNT_PAT)))
8781           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
8782   "TARGET_SVE"
8783   {
8784     if (which_alternative == 1)
8785       output_asm_insn ("movprfx\t%0, %1", operands);
8786     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8787                                                  operands + 2);
8788   }
8789   [(set_attr "movprfx" "*,yes")]
8790 )
8791
8792 ;; Increment a vector of SIs by the number of elements in an svpattern.
8793 ;; See aarch64_sve_cnt_pat for the counting behavior.
8794 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8795   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8796         (ANY_PLUS:VNx4SI
8797           (vec_duplicate:VNx4SI
8798             (unspec:SI [(match_operand:DI 2 "const_int_operand")
8799                         (match_operand:DI 3 "const_int_operand")
8800                         (match_operand:DI 4 "const_int_operand")]
8801                        UNSPEC_SVE_CNT_PAT))
8802           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
8803   "TARGET_SVE"
8804   {
8805     if (which_alternative == 1)
8806       output_asm_insn ("movprfx\t%0, %1", operands);
8807     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8808                                                  operands + 2);
8809   }
8810   [(set_attr "movprfx" "*,yes")]
8811 )
8812
8813 ;; Increment a vector of HIs by the number of elements in an svpattern.
8814 ;; See aarch64_sve_cnt_pat for the counting behavior.
8815 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8816   [(set (match_operand:VNx8HI 0 "register_operand")
8817         (ANY_PLUS:VNx8HI
8818           (vec_duplicate:VNx8HI
8819             (truncate:HI
8820               (unspec:SI [(match_operand:DI 2 "const_int_operand")
8821                           (match_operand:DI 3 "const_int_operand")
8822                           (match_operand:DI 4 "const_int_operand")]
8823                          UNSPEC_SVE_CNT_PAT)))
8824           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
8825   "TARGET_SVE"
8826 )
8827
8828 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8829   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8830         (ANY_PLUS:VNx8HI
8831           (vec_duplicate:VNx8HI
8832             (match_operator:HI 5 "subreg_lowpart_operator"
8833               [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8834                            (match_operand:DI 3 "const_int_operand")
8835                            (match_operand:DI 4 "const_int_operand")]
8836                           UNSPEC_SVE_CNT_PAT)]))
8837           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
8838   "TARGET_SVE"
8839   {
8840     if (which_alternative == 1)
8841       output_asm_insn ("movprfx\t%0, %1", operands);
8842     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8843                                                  operands + 2);
8844   }
8845   [(set_attr "movprfx" "*,yes")]
8846 )
8847
8848 ;; -------------------------------------------------------------------------
8849 ;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
8850 ;; -------------------------------------------------------------------------
8851 ;; Includes:
8852 ;; - DEC
8853 ;; - SQDEC
8854 ;; - UQDEC
8855 ;; -------------------------------------------------------------------------
8856
8857 ;; Decrement a DImode register by the number of elements in an svpattern.
8858 ;; See aarch64_sve_cnt_pat for the counting behavior.
8859 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8860   [(set (match_operand:DI 0 "register_operand" "=r")
8861         (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
8862                       (zero_extend:DI
8863                         (unspec:SI [(match_operand:DI 2 "const_int_operand")
8864                                     (match_operand:DI 3 "const_int_operand")
8865                                     (match_operand:DI 4 "const_int_operand")]
8866                                    UNSPEC_SVE_CNT_PAT))))]
8867   "TARGET_SVE"
8868   {
8869     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
8870                                                  operands + 2);
8871   }
8872 )
8873
8874 ;; Decrement an SImode register by the number of elements in an svpattern
8875 ;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
8876 ;; behavior.
8877 (define_insn "*aarch64_sve_decsi_pat"
8878   [(set (match_operand:SI 0 "register_operand" "=r")
8879         (minus:SI (match_operand:SI 1 "register_operand" "0")
8880                   (unspec:SI [(match_operand:DI 2 "const_int_operand")
8881                               (match_operand:DI 3 "const_int_operand")
8882                               (match_operand:DI 4 "const_int_operand")]
8883                              UNSPEC_SVE_CNT_PAT)))]
8884   "TARGET_SVE"
8885   {
8886     return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
8887   }
8888 )
8889
8890 ;; Decrement an SImode register by the number of elements in an svpattern
8891 ;; using saturating arithmetic, extending the result to 64 bits.
8892 ;;
8893 ;; See aarch64_sve_cnt_pat for the counting behavior.
8894 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8895   [(set (match_operand:DI 0 "register_operand" "=r")
8896         (<paired_extend>:DI
8897           (SAT_MINUS:SI
8898             (match_operand:SI_ONLY 1 "register_operand" "0")
8899             (unspec:SI [(match_operand:DI 2 "const_int_operand")
8900                         (match_operand:DI 3 "const_int_operand")
8901                         (match_operand:DI 4 "const_int_operand")]
8902                        UNSPEC_SVE_CNT_PAT))))]
8903   "TARGET_SVE"
8904   {
8905     const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
8906     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
8907                                                  operands + 2);
8908   }
8909 )
8910
8911 ;; -------------------------------------------------------------------------
8912 ;; ---- [INT] Decrement by the number of elements in a pattern (vector)
8913 ;; -------------------------------------------------------------------------
8914 ;; Includes:
8915 ;; - DEC
8916 ;; - SQDEC
8917 ;; - UQDEC
8918 ;; -------------------------------------------------------------------------
8919
8920 ;; Decrement a vector of DIs by the number of elements in an svpattern.
8921 ;; See aarch64_sve_cnt_pat for the counting behavior.
8922 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8923   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
8924         (ANY_MINUS:VNx2DI
8925           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
8926           (vec_duplicate:VNx2DI
8927             (zero_extend:DI
8928               (unspec:SI [(match_operand:DI 2 "const_int_operand")
8929                           (match_operand:DI 3 "const_int_operand")
8930                           (match_operand:DI 4 "const_int_operand")]
8931                          UNSPEC_SVE_CNT_PAT)))))]
8932   "TARGET_SVE"
8933   {
8934     if (which_alternative == 1)
8935       output_asm_insn ("movprfx\t%0, %1", operands);
8936     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8937                                                  operands + 2);
8938   }
8939   [(set_attr "movprfx" "*,yes")]
8940 )
8941
8942 ;; Decrement a vector of SIs by the number of elements in an svpattern.
8943 ;; See aarch64_sve_cnt_pat for the counting behavior.
8944 (define_insn "@aarch64_sve_<inc_dec><mode>_pat"
8945   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
8946         (ANY_MINUS:VNx4SI
8947           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
8948           (vec_duplicate:VNx4SI
8949             (unspec:SI [(match_operand:DI 2 "const_int_operand")
8950                         (match_operand:DI 3 "const_int_operand")
8951                         (match_operand:DI 4 "const_int_operand")]
8952                        UNSPEC_SVE_CNT_PAT))))]
8953   "TARGET_SVE"
8954   {
8955     if (which_alternative == 1)
8956       output_asm_insn ("movprfx\t%0, %1", operands);
8957     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8958                                                  operands + 2);
8959   }
8960   [(set_attr "movprfx" "*,yes")]
8961 )
8962
8963 ;; Decrement a vector of HIs by the number of elements in an svpattern.
8964 ;; See aarch64_sve_cnt_pat for the counting behavior.
8965 (define_expand "@aarch64_sve_<inc_dec><mode>_pat"
8966   [(set (match_operand:VNx8HI 0 "register_operand")
8967         (ANY_MINUS:VNx8HI
8968           (match_operand:VNx8HI_ONLY 1 "register_operand")
8969           (vec_duplicate:VNx8HI
8970             (truncate:HI
8971               (unspec:SI [(match_operand:DI 2 "const_int_operand")
8972                           (match_operand:DI 3 "const_int_operand")
8973                           (match_operand:DI 4 "const_int_operand")]
8974                          UNSPEC_SVE_CNT_PAT)))))]
8975   "TARGET_SVE"
8976 )
8977
8978 (define_insn "*aarch64_sve_<inc_dec><mode>_pat"
8979   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
8980         (ANY_MINUS:VNx8HI
8981           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
8982           (vec_duplicate:VNx8HI
8983             (match_operator:HI 5 "subreg_lowpart_operator"
8984               [(unspec:SI [(match_operand:DI 2 "const_int_operand")
8985                            (match_operand:DI 3 "const_int_operand")
8986                            (match_operand:DI 4 "const_int_operand")]
8987                           UNSPEC_SVE_CNT_PAT)]))))]
8988   "TARGET_SVE"
8989   {
8990     if (which_alternative == 1)
8991       output_asm_insn ("movprfx\t%0, %1", operands);
8992     return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
8993                                                  operands + 2);
8994   }
8995   [(set_attr "movprfx" "*,yes")]
8996 )
8997
8998 ;; -------------------------------------------------------------------------
8999 ;; ---- [INT] Count elements in a predicate (scalar)
9000 ;; -------------------------------------------------------------------------
9001 ;; Includes:
9002 ;; - CNTP
9003 ;; -------------------------------------------------------------------------
9004
9005 ;; Count the number of set bits in a predicate.  Operand 3 is true if
9006 ;; operand 1 is known to be all-true.
9007 (define_insn "@aarch64_pred_cntp<mode>"
9008   [(set (match_operand:DI 0 "register_operand" "=r")
9009         (zero_extend:DI
9010           (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
9011                       (match_operand:SI 2 "aarch64_sve_ptrue_flag")
9012                       (match_operand:PRED_ALL 3 "register_operand" "Upa")]
9013                      UNSPEC_CNTP)))]
9014   "TARGET_SVE"
9015   "cntp\t%x0, %1, %3.<Vetype>")
9016
9017 ;; -------------------------------------------------------------------------
9018 ;; ---- [INT] Increment by the number of elements in a predicate (scalar)
9019 ;; -------------------------------------------------------------------------
9020 ;; Includes:
9021 ;; - INCP
9022 ;; - SQINCP
9023 ;; - UQINCP
9024 ;; -------------------------------------------------------------------------
9025
9026 ;; Increment a DImode register by the number of set bits in a predicate.
9027 ;; See aarch64_sve_cntp for a description of the operands.
9028 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9029   [(set (match_operand:DI 0 "register_operand")
9030         (ANY_PLUS:DI
9031           (zero_extend:DI
9032             (unspec:SI [(match_dup 3)
9033                         (const_int SVE_KNOWN_PTRUE)
9034                         (match_operand:PRED_ALL 2 "register_operand")]
9035                        UNSPEC_CNTP))
9036           (match_operand:DI_ONLY 1 "register_operand")))]
9037   "TARGET_SVE"
9038   {
9039     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9040   }
9041 )
9042
9043 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9044   [(set (match_operand:DI 0 "register_operand" "=r")
9045         (ANY_PLUS:DI
9046           (zero_extend:DI
9047             (unspec:SI [(match_operand 3)
9048                         (const_int SVE_KNOWN_PTRUE)
9049                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9050                        UNSPEC_CNTP))
9051           (match_operand:DI_ONLY 1 "register_operand" "0")))]
9052   "TARGET_SVE"
9053   "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
9054   "&& !CONSTANT_P (operands[3])"
9055   {
9056     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9057   }
9058 )
9059
9060 ;; Increment an SImode register by the number of set bits in a predicate
9061 ;; using modular arithmetic.  See aarch64_sve_cntp for a description of
9062 ;; the operands.
9063 (define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
9064   [(set (match_operand:SI 0 "register_operand" "=r")
9065         (plus:SI
9066           (unspec:SI [(match_operand 3)
9067                       (const_int SVE_KNOWN_PTRUE)
9068                       (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9069                      UNSPEC_CNTP)
9070           (match_operand:SI 1 "register_operand" "0")))]
9071   "TARGET_SVE"
9072   "incp\t%x0, %2.<Vetype>"
9073   "&& !CONSTANT_P (operands[3])"
9074   {
9075     operands[3] = CONSTM1_RTX (<MODE>mode);
9076   }
9077 )
9078
9079 ;; Increment an SImode register by the number of set bits in a predicate
9080 ;; using saturating arithmetic, extending the result to 64 bits.
9081 ;;
9082 ;; See aarch64_sve_cntp for a description of the operands.
9083 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9084   [(set (match_operand:DI 0 "register_operand")
9085         (<paired_extend>:DI
9086           (SAT_PLUS:SI
9087             (unspec:SI [(match_dup 3)
9088                         (const_int SVE_KNOWN_PTRUE)
9089                         (match_operand:PRED_ALL 2 "register_operand")]
9090                        UNSPEC_CNTP)
9091             (match_operand:SI_ONLY 1 "register_operand"))))]
9092   "TARGET_SVE"
9093   {
9094     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9095   }
9096 )
9097
9098 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9099   [(set (match_operand:DI 0 "register_operand" "=r")
9100         (<paired_extend>:DI
9101           (SAT_PLUS:SI
9102             (unspec:SI [(match_operand 3)
9103                         (const_int SVE_KNOWN_PTRUE)
9104                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9105                        UNSPEC_CNTP)
9106             (match_operand:SI_ONLY 1 "register_operand" "0"))))]
9107   "TARGET_SVE"
9108   {
9109     if (<CODE> == SS_PLUS)
9110       return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
9111     else
9112       return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
9113   }
9114   "&& !CONSTANT_P (operands[3])"
9115   {
9116     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9117   }
9118 )
9119
9120 ;; -------------------------------------------------------------------------
9121 ;; ---- [INT] Increment by the number of elements in a predicate (vector)
9122 ;; -------------------------------------------------------------------------
9123 ;; Includes:
9124 ;; - INCP
9125 ;; - SQINCP
9126 ;; - UQINCP
9127 ;; -------------------------------------------------------------------------
9128
9129 ;; Increment a vector of DIs by the number of set bits in a predicate.
9130 ;; See aarch64_sve_cntp for a description of the operands.
9131 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9132   [(set (match_operand:VNx2DI 0 "register_operand")
9133         (ANY_PLUS:VNx2DI
9134           (vec_duplicate:VNx2DI
9135             (zero_extend:DI
9136               (unspec:SI
9137                 [(match_dup 3)
9138                  (const_int SVE_KNOWN_PTRUE)
9139                  (match_operand:<VPRED> 2 "register_operand")]
9140                 UNSPEC_CNTP)))
9141           (match_operand:VNx2DI_ONLY 1 "register_operand")))]
9142   "TARGET_SVE"
9143   {
9144     operands[3] = CONSTM1_RTX (<VPRED>mode);
9145   }
9146 )
9147
9148 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9149   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9150         (ANY_PLUS:VNx2DI
9151           (vec_duplicate:VNx2DI
9152             (zero_extend:DI
9153               (unspec:SI
9154                 [(match_operand 3)
9155                  (const_int SVE_KNOWN_PTRUE)
9156                  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9157                 UNSPEC_CNTP)))
9158           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
9159   "TARGET_SVE"
9160   "@
9161    <inc_dec>p\t%0.d, %2
9162    movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
9163   "&& !CONSTANT_P (operands[3])"
9164   {
9165     operands[3] = CONSTM1_RTX (<VPRED>mode);
9166   }
9167   [(set_attr "movprfx" "*,yes")]
9168 )
9169
9170 ;; Increment a vector of SIs by the number of set bits in a predicate.
9171 ;; See aarch64_sve_cntp for a description of the operands.
9172 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9173   [(set (match_operand:VNx4SI 0 "register_operand")
9174         (ANY_PLUS:VNx4SI
9175           (vec_duplicate:VNx4SI
9176             (unspec:SI
9177               [(match_dup 3)
9178                (const_int SVE_KNOWN_PTRUE)
9179                (match_operand:<VPRED> 2 "register_operand")]
9180               UNSPEC_CNTP))
9181           (match_operand:VNx4SI_ONLY 1 "register_operand")))]
9182   "TARGET_SVE"
9183   {
9184     operands[3] = CONSTM1_RTX (<VPRED>mode);
9185   }
9186 )
9187
9188 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9189   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9190         (ANY_PLUS:VNx4SI
9191           (vec_duplicate:VNx4SI
9192             (unspec:SI
9193               [(match_operand 3)
9194                (const_int SVE_KNOWN_PTRUE)
9195                (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9196               UNSPEC_CNTP))
9197           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
9198   "TARGET_SVE"
9199   "@
9200    <inc_dec>p\t%0.s, %2
9201    movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
9202   "&& !CONSTANT_P (operands[3])"
9203   {
9204     operands[3] = CONSTM1_RTX (<VPRED>mode);
9205   }
9206   [(set_attr "movprfx" "*,yes")]
9207 )
9208
9209 ;; Increment a vector of HIs by the number of set bits in a predicate.
9210 ;; See aarch64_sve_cntp for a description of the operands.
9211 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9212   [(set (match_operand:VNx8HI 0 "register_operand")
9213         (ANY_PLUS:VNx8HI
9214           (vec_duplicate:VNx8HI
9215             (truncate:HI
9216               (unspec:SI
9217                 [(match_dup 3)
9218                  (const_int SVE_KNOWN_PTRUE)
9219                  (match_operand:<VPRED> 2 "register_operand")]
9220                 UNSPEC_CNTP)))
9221           (match_operand:VNx8HI_ONLY 1 "register_operand")))]
9222   "TARGET_SVE"
9223   {
9224     operands[3] = CONSTM1_RTX (<VPRED>mode);
9225   }
9226 )
9227
9228 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9229   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9230         (ANY_PLUS:VNx8HI
9231           (vec_duplicate:VNx8HI
9232             (match_operator:HI 3 "subreg_lowpart_operator"
9233               [(unspec:SI
9234                  [(match_operand 4)
9235                   (const_int SVE_KNOWN_PTRUE)
9236                   (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9237                  UNSPEC_CNTP)]))
9238           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
9239   "TARGET_SVE"
9240   "@
9241    <inc_dec>p\t%0.h, %2
9242    movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
9243   "&& !CONSTANT_P (operands[4])"
9244   {
9245     operands[4] = CONSTM1_RTX (<VPRED>mode);
9246   }
9247   [(set_attr "movprfx" "*,yes")]
9248 )
9249
9250 ;; -------------------------------------------------------------------------
9251 ;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
9252 ;; -------------------------------------------------------------------------
9253 ;; Includes:
9254 ;; - DECP
9255 ;; - SQDECP
9256 ;; - UQDECP
9257 ;; -------------------------------------------------------------------------
9258
9259 ;; Decrement a DImode register by the number of set bits in a predicate.
9260 ;; See aarch64_sve_cntp for a description of the operands.
9261 (define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9262   [(set (match_operand:DI 0 "register_operand")
9263         (ANY_MINUS:DI
9264           (match_operand:DI_ONLY 1 "register_operand")
9265           (zero_extend:DI
9266             (unspec:SI [(match_dup 3)
9267                         (const_int SVE_KNOWN_PTRUE)
9268                         (match_operand:PRED_ALL 2 "register_operand")]
9269                        UNSPEC_CNTP))))]
9270   "TARGET_SVE"
9271   {
9272     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9273   }
9274 )
9275
9276 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
9277   [(set (match_operand:DI 0 "register_operand" "=r")
9278         (ANY_MINUS:DI
9279           (match_operand:DI_ONLY 1 "register_operand" "0")
9280           (zero_extend:DI
9281             (unspec:SI [(match_operand 3)
9282                         (const_int SVE_KNOWN_PTRUE)
9283                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9284                        UNSPEC_CNTP))))]
9285   "TARGET_SVE"
9286   "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
9287   "&& !CONSTANT_P (operands[3])"
9288   {
9289     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9290   }
9291 )
9292
9293 ;; Decrement an SImode register by the number of set bits in a predicate
9294 ;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
9295 ;; operands.
9296 (define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
9297   [(set (match_operand:SI 0 "register_operand" "=r")
9298         (minus:SI
9299           (match_operand:SI 1 "register_operand" "0")
9300           (unspec:SI [(match_operand 3)
9301                       (const_int SVE_KNOWN_PTRUE)
9302                       (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9303                      UNSPEC_CNTP)))]
9304   "TARGET_SVE"
9305   "decp\t%x0, %2.<Vetype>"
9306   "&& !CONSTANT_P (operands[3])"
9307   {
9308     operands[3] = CONSTM1_RTX (<MODE>mode);
9309   }
9310 )
9311
9312 ;; Decrement an SImode register by the number of set bits in a predicate
9313 ;; using saturating arithmetic, extending the result to 64 bits.
9314 ;;
9315 ;; See aarch64_sve_cntp for a description of the operands.
9316 (define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9317   [(set (match_operand:DI 0 "register_operand")
9318         (<paired_extend>:DI
9319           (SAT_MINUS:SI
9320             (match_operand:SI_ONLY 1 "register_operand")
9321             (unspec:SI [(match_dup 3)
9322                         (const_int SVE_KNOWN_PTRUE)
9323                         (match_operand:PRED_ALL 2 "register_operand")]
9324                        UNSPEC_CNTP))))]
9325   "TARGET_SVE"
9326   {
9327     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9328   }
9329 )
9330
9331 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
9332   [(set (match_operand:DI 0 "register_operand" "=r")
9333         (<paired_extend>:DI
9334           (SAT_MINUS:SI
9335             (match_operand:SI_ONLY 1 "register_operand" "0")
9336             (unspec:SI [(match_operand 3)
9337                         (const_int SVE_KNOWN_PTRUE)
9338                         (match_operand:PRED_ALL 2 "register_operand" "Upa")]
9339                        UNSPEC_CNTP))))]
9340   "TARGET_SVE"
9341   {
9342     if (<CODE> == SS_MINUS)
9343       return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
9344     else
9345       return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
9346   }
9347   "&& !CONSTANT_P (operands[3])"
9348   {
9349     operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
9350   }
9351 )
9352
9353 ;; -------------------------------------------------------------------------
9354 ;; ---- [INT] Decrement by the number of elements in a predicate (vector)
9355 ;; -------------------------------------------------------------------------
9356 ;; Includes:
9357 ;; - DECP
9358 ;; - SQDECP
9359 ;; - UQDECP
9360 ;; -------------------------------------------------------------------------
9361
9362 ;; Decrement a vector of DIs by the number of set bits in a predicate.
9363 ;; See aarch64_sve_cntp for a description of the operands.
9364 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9365   [(set (match_operand:VNx2DI 0 "register_operand")
9366         (ANY_MINUS:VNx2DI
9367           (match_operand:VNx2DI_ONLY 1 "register_operand")
9368           (vec_duplicate:VNx2DI
9369             (zero_extend:DI
9370               (unspec:SI
9371                 [(match_dup 3)
9372                  (const_int SVE_KNOWN_PTRUE)
9373                  (match_operand:<VPRED> 2 "register_operand")]
9374                 UNSPEC_CNTP)))))]
9375   "TARGET_SVE"
9376   {
9377     operands[3] = CONSTM1_RTX (<VPRED>mode);
9378   }
9379 )
9380
9381 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9382   [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
9383         (ANY_MINUS:VNx2DI
9384           (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
9385           (vec_duplicate:VNx2DI
9386             (zero_extend:DI
9387               (unspec:SI
9388                 [(match_operand 3)
9389                  (const_int SVE_KNOWN_PTRUE)
9390                  (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9391                 UNSPEC_CNTP)))))]
9392   "TARGET_SVE"
9393   "@
9394    <inc_dec>p\t%0.d, %2
9395    movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2"
9396   "&& !CONSTANT_P (operands[3])"
9397   {
9398     operands[3] = CONSTM1_RTX (<VPRED>mode);
9399   }
9400   [(set_attr "movprfx" "*,yes")]
9401 )
9402
9403 ;; Decrement a vector of SIs by the number of set bits in a predicate.
9404 ;; See aarch64_sve_cntp for a description of the operands.
9405 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9406   [(set (match_operand:VNx4SI 0 "register_operand")
9407         (ANY_MINUS:VNx4SI
9408           (match_operand:VNx4SI_ONLY 1 "register_operand")
9409           (vec_duplicate:VNx4SI
9410             (unspec:SI
9411               [(match_dup 3)
9412                (const_int SVE_KNOWN_PTRUE)
9413                (match_operand:<VPRED> 2 "register_operand")]
9414               UNSPEC_CNTP))))]
9415   "TARGET_SVE"
9416   {
9417     operands[3] = CONSTM1_RTX (<VPRED>mode);
9418   }
9419 )
9420
9421 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9422   [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
9423         (ANY_MINUS:VNx4SI
9424           (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
9425           (vec_duplicate:VNx4SI
9426             (unspec:SI
9427               [(match_operand 3)
9428                (const_int SVE_KNOWN_PTRUE)
9429                (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9430               UNSPEC_CNTP))))]
9431   "TARGET_SVE"
9432   "@
9433    <inc_dec>p\t%0.s, %2
9434    movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2"
9435   "&& !CONSTANT_P (operands[3])"
9436   {
9437     operands[3] = CONSTM1_RTX (<VPRED>mode);
9438   }
9439   [(set_attr "movprfx" "*,yes")]
9440 )
9441
9442 ;; Decrement a vector of HIs by the number of set bits in a predicate.
9443 ;; See aarch64_sve_cntp for a description of the operands.
9444 (define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
9445   [(set (match_operand:VNx8HI 0 "register_operand")
9446         (ANY_MINUS:VNx8HI
9447           (match_operand:VNx8HI_ONLY 1 "register_operand")
9448           (vec_duplicate:VNx8HI
9449             (truncate:HI
9450               (unspec:SI
9451                 [(match_dup 3)
9452                  (const_int SVE_KNOWN_PTRUE)
9453                  (match_operand:<VPRED> 2 "register_operand")]
9454                 UNSPEC_CNTP)))))]
9455   "TARGET_SVE"
9456   {
9457     operands[3] = CONSTM1_RTX (<VPRED>mode);
9458   }
9459 )
9460
9461 (define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
9462   [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
9463         (ANY_MINUS:VNx8HI
9464           (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
9465           (vec_duplicate:VNx8HI
9466             (match_operator:HI 3 "subreg_lowpart_operator"
9467               [(unspec:SI
9468                  [(match_operand 4)
9469                   (const_int SVE_KNOWN_PTRUE)
9470                   (match_operand:<VPRED> 2 "register_operand" "Upa, Upa")]
9471                  UNSPEC_CNTP)]))))]
9472   "TARGET_SVE"
9473   "@
9474    <inc_dec>p\t%0.h, %2
9475    movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2"
9476   "&& !CONSTANT_P (operands[4])"
9477   {
9478     operands[4] = CONSTM1_RTX (<VPRED>mode);
9479   }
9480   [(set_attr "movprfx" "*,yes")]
9481 )