1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2013 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
;; Vector move expander for the VALL modes.  When the destination is a
;; MEM, operand 1 is first forced into a register of the same mode.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
23 (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Misaligned vector move expander for the VALL modes.  If neither
;; operand is a register, operand 1 is forced into a register so that
;; expansion cannot fail (see the note in the body).
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "aarch64_simd_nonimmediate_operand" "")
33 (match_operand:VALL 1 "aarch64_simd_general_operand" ""))]
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Broadcast the <VEL> scalar in operand 1 to every lane of the VDQ
;; vector operand 0.  Alternative 0 takes the scalar from an "r"
;; register (type neon_from_gp); alternative 1 takes it from lane 0 of
;; a "w" register (type neon_dup).
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ 0 "register_operand" "=w, w")
46 (vec_duplicate:VDQ (match_operand:<VEL> 1 "register_operand" "r, w")))]
49 dup\\t%0.<Vtype>, %<vw>1
50 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
51 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
;; VDQF variant of the broadcast: the <VEL> scalar must already be in a
;; "w" register, and lane 0 of it is duplicated into every lane of
;; operand 0.
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQF 0 "register_operand" "=w")
56 (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
58 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
59 [(set_attr "type" "neon_dup<q>")]
;; DUP by element: the lane of vector operand 1 selected by the
;; immediate operand 2 is written to operand 0 using the <Vtype>
;; arrangement.  Source and destination share the same VALL mode.
62 (define_insn "aarch64_dup_lane<mode>"
63 [(set (match_operand:VALL 0 "register_operand" "=w")
66 (match_operand:VALL 1 "register_operand" "w")
67 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
70 "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
71 [(set_attr "type" "neon_dup<q>")]
;; DUP by element where the source vector (operand 1) has the
;; <VSWAP_WIDTH> mode rather than the destination's mode.  Operand 2 is
;; the immediate lane index printed in the template.
74 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
75 [(set (match_operand:VALL 0 "register_operand" "=w")
78 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
82 "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"
83 [(set_attr "type" "neon_dup<q>")]
;; Move pattern for the VD modes.  The condition requires at least one
;; operand to be a register.  The alternatives, in order, are:
;;   0: load (ldr)            1: store (str)
;;   2: "w" <- "w" (orr)      3: "r" <- "w" (umov)
;;   4: "w" <- "r" (ins)      5: "r" <- "r" (mov)
;;   6: "w" <- Dn immediate, printed by
;;      aarch64_output_simd_mov_immediate.
;; The "?" on the "r" alternatives makes them slightly disparaged.
86 (define_insn "*aarch64_simd_mov<mode>"
87 [(set (match_operand:VD 0 "aarch64_simd_nonimmediate_operand"
88 "=w, m, w, ?r, ?w, ?r, w")
89 (match_operand:VD 1 "aarch64_simd_general_operand"
90 "m, w, w, w, r, r, Dn"))]
92 && (register_operand (operands[0], <MODE>mode)
93 || register_operand (operands[1], <MODE>mode))"
95 switch (which_alternative)
97 case 0: return "ldr\\t%d0, %1";
98 case 1: return "str\\t%d1, %0";
99 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
100 case 3: return "umov\t%0, %1.d[0]";
101 case 4: return "ins\t%0.d[0], %1";
102 case 5: return "mov\t%0, %1";
104 return aarch64_output_simd_mov_immediate (operands[1],
106 default: gcc_unreachable ();
109 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
110 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
111 mov_reg, neon_move<q>")]
;; Move pattern for the VQ modes, with the same seven constraint
;; alternatives as the VD pattern.  At least one operand must be a
;; register.  Alternatives 3-5 (the ones with an "r" operand) are typed
;; "multiple" and have length 8; the others have length 4.  The Dn
;; alternative is printed by aarch64_output_simd_mov_immediate with a
;; width of 128.
114 (define_insn "*aarch64_simd_mov<mode>"
115 [(set (match_operand:VQ 0 "aarch64_simd_nonimmediate_operand"
116 "=w, m, w, ?r, ?w, ?r, w")
117 (match_operand:VQ 1 "aarch64_simd_general_operand"
118 "m, w, w, w, r, r, Dn"))]
120 && (register_operand (operands[0], <MODE>mode)
121 || register_operand (operands[1], <MODE>mode))"
123 switch (which_alternative)
126 return "ldr\\t%q0, %1";
128 return "str\\t%q1, %0";
130 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
136 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
141 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
142 neon_logic<q>, multiple, multiple, multiple,\
144 (set_attr "length" "4,4,4,8,8,8,4")]
;; After reload, split a VQ register move whose source and destination
;; are both general registers into two DImode moves: register N and
;; register N + 1 of each operand.  The two halves are handed to
;; aarch64_simd_disambiguate_copy to fill in the split operands.
;; NOTE(review): presumably that helper orders the two moves so an
;; overlapping source is not clobbered -- confirm against its
;; definition.
148 [(set (match_operand:VQ 0 "register_operand" "")
149 (match_operand:VQ 1 "register_operand" ""))]
150 "TARGET_SIMD && reload_completed
151 && GP_REGNUM_P (REGNO (operands[0]))
152 && GP_REGNUM_P (REGNO (operands[1]))"
153 [(set (match_dup 0) (match_dup 1))
154 (set (match_dup 2) (match_dup 3))]
156 int rdest = REGNO (operands[0]);
157 int rsrc = REGNO (operands[1]);
160 dest[0] = gen_rtx_REG (DImode, rdest);
161 src[0] = gen_rtx_REG (DImode, rsrc);
162 dest[1] = gen_rtx_REG (DImode, rdest + 1);
163 src[1] = gen_rtx_REG (DImode, rsrc + 1);
165 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
;; After reload, split a VQ register move that crosses register files
;; (FP destination with general source, or general destination with FP
;; source) by delegating to aarch64_split_simd_move.
169 [(set (match_operand:VQ 0 "register_operand" "")
170 (match_operand:VQ 1 "register_operand" ""))]
171 "TARGET_SIMD && reload_completed
172 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
173 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
176 aarch64_split_simd_move (operands[0], operands[1]);
;; Expand a VQ move as two <VHALF>-mode moves.
;; When the source is a general register, its low and high parts are
;; written with move_lo_quad_<mode> and move_hi_quad_<mode>.
;; In the other branch the destination is viewed as low/high <VHALF>
;; parts, which are filled from the source via
;; aarch64_simd_mov_from_<mode>low and aarch64_simd_mov_from_<mode>high,
;; using the lane-selection parallels built by
;; aarch64_simd_vect_par_cnst_half (false = low, true = high).
180 (define_expand "aarch64_split_simd_mov<mode>"
181 [(set (match_operand:VQ 0)
182 (match_operand:VQ 1))]
185 rtx dst = operands[0];
186 rtx src = operands[1];
188 if (GP_REGNUM_P (REGNO (src)))
190 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
191 rtx src_high_part = gen_highpart (<VHALF>mode, src);
194 (gen_move_lo_quad_<mode> (dst, src_low_part));
196 (gen_move_hi_quad_<mode> (dst, src_high_part));
201 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
202 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
203 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
204 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
207 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
209 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
;; Post-reload only: move the half of VQ operand 1 (in a "w" register)
;; selected by the vect_par_cnst_lo_half operand 2 into the <VHALF>
;; operand 0 in an "r" register.  One 4-byte instruction.
215 (define_insn "aarch64_simd_mov_from_<mode>low"
216 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
218 (match_operand:VQ 1 "register_operand" "w")
219 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
220 "TARGET_SIMD && reload_completed"
222 [(set_attr "type" "neon_to_gp<q>")
223 (set_attr "length" "4")
;; Post-reload only: move the half of VQ operand 1 (in a "w" register)
;; selected by the vect_par_cnst_hi_half operand 2 into the <VHALF>
;; operand 0 in an "r" register.  One 4-byte instruction.
226 (define_insn "aarch64_simd_mov_from_<mode>high"
227 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
229 (match_operand:VQ 1 "register_operand" "w")
230 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
231 "TARGET_SIMD && reload_completed"
233 [(set_attr "type" "neon_to_gp<q>")
234 (set_attr "length" "4")
;; Bitwise OR-NOT: operand 0 = (~operand 1) | operand 2.
;; The assembly template lists %2 before %1, i.e. the complemented
;; operand is printed last.
237 (define_insn "orn<mode>3"
238 [(set (match_operand:VDQ 0 "register_operand" "=w")
239 (ior:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
240 (match_operand:VDQ 2 "register_operand" "w")))]
242 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
243 [(set_attr "type" "neon_logic<q>")]
;; Bit clear: operand 0 = (~operand 1) & operand 2.
;; The assembly template lists %2 before %1, i.e. the complemented
;; operand is printed last.
246 (define_insn "bic<mode>3"
247 [(set (match_operand:VDQ 0 "register_operand" "=w")
248 (and:VDQ (not:VDQ (match_operand:VDQ 1 "register_operand" "w"))
249 (match_operand:VDQ 2 "register_operand" "w")))]
251 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
252 [(set_attr "type" "neon_logic<q>")]
;; Lane-wise addition of two VDQ vectors: operand 0 = operand 1 +
;; operand 2.
255 (define_insn "add<mode>3"
256 [(set (match_operand:VDQ 0 "register_operand" "=w")
257 (plus:VDQ (match_operand:VDQ 1 "register_operand" "w")
258 (match_operand:VDQ 2 "register_operand" "w")))]
260 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
261 [(set_attr "type" "neon_add<q>")]
;; Lane-wise subtraction of two VDQ vectors: operand 0 = operand 1 -
;; operand 2.
264 (define_insn "sub<mode>3"
265 [(set (match_operand:VDQ 0 "register_operand" "=w")
266 (minus:VDQ (match_operand:VDQ 1 "register_operand" "w")
267 (match_operand:VDQ 2 "register_operand" "w")))]
269 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
270 [(set_attr "type" "neon_sub<q>")]
;; Lane-wise multiplication of two VDQM vectors: operand 0 = operand 1 *
;; operand 2.  The scheduling type depends on the element type
;; (<Vetype>).
273 (define_insn "mul<mode>3"
274 [(set (match_operand:VDQM 0 "register_operand" "=w")
275 (mult:VDQM (match_operand:VDQM 1 "register_operand" "w")
276 (match_operand:VDQM 2 "register_operand" "w")))]
278 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
279 [(set_attr "type" "neon_mul_<Vetype>_scalar<q>")]
;; Multiply by element: vector operand 3 is multiplied by the lane of
;; operand 1 selected by the immediate operand 2.  Operand 1 uses the
;; mode-dependent <h_con> constraint; the mnemonic is <f>mul.
282 (define_insn "*aarch64_mul3_elt<mode>"
283 [(set (match_operand:VMUL 0 "register_operand" "=w")
287 (match_operand:VMUL 1 "register_operand" "<h_con>")
288 (parallel [(match_operand:SI 2 "immediate_operand")])))
289 (match_operand:VMUL 3 "register_operand" "w")))]
291 "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
292 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply by element where the vector supplying the lane (operand 1)
;; has the <VSWAP_WIDTH> mode.  The selected lane (immediate operand 2)
;; is duplicated across a VMUL_CHANGE_NLANES vector and multiplied by
;; operand 3.
295 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
296 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
297 (mult:VMUL_CHANGE_NLANES
298 (vec_duplicate:VMUL_CHANGE_NLANES
300 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
301 (parallel [(match_operand:SI 2 "immediate_operand")])))
302 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
304 "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"
305 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
;; Multiply the V2DF vector operand 1 by the DF scalar operand 2.
;; Emitted as FMUL by element, using lane 0 of the register that holds
;; operand 2.
308 (define_insn "*aarch64_mul3_elt_to_128df"
309 [(set (match_operand:V2DF 0 "register_operand" "=w")
312 (match_operand:DF 2 "register_operand" "w"))
313 (match_operand:V2DF 1 "register_operand" "w")))]
315 "fmul\\t%0.2d, %1.2d, %2.d[0]"
316 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Multiply the DF scalar operand 3 by the lane of V2DF operand 1
;; selected by the immediate operand 2, producing a DF result.
;; NOTE(review): operands 0 and 3 are DFmode but the template prints
;; them with the .2d arrangement -- confirm this is intended.
319 (define_insn "*aarch64_mul3_elt_to_64v2df"
320 [(set (match_operand:DF 0 "register_operand" "=w")
323 (match_operand:V2DF 1 "register_operand" "w")
324 (parallel [(match_operand:SI 2 "immediate_operand")]))
325 (match_operand:DF 3 "register_operand" "w")))]
327 "fmul\\t%0.2d, %3.2d, %1.d[%2]"
328 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
;; Lane-wise negation of a VDQ vector: operand 0 = -operand 1.
331 (define_insn "neg<mode>2"
332 [(set (match_operand:VDQ 0 "register_operand" "=w")
333 (neg:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
335 "neg\t%0.<Vtype>, %1.<Vtype>"
336 [(set_attr "type" "neon_neg<q>")]
;; Lane-wise absolute value of a VDQ vector: operand 0 = |operand 1|.
339 (define_insn "abs<mode>2"
340 [(set (match_operand:VDQ 0 "register_operand" "=w")
341 (abs:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
343 "abs\t%0.<Vtype>, %1.<Vtype>"
344 [(set_attr "type" "neon_abs<q>")]
;; Absolute difference: operand 0 = |operand 1 - operand 2| per lane,
;; for the VDQ_BHSI modes.  Emitted as SABD.
347 (define_insn "abd<mode>_3"
348 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
349 (abs:VDQ_BHSI (minus:VDQ_BHSI
350 (match_operand:VDQ_BHSI 1 "register_operand" "w")
351 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
353 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
354 [(set_attr "type" "neon_abd<q>")]
;; Absolute difference and accumulate:
;; operand 0 = |operand 1 - operand 2| + operand 3 per lane.
;; Operand 3 is tied to the output register (constraint "0").
;; Emitted as SABA.
357 (define_insn "aba<mode>_3"
358 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
359 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
360 (match_operand:VDQ_BHSI 1 "register_operand" "w")
361 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
362 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
364 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
365 [(set_attr "type" "neon_arith_acc<q>")]
;; Absolute difference for the VDQF modes:
;; operand 0 = |operand 1 - operand 2| per lane.  Emitted as FABD.
368 (define_insn "fabd<mode>_3"
369 [(set (match_operand:VDQF 0 "register_operand" "=w")
370 (abs:VDQF (minus:VDQF
371 (match_operand:VDQF 1 "register_operand" "w")
372 (match_operand:VDQF 2 "register_operand" "w"))))]
374 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
375 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Scalar form of FABD for the GPF modes: all three operands are scalar
;; values in "w" registers, printed with the <s> register prefix.
378 (define_insn "*fabd_scalar<mode>3"
379 [(set (match_operand:GPF 0 "register_operand" "=w")
381 (match_operand:GPF 1 "register_operand" "w")
382 (match_operand:GPF 2 "register_operand" "w"))))]
384 "fabd\t%<s>0, %<s>1, %<s>2"
385 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
;; Bitwise AND of two VDQ vectors, printed with the byte arrangement
;; (<Vbtype>).
388 (define_insn "and<mode>3"
389 [(set (match_operand:VDQ 0 "register_operand" "=w")
390 (and:VDQ (match_operand:VDQ 1 "register_operand" "w")
391 (match_operand:VDQ 2 "register_operand" "w")))]
393 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
394 [(set_attr "type" "neon_logic<q>")]
;; Bitwise inclusive OR of two VDQ vectors, emitted as ORR with the
;; byte arrangement (<Vbtype>).
397 (define_insn "ior<mode>3"
398 [(set (match_operand:VDQ 0 "register_operand" "=w")
399 (ior:VDQ (match_operand:VDQ 1 "register_operand" "w")
400 (match_operand:VDQ 2 "register_operand" "w")))]
402 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
403 [(set_attr "type" "neon_logic<q>")]
;; Bitwise exclusive OR of two VDQ vectors, emitted as EOR with the
;; byte arrangement (<Vbtype>).
406 (define_insn "xor<mode>3"
407 [(set (match_operand:VDQ 0 "register_operand" "=w")
408 (xor:VDQ (match_operand:VDQ 1 "register_operand" "w")
409 (match_operand:VDQ 2 "register_operand" "w")))]
411 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
412 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement of a VDQ vector: operand 0 = ~operand 1.
415 (define_insn "one_cmpl<mode>2"
416 [(set (match_operand:VDQ 0 "register_operand" "=w")
417 (not:VDQ (match_operand:VDQ 1 "register_operand" "w")))]
419 "not\t%0.<Vbtype>, %1.<Vbtype>"
420 [(set_attr "type" "neon_logic<q>")]
;; Insert the <VEL> scalar operand 1 into one lane of the VQ_S vector
;; operand 3, which is tied to the output.  Operand 2 is a one-hot lane
;; mask (1 << lane): the output code takes its log2, maps it through
;; ENDIAN_LANE_N, and rebuilds the mask before printing it with %p2.
;; Alternative 0 inserts from an "r" register, alternative 1 from lane
;; 0 of a "w" register.
423 (define_insn "aarch64_simd_vec_set<mode>"
424 [(set (match_operand:VQ_S 0 "register_operand" "=w,w")
427 (match_operand:<VEL> 1 "register_operand" "r,w"))
428 (match_operand:VQ_S 3 "register_operand" "0,0")
429 (match_operand:SI 2 "immediate_operand" "i,i")))]
432 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
433 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
434 switch (which_alternative)
437 return "ins\\t%0.<Vetype>[%p2], %w1";
439 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
444 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>")]
;; Logical shift right of a VDQ vector by an immediate vector operand
;; accepted by aarch64_simd_rshift_imm (constraint Dr).  Emitted as
;; USHR.
447 (define_insn "aarch64_simd_lshr<mode>"
448 [(set (match_operand:VDQ 0 "register_operand" "=w")
449 (lshiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
450 (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
452 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
453 [(set_attr "type" "neon_shift_imm<q>")]
;; Arithmetic shift right of a VDQ vector by an immediate vector
;; operand accepted by aarch64_simd_rshift_imm (constraint Dr).
;; Emitted as SSHR.
456 (define_insn "aarch64_simd_ashr<mode>"
457 [(set (match_operand:VDQ 0 "register_operand" "=w")
458 (ashiftrt:VDQ (match_operand:VDQ 1 "register_operand" "w")
459 (match_operand:VDQ 2 "aarch64_simd_rshift_imm" "Dr")))]
461 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
462 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left of a VDQ vector by an immediate vector operand accepted
;; by aarch64_simd_lshift_imm (constraint Dl).  Emitted as SHL.
465 (define_insn "aarch64_simd_imm_shl<mode>"
466 [(set (match_operand:VDQ 0 "register_operand" "=w")
467 (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
468 (match_operand:VDQ 2 "aarch64_simd_lshift_imm" "Dl")))]
470 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
471 [(set_attr "type" "neon_shift_imm<q>")]
;; Shift left of a VDQ vector by per-lane amounts held in the register
;; operand 2, expressed as an RTL ashift.  Emitted as SSHL.
474 (define_insn "aarch64_simd_reg_sshl<mode>"
475 [(set (match_operand:VDQ 0 "register_operand" "=w")
476 (ashift:VDQ (match_operand:VDQ 1 "register_operand" "w")
477 (match_operand:VDQ 2 "register_operand" "w")))]
479 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
480 [(set_attr "type" "neon_shift_reg<q>")]
;; USHL by register, modelled as UNSPEC_ASHIFT_UNSIGNED rather than a
;; plain RTL shift.  The lshr<mode>3 and vlshr<mode>3 expanders use it
;; with a negated shift amount to implement a logical right shift.
483 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
484 [(set (match_operand:VDQ 0 "register_operand" "=w")
485 (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
486 (match_operand:VDQ 2 "register_operand" "w")]
487 UNSPEC_ASHIFT_UNSIGNED))]
489 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
490 [(set_attr "type" "neon_shift_reg<q>")]
;; SSHL by register, modelled as UNSPEC_ASHIFT_SIGNED rather than a
;; plain RTL shift.  The ashr<mode>3 and vashr<mode>3 expanders use it
;; with a negated shift amount to implement an arithmetic right shift.
493 (define_insn "aarch64_simd_reg_shl<mode>_signed"
494 [(set (match_operand:VDQ 0 "register_operand" "=w")
495 (unspec:VDQ [(match_operand:VDQ 1 "register_operand" "w")
496 (match_operand:VDQ 2 "register_operand" "w")]
497 UNSPEC_ASHIFT_SIGNED))]
499 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
500 [(set_attr "type" "neon_shift_reg<q>")]
;; Left-shift expander for VDQ vectors with a scalar SImode amount.
;;  - A constant amount in [0, bit_width) is turned into a constant
;;    vector with aarch64_simd_gen_const_vector_dup and emitted as
;;    aarch64_simd_imm_shl<mode>.
;;  - Other constants, and memory operands, are forced into an SImode
;;    register.
;;  - A register amount is converted to the element mode, broadcast
;;    with aarch64_simd_dup<mode>, and used by
;;    aarch64_simd_reg_sshl<mode>.
;; bit_width is the element size in bits.
503 (define_expand "ashl<mode>3"
504 [(match_operand:VDQ 0 "register_operand" "")
505 (match_operand:VDQ 1 "register_operand" "")
506 (match_operand:SI 2 "general_operand" "")]
509 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
512 if (CONST_INT_P (operands[2]))
514 shift_amount = INTVAL (operands[2]);
515 if (shift_amount >= 0 && shift_amount < bit_width)
517 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
519 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
526 operands[2] = force_reg (SImode, operands[2]);
529 else if (MEM_P (operands[2]))
531 operands[2] = force_reg (SImode, operands[2]);
534 if (REG_P (operands[2]))
536 rtx tmp = gen_reg_rtx (<MODE>mode);
537 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
538 convert_to_mode (<VEL>mode,
541 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
;; Logical right-shift expander for VDQ vectors with a scalar SImode
;; amount.
;;  - A constant amount in (0, bit_width] is emitted as the immediate
;;    form aarch64_simd_lshr<mode>.
;;  - Other constants, and memory operands, are forced into an SImode
;;    register.
;;  - A register amount is negated (gen_negsi2), broadcast with
;;    aarch64_simd_dup<mode>, and used by
;;    aarch64_simd_reg_shl<mode>_unsigned.
;; NOTE(review): the register path relies on USHL treating a negative
;; count as a right shift -- confirm against the ISA reference.
550 (define_expand "lshr<mode>3"
551 [(match_operand:VDQ 0 "register_operand" "")
552 (match_operand:VDQ 1 "register_operand" "")
553 (match_operand:SI 2 "general_operand" "")]
556 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
559 if (CONST_INT_P (operands[2]))
561 shift_amount = INTVAL (operands[2]);
562 if (shift_amount > 0 && shift_amount <= bit_width)
564 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
566 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
572 operands[2] = force_reg (SImode, operands[2]);
574 else if (MEM_P (operands[2]))
576 operands[2] = force_reg (SImode, operands[2]);
579 if (REG_P (operands[2]))
581 rtx tmp = gen_reg_rtx (SImode);
582 rtx tmp1 = gen_reg_rtx (<MODE>mode);
583 emit_insn (gen_negsi2 (tmp, operands[2]));
584 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
585 convert_to_mode (<VEL>mode,
587 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
;; Arithmetic right-shift expander for VDQ vectors with a scalar SImode
;; amount.
;;  - A constant amount in (0, bit_width] is emitted as the immediate
;;    form aarch64_simd_ashr<mode>.
;;  - Other constants, and memory operands, are forced into an SImode
;;    register.
;;  - A register amount is negated (gen_negsi2), broadcast with
;;    aarch64_simd_dup<mode>, and used by
;;    aarch64_simd_reg_shl<mode>_signed.
;; NOTE(review): the register path relies on SSHL treating a negative
;; count as a right shift -- confirm against the ISA reference.
597 (define_expand "ashr<mode>3"
598 [(match_operand:VDQ 0 "register_operand" "")
599 (match_operand:VDQ 1 "register_operand" "")
600 (match_operand:SI 2 "general_operand" "")]
603 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
606 if (CONST_INT_P (operands[2]))
608 shift_amount = INTVAL (operands[2]);
609 if (shift_amount > 0 && shift_amount <= bit_width)
611 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
613 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
619 operands[2] = force_reg (SImode, operands[2]);
621 else if (MEM_P (operands[2]))
623 operands[2] = force_reg (SImode, operands[2]);
626 if (REG_P (operands[2]))
628 rtx tmp = gen_reg_rtx (SImode);
629 rtx tmp1 = gen_reg_rtx (<MODE>mode);
630 emit_insn (gen_negsi2 (tmp, operands[2]));
631 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
632 convert_to_mode (<VEL>mode,
634 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
;; Vector-by-vector left shift for the VDQ modes: the per-lane amounts
;; are already in vector operand 2, so this emits
;; aarch64_simd_reg_sshl<mode> directly.
644 (define_expand "vashl<mode>3"
645 [(match_operand:VDQ 0 "register_operand" "")
646 (match_operand:VDQ 1 "register_operand" "")
647 (match_operand:VDQ 2 "register_operand" "")]
650 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
655 ;; Using mode VQ_S as there is no V2DImode neg!
656 ;; Negating individual lanes most certainly offsets the
657 ;; gain from vectorization.
;; Vector-by-vector arithmetic right shift for the VQ_S modes: the
;; shift-amount vector (operand 2) is negated into a fresh register
;; with neg<mode>2 and the result is used by
;; aarch64_simd_reg_shl<mode>_signed.
658 (define_expand "vashr<mode>3"
659 [(match_operand:VQ_S 0 "register_operand" "")
660 (match_operand:VQ_S 1 "register_operand" "")
661 (match_operand:VQ_S 2 "register_operand" "")]
664 rtx neg = gen_reg_rtx (<MODE>mode);
665 emit (gen_neg<mode>2 (neg, operands[2]));
666 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
;; Vector-by-vector logical right shift for the VQ_S modes: the
;; shift-amount vector (operand 2) is negated into a fresh register
;; with neg<mode>2 and the result is used by
;; aarch64_simd_reg_shl<mode>_unsigned.
671 (define_expand "vlshr<mode>3"
672 [(match_operand:VQ_S 0 "register_operand" "")
673 (match_operand:VQ_S 1 "register_operand" "")
674 (match_operand:VQ_S 2 "register_operand" "")]
677 rtx neg = gen_reg_rtx (<MODE>mode);
678 emit (gen_neg<mode>2 (neg, operands[2]));
679 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
;; vec_set expander for the VQ_S modes.  The lane index in operand 2 is
;; converted to a one-hot mask (1 << index) and passed to
;; aarch64_simd_vec_set<mode>, with operand 0 used both as destination
;; and as the vector being updated.
684 (define_expand "vec_set<mode>"
685 [(match_operand:VQ_S 0 "register_operand")
686 (match_operand:<VEL> 1 "register_operand")
687 (match_operand:SI 2 "immediate_operand")]
690 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
691 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
692 GEN_INT (elem), operands[0]));
;; V2DI lane insert.  Operand 1 (DI) is written into one lane of
;; operand 3, which is tied to the output.  Operand 2 is a one-hot lane
;; mask: the output code takes its log2, maps it through ENDIAN_LANE_N,
;; and rebuilds the mask before printing it with %p2.
;; Alternative 0 inserts from an "r" register, alternative 1 from lane
;; 0 of a "w" register.
697 (define_insn "aarch64_simd_vec_setv2di"
698 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
701 (match_operand:DI 1 "register_operand" "r,w"))
702 (match_operand:V2DI 3 "register_operand" "0,0")
703 (match_operand:SI 2 "immediate_operand" "i,i")))]
706 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
707 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
708 switch (which_alternative)
711 return "ins\\t%0.d[%p2], %1";
713 return "ins\\t%0.d[%p2], %1.d[0]";
718 [(set_attr "type" "neon_from_gp, neon_ins_q")]
;; vec_set expander for V2DI.  The lane index in operand 2 is converted
;; to a one-hot mask (1 << index) and passed to
;; aarch64_simd_vec_setv2di, with operand 0 used both as destination
;; and as the vector being updated.
721 (define_expand "vec_setv2di"
722 [(match_operand:V2DI 0 "register_operand")
723 (match_operand:DI 1 "register_operand")
724 (match_operand:SI 2 "immediate_operand")]
727 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
728 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
729 GEN_INT (elem), operands[0]));
;; Lane insert for the VDQF modes.  The <VEL> scalar operand 1 must be
;; in a "w" register; its lane 0 is inserted into operand 3, which is
;; tied to the output.  Operand 2 is a one-hot lane mask that is
;; endian-corrected via ENDIAN_LANE_N before being printed with %p2.
734 (define_insn "aarch64_simd_vec_set<mode>"
735 [(set (match_operand:VDQF 0 "register_operand" "=w")
738 (match_operand:<VEL> 1 "register_operand" "w"))
739 (match_operand:VDQF 3 "register_operand" "0")
740 (match_operand:SI 2 "immediate_operand" "i")))]
743 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
745 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
746 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
748 [(set_attr "type" "neon_ins<q>")]
;; vec_set expander for the VDQF modes.  The lane index in operand 2 is
;; converted to a one-hot mask (1 << index) and passed to
;; aarch64_simd_vec_set<mode>, with operand 0 used both as destination
;; and as the vector being updated.
751 (define_expand "vec_set<mode>"
752 [(match_operand:VDQF 0 "register_operand" "+w")
753 (match_operand:<VEL> 1 "register_operand" "w")
754 (match_operand:SI 2 "immediate_operand" "")]
757 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
758 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
759 GEN_INT (elem), operands[0]));
;; Multiply-accumulate for the VQ_S modes:
;; operand 0 = operand 2 * operand 3 + operand 1 per lane.
;; The accumulator (operand 1) is tied to the output register.
765 (define_insn "aarch64_mla<mode>"
766 [(set (match_operand:VQ_S 0 "register_operand" "=w")
767 (plus:VQ_S (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
768 (match_operand:VQ_S 3 "register_operand" "w"))
769 (match_operand:VQ_S 1 "register_operand" "0")))]
771 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
772 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLA by element for the VDQHS modes.  Vector operand 3 is multiplied
;; by the lane of operand 1 selected by the immediate operand 2.
;; Operand 4 is the accumulator, tied to the output (constraint "0").
775 (define_insn "*aarch64_mla_elt<mode>"
776 [(set (match_operand:VDQHS 0 "register_operand" "=w")
781 (match_operand:VDQHS 1 "register_operand" "<h_con>")
782 (parallel [(match_operand:SI 2 "immediate_operand")])))
783 (match_operand:VDQHS 3 "register_operand" "w"))
784 (match_operand:VDQHS 4 "register_operand" "0")))]
786 "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
787 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLA by element where the vector supplying the lane (operand 1) has
;; the <VSWAP_WIDTH> mode.  Operand 2 is the immediate lane index,
;; operand 3 the other multiplicand, and operand 4 the accumulator tied
;; to the output.
790 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
791 [(set (match_operand:VDQHS 0 "register_operand" "=w")
796 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
797 (parallel [(match_operand:SI 2 "immediate_operand")])))
798 (match_operand:VDQHS 3 "register_operand" "w"))
799 (match_operand:VDQHS 4 "register_operand" "0")))]
801 "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
802 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; Multiply-subtract for the VQ_S modes:
;; operand 0 = operand 1 - operand 2 * operand 3 per lane.
;; The accumulator (operand 1) is tied to the output register.
805 (define_insn "aarch64_mls<mode>"
806 [(set (match_operand:VQ_S 0 "register_operand" "=w")
807 (minus:VQ_S (match_operand:VQ_S 1 "register_operand" "0")
808 (mult:VQ_S (match_operand:VQ_S 2 "register_operand" "w")
809 (match_operand:VQ_S 3 "register_operand" "w"))))]
811 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
812 [(set_attr "type" "neon_mla_<Vetype><q>")]
;; MLS by element for the VDQHS modes.  Operand 4 is the accumulator,
;; tied to the output (constraint "0").  Vector operand 3 is multiplied
;; by the lane of operand 1 selected by the immediate operand 2.
815 (define_insn "*aarch64_mls_elt<mode>"
816 [(set (match_operand:VDQHS 0 "register_operand" "=w")
818 (match_operand:VDQHS 4 "register_operand" "0")
822 (match_operand:VDQHS 1 "register_operand" "<h_con>")
823 (parallel [(match_operand:SI 2 "immediate_operand")])))
824 (match_operand:VDQHS 3 "register_operand" "w"))))]
826 "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
827 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
;; MLS by element where the vector supplying the lane (operand 1) has
;; the <VSWAP_WIDTH> mode.  Operand 4 is the accumulator tied to the
;; output, operand 2 the immediate lane index, and operand 3 the other
;; multiplicand.
830 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
831 [(set (match_operand:VDQHS 0 "register_operand" "=w")
833 (match_operand:VDQHS 4 "register_operand" "0")
837 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
838 (parallel [(match_operand:SI 2 "immediate_operand")])))
839 (match_operand:VDQHS 3 "register_operand" "w"))))]
841 "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
842 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
845 ;; Max/Min operations.
;; One pattern per code in the MAXMIN iterator, for the VQ_S modes.
;; The <su> and <maxmin> attributes form both the pattern name and the
;; mnemonic.
846 (define_insn "<su><maxmin><mode>3"
847 [(set (match_operand:VQ_S 0 "register_operand" "=w")
848 (MAXMIN:VQ_S (match_operand:VQ_S 1 "register_operand" "w")
849 (match_operand:VQ_S 2 "register_operand" "w")))]
851 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
852 [(set_attr "type" "neon_minmax<q>")]
855 ;; Move into low-half clearing high half to 0.
;; Operand 1 is a <VHALF> value paired with a zero <VHALF> vector to
;; form the VQ result.  Three alternatives: source in a "w" register,
;; or in an "r" register (twice).  The second alternative is typed fmov
;; and is gated on the "fp" attribute; the first and third are gated on
;; "simd".  Every alternative is a single 4-byte instruction.
857 (define_insn "move_lo_quad_<mode>"
858 [(set (match_operand:VQ 0 "register_operand" "=w,w,w")
860 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
861 (vec_duplicate:<VHALF> (const_int 0))))]
867 [(set_attr "type" "neon_dup<q>,fmov,neon_dup<q>")
868 (set_attr "simd" "yes,*,yes")
869 (set_attr "fp" "*,yes,*")
870 (set_attr "length" "4")]
873 ;; Move into high-half.
;; Write the <VHALF> operand 1 into the upper 64 bits (d[1]) of VQ
;; operand 0.  Operand 0 is read-write ("+"); operand 2 is a
;; vect_par_cnst_lo_half parallel used inside the RTL.  Operand 1 may
;; be in a "w" or an "r" register.
875 (define_insn "aarch64_simd_move_hi_quad_<mode>"
876 [(set (match_operand:VQ 0 "register_operand" "+w,w")
880 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
881 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
884 ins\\t%0.d[1], %1.d[0]
886 [(set_attr "type" "neon_ins")
887 (set_attr "length" "4")]
;; Expander wrapping aarch64_simd_move_hi_quad_<mode>: it builds the
;; low-half lane parallel with aarch64_simd_vect_par_cnst_half
;; (<MODE>mode, false) and emits the insn for operand 0.
890 (define_expand "move_hi_quad_<mode>"
891 [(match_operand:VQ 0 "register_operand" "")
892 (match_operand:<VHALF> 1 "register_operand" "")]
895 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
896 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
901 ;; Narrowing operations.
;; Truncate each lane of the VQN vector operand 1 to the narrower
;; element type, producing a <VNARROWQ> result.  Emitted as XTN.
904 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
905 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
906 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
908 "xtn\\t%0.<Vntype>, %1.<Vtype>"
909 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Pack-and-truncate expander for the VDN modes.  The two inputs are
;; combined into a temporary <VDBL> register with move_lo_quad and
;; move_hi_quad, then truncated with aarch64_simd_vec_pack_trunc.
;; On BYTES_BIG_ENDIAN the operands placed in the low and high halves
;; are swapped.
912 (define_expand "vec_pack_trunc_<mode>"
913 [(match_operand:<VNARROWD> 0 "register_operand" "")
914 (match_operand:VDN 1 "register_operand" "")
915 (match_operand:VDN 2 "register_operand" "")]
918 rtx tempreg = gen_reg_rtx (<VDBL>mode);
919 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
920 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
922 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
923 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
924 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
;; Pack-and-truncate for the VQN modes.  Both inputs are truncated and
;; concatenated into one <VNARROWQ2> result using XTN followed by XTN2.
;; On BYTES_BIG_ENDIAN the roles of operands 1 and 2 are swapped.
;; The output is earlyclobber ("&").  Two instructions, so length 8.
930 (define_insn "vec_pack_trunc_<mode>"
931 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "+&w")
932 (vec_concat:<VNARROWQ2>
933 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
934 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
937 if (BYTES_BIG_ENDIAN)
938 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
940 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
942 [(set_attr "type" "multiple")
943 (set_attr "length" "8")]
946 ;; Widening operations.
;; Extend (per ANY_EXTEND) the half of VQW operand 1 selected by the
;; vect_par_cnst_lo_half operand 2 to the <VWIDE> mode.
;; Emitted as <su>shll with a shift amount of 0.
948 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
949 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
950 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
951 (match_operand:VQW 1 "register_operand" "w")
952 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
955 "<su>shll %0.<Vwtype>, %1.<Vhalftype>, 0"
956 [(set_attr "type" "neon_shift_imm_long")]
;; Extend (per ANY_EXTEND) the half of VQW operand 1 selected by the
;; vect_par_cnst_hi_half operand 2 to the <VWIDE> mode.
;; Emitted as <su>shll2 with a shift amount of 0.
959 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
960 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
961 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
962 (match_operand:VQW 1 "register_operand" "w")
963 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
966 "<su>shll2 %0.<Vwtype>, %1.<Vtype>, 0"
967 [(set_attr "type" "neon_shift_imm_long")]
;; Unpack-high expander: builds the high-half lane parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_simd_vec_unpack<su>_hi_<mode>.
970 (define_expand "vec_unpack<su>_hi_<mode>"
971 [(match_operand:<VWIDE> 0 "register_operand" "")
972 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
975 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
976 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
;; Unpack-low expander: builds the low-half lane parallel with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and emits
;; aarch64_simd_vec_unpack<su>_lo_<mode>.
982 (define_expand "vec_unpack<su>_lo_<mode>"
983 [(match_operand:<VWIDE> 0 "register_operand" "")
984 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
987 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
988 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
994 ;; Widening arithmetic.
;; <su>mlal on low halves.  The multiplicands are the extended halves
;; of VQW operands 2 and 4; operand 3 is the vect_par_cnst_lo_half
;; selector.  The <VWIDE> accumulator (operand 1) is tied to the
;; output.
996 (define_insn "*aarch64_<su>mlal_lo<mode>"
997 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1000 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1001 (match_operand:VQW 2 "register_operand" "w")
1002 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1003 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1004 (match_operand:VQW 4 "register_operand" "w")
1006 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1008 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1009 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; <su>mlal2 on high halves.  The multiplicands are the extended halves
;; of VQW operands 2 and 4; operand 3 is the vect_par_cnst_hi_half
;; selector.  The <VWIDE> accumulator (operand 1) is tied to the
;; output.
1012 (define_insn "*aarch64_<su>mlal_hi<mode>"
1013 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1016 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1017 (match_operand:VQW 2 "register_operand" "w")
1018 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1019 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1020 (match_operand:VQW 4 "register_operand" "w")
1022 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1024 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1025 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; <su>mlsl on low halves.  The <VWIDE> accumulator (operand 1) is tied
;; to the output.  The multiplicands are the extended halves of VQW
;; operands 2 and 4; operand 3 is the vect_par_cnst_lo_half selector.
1028 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1029 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1031 (match_operand:<VWIDE> 1 "register_operand" "0")
1033 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1034 (match_operand:VQW 2 "register_operand" "w")
1035 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1036 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1037 (match_operand:VQW 4 "register_operand" "w")
1040 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1041 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; <su>mlsl2 on high halves.  The <VWIDE> accumulator (operand 1) is
;; tied to the output.  The multiplicands are the extended halves of
;; VQW operands 2 and 4; operand 3 is the vect_par_cnst_hi_half
;; selector.
1044 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1045 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1047 (match_operand:<VWIDE> 1 "register_operand" "0")
1049 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1050 (match_operand:VQW 2 "register_operand" "w")
1051 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1052 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1053 (match_operand:VQW 4 "register_operand" "w")
1056 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1057 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; <su>mlal on whole VDW vectors: operands 1 and 2 are the narrow
;; multiplicands, and the <VWIDE> accumulator (operand 3) is tied to
;; the output.
1060 (define_insn "*aarch64_<su>mlal<mode>"
1061 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1065 (match_operand:VDW 1 "register_operand" "w"))
1067 (match_operand:VDW 2 "register_operand" "w")))
1068 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1070 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1071 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; <su>mlsl on whole VDW vectors: the <VWIDE> accumulator (operand 1)
;; is tied to the output, and operands 2 and 3 are the narrow
;; multiplicands.
1074 (define_insn "*aarch64_<su>mlsl<mode>"
1075 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1077 (match_operand:<VWIDE> 1 "register_operand" "0")
1080 (match_operand:VDW 2 "register_operand" "w"))
1082 (match_operand:VDW 3 "register_operand" "w")))))]
1084 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1085 [(set_attr "type" "neon_mla_<Vetype>_long")]
;; Widening multiply of low halves.  The halves of VQW operands 1 and 2
;; are extended (per ANY_EXTEND) to <VWIDE> and multiplied; operand 3
;; is the vect_par_cnst_lo_half selector.  Emitted as <su>mull.
1088 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1089 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1090 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1091 (match_operand:VQW 1 "register_operand" "w")
1092 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1093 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1094 (match_operand:VQW 2 "register_operand" "w")
1097 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1098 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Widening-multiply-low expander: builds the low-half lane parallel
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, false) and emits
;; aarch64_simd_vec_<su>mult_lo_<mode>.
1101 (define_expand "vec_widen_<su>mult_lo_<mode>"
1102 [(match_operand:<VWIDE> 0 "register_operand" "")
1103 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1104 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1107 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1108 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
;; Widening multiply of high halves.  The halves of VQW operands 1 and
;; 2 are extended (per ANY_EXTEND) to <VWIDE> and multiplied; operand 3
;; is the vect_par_cnst_hi_half selector.  Emitted as <su>mull2.
1115 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1116 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1117 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1118 (match_operand:VQW 1 "register_operand" "w")
1119 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1120 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1121 (match_operand:VQW 2 "register_operand" "w")
1124 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1125 [(set_attr "type" "neon_mul_<Vetype>_long")]
;; Widening-multiply-high expander: builds the high-half lane parallel
;; with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_simd_vec_<su>mult_hi_<mode>.
1128 (define_expand "vec_widen_<su>mult_hi_<mode>"
1129 [(match_operand:<VWIDE> 0 "register_operand" "")
1130 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1131 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1134 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1135 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1143 ;; FP vector operations.
1144 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1145 ;; double-precision (64-bit) floating-point data types and arithmetic as
1146 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1147 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1149 ;; Floating-point operations can raise an exception. Vectorizing such
1150 ;; operations is safe for the reasons explained below.
1152 ;; ARMv8 permits an extension to enable trapped floating-point
1153 ;; exception handling, however this is an optional feature. In the
1154 ;; event of a floating-point exception being raised by vectorised
1156 ;; 1. If trapped floating-point exceptions are available, then a trap
1157 ;; will be taken when any lane raises an enabled exception. A trap
1158 ;; handler may determine which lane raised the exception.
1159 ;; 2. Alternatively a sticky exception flag is set in the
1160 ;; floating-point status register (FPSR). Software may explicitly
1161 ;; test the exception flags, in which case the tests will either
1162 ;; prevent vectorisation, allowing precise identification of the
1163 ;; failing operation, or if tested outside of vectorisable regions
1164 ;; then the specific operation and lane are not of interest.
1166 ;; FP arithmetic operations.
;; Vector floating-point addition over the VDQF modes: op0 = op1 + op2.
;; Emits FADD on the <Vtype> arrangement.
1168 (define_insn "add<mode>3"
1169 [(set (match_operand:VDQF 0 "register_operand" "=w")
1170 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1171 (match_operand:VDQF 2 "register_operand" "w")))]
1173 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1174 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Vector floating-point subtraction over the VDQF modes: op0 = op1 - op2.
;; Emits FSUB on the <Vtype> arrangement.
1177 (define_insn "sub<mode>3"
1178 [(set (match_operand:VDQF 0 "register_operand" "=w")
1179 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1180 (match_operand:VDQF 2 "register_operand" "w")))]
1182 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1183 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
;; Vector floating-point multiplication over the VDQF modes: op0 = op1 * op2.
;; Emits FMUL on the <Vtype> arrangement.
1186 (define_insn "mul<mode>3"
1187 [(set (match_operand:VDQF 0 "register_operand" "=w")
1188 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1189 (match_operand:VDQF 2 "register_operand" "w")))]
1191 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1192 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
;; Vector floating-point division over the VDQF modes: op0 = op1 / op2.
;; Emits FDIV on the <Vtype> arrangement.
1195 (define_insn "div<mode>3"
1196 [(set (match_operand:VDQF 0 "register_operand" "=w")
1197 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1198 (match_operand:VDQF 2 "register_operand" "w")))]
1200 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1201 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
;; Vector floating-point negation over the VDQF modes: op0 = -op1.
;; Emits FNEG on the <Vtype> arrangement.
1204 (define_insn "neg<mode>2"
1205 [(set (match_operand:VDQF 0 "register_operand" "=w")
1206 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1208 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1209 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
;; Vector floating-point absolute value over the VDQF modes: op0 = |op1|.
;; Emits FABS on the <Vtype> arrangement.
1212 (define_insn "abs<mode>2"
1213 [(set (match_operand:VDQF 0 "register_operand" "=w")
1214 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1216 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1217 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
;; Vector fused multiply-add: op0 = op1 * op2 + op3.  Operand 3 is tied to the
;; destination (constraint "0"), so the emitted FMLA names only operands 0, 1
;; and 2 and accumulates in place.
1220 (define_insn "fma<mode>4"
1221 [(set (match_operand:VDQF 0 "register_operand" "=w")
1222 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1223 (match_operand:VDQF 2 "register_operand" "w")
1224 (match_operand:VDQF 3 "register_operand" "0")))]
1226 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1227 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; By-element FMLA.  Operand 3 is multiplied by lane operand 2 of operand 1
;; and accumulated into operand 4, which is tied to the destination.  The
;; lane source uses the <h_con> register constraint; the lane number is
;; printed unchanged from the immediate.
1230 (define_insn "*aarch64_fma4_elt<mode>"
1231 [(set (match_operand:VDQF 0 "register_operand" "=w")
1235 (match_operand:VDQF 1 "register_operand" "<h_con>")
1236 (parallel [(match_operand:SI 2 "immediate_operand")])))
1237 (match_operand:VDQF 3 "register_operand" "w")
1238 (match_operand:VDQF 4 "register_operand" "0")))]
1240 "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1241 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; By-element FMLA where the lane source (operand 1) has the <VSWAP_WIDTH>
;; mode rather than the mode of the other VDQSF operands.  The selected lane
;; is duplicated across a VDQSF vector; operand 4 is tied to the destination.
1244 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1245 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1247 (vec_duplicate:VDQSF
1249 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1250 (parallel [(match_operand:SI 2 "immediate_operand")])))
1251 (match_operand:VDQSF 3 "register_operand" "w")
1252 (match_operand:VDQSF 4 "register_operand" "0")))]
1254 "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1255 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLA whose multiplier is a scalar DF register (operand 1), printed as
;; lane 0 of that register.  Operand 3 is the accumulator and is tied to the
;; destination.
1258 (define_insn "*aarch64_fma4_elt_to_128df"
1259 [(set (match_operand:V2DF 0 "register_operand" "=w")
1262 (match_operand:DF 1 "register_operand" "w"))
1263 (match_operand:V2DF 2 "register_operand" "w")
1264 (match_operand:V2DF 3 "register_operand" "0")))]
1266 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1267 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF fused multiply-add whose multiplier is lane operand 2 of a V2DF
;; register (operand 1).  Operand 4 is the accumulator and is tied to the
;; destination.  The DF operands are printed with the .2d arrangement.
1270 (define_insn "*aarch64_fma4_elt_to_64v2df"
1271 [(set (match_operand:DF 0 "register_operand" "=w")
1274 (match_operand:V2DF 1 "register_operand" "w")
1275 (parallel [(match_operand:SI 2 "immediate_operand")]))
1276 (match_operand:DF 3 "register_operand" "w")
1277 (match_operand:DF 4 "register_operand" "0")))]
1279 "fmla\\t%0.2d, %3.2d, %1.2d[%2]"
1280 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Standard-name fnma pattern over the VDQF modes.  Emits FMLS on operands 1
;; and 2, accumulating into operand 3, which is tied to the destination.
1283 (define_insn "fnma<mode>4"
1284 [(set (match_operand:VDQF 0 "register_operand" "=w")
1286 (match_operand:VDQF 1 "register_operand" "w")
1288 (match_operand:VDQF 2 "register_operand" "w"))
1289 (match_operand:VDQF 3 "register_operand" "0")))]
1291 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1292 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
;; By-element FMLS.  Operand 3 is combined with lane operand 2 of operand 1;
;; operand 4 is the accumulator and is tied to the destination.  The lane
;; number is printed unchanged from the immediate.
1295 (define_insn "*aarch64_fnma4_elt<mode>"
1296 [(set (match_operand:VDQF 0 "register_operand" "=w")
1299 (match_operand:VDQF 3 "register_operand" "w"))
1302 (match_operand:VDQF 1 "register_operand" "<h_con>")
1303 (parallel [(match_operand:SI 2 "immediate_operand")])))
1304 (match_operand:VDQF 4 "register_operand" "0")))]
1306 "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1307 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; By-element FMLS where the lane source (operand 1) has the <VSWAP_WIDTH>
;; mode rather than the mode of the other VDQSF operands.  Operand 4 is the
;; accumulator and is tied to the destination.
1310 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1311 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1314 (match_operand:VDQSF 3 "register_operand" "w"))
1315 (vec_duplicate:VDQSF
1317 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1318 (parallel [(match_operand:SI 2 "immediate_operand")])))
1319 (match_operand:VDQSF 4 "register_operand" "0")))]
1321 "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"
1322 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
;; V2DF FMLS whose multiplier is a scalar DF register (operand 1), printed as
;; lane 0 of that register.  Operand 3 is the accumulator and is tied to the
;; destination.
1325 (define_insn "*aarch64_fnma4_elt_to_128df"
1326 [(set (match_operand:V2DF 0 "register_operand" "=w")
1329 (match_operand:V2DF 2 "register_operand" "w"))
1331 (match_operand:DF 1 "register_operand" "w"))
1332 (match_operand:V2DF 3 "register_operand" "0")))]
1334 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1335 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
;; Scalar DF FMLS whose multiplier is lane operand 2 of a V2DF register
;; (operand 1).  Operand 4 is the accumulator and is tied to the destination.
;; The DF operands are printed with the .2d arrangement.
1338 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1339 [(set (match_operand:DF 0 "register_operand" "=w")
1342 (match_operand:V2DF 1 "register_operand" "w")
1343 (parallel [(match_operand:SI 2 "immediate_operand")]))
1345 (match_operand:DF 3 "register_operand" "w"))
1346 (match_operand:DF 4 "register_operand" "0")))]
1348 "fmls\\t%0.2d, %3.2d, %1.2d[%2]"
1349 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1352 ;; Vector versions of the floating-point frint patterns.
1353 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round.
;; One insn is generated per <frint_pattern>; the rounding variant is chosen
;; by the <frint_suffix> appended to the FRINT mnemonic.
1354 (define_insn "<frint_pattern><mode>2"
1355 [(set (match_operand:VDQF 0 "register_operand" "=w")
1356 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1359 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1360 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1363 ;; Vector versions of the fcvt standard patterns.
1364 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Converts a VDQF vector to the <FCVT_TARGET> integer mode.  The rounding
;; variant comes from <frint_suffix> and the signedness from <su>, both
;; appended to the FCVT mnemonic.
1365 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1366 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1367 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1368 [(match_operand:VDQF 1 "register_operand" "w")]
1371 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1372 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
;; Standard-name expander converting a VDQF vector to the <FCVT_TARGET> mode:
;; FIXUORS applied to an unspec of operand 1.
1375 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1376 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1377 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1378 [(match_operand:VDQF 1 "register_operand")]
;; Expander for the <fix_trunc_optab> standard names.  It has the same RTL
;; shape as <optab><VDQF:mode><fcvt_target>2: FIXUORS applied to an unspec of
;; the VDQF operand.
1383 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1384 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1385 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1386 [(match_operand:VDQF 1 "register_operand")]
;; Expander for the ftrunc standard name on VDQF vectors, expressed as a
;; VDQF-to-VDQF unspec of operand 1.
1391 (define_expand "ftrunc<VDQF:mode>2"
1392 [(set (match_operand:VDQF 0 "register_operand")
1393 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
;; Integer-to-floating-point vector conversion from the <FCVT_TARGET> mode
;; into a VDQF destination.  Emits <su_optab>cvtf.
1398 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1399 [(set (match_operand:VDQF 0 "register_operand" "=w")
1401 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1403 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1404 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1407 ;; Conversions between vectors of floats and doubles.
1408 ;; Contains a mix of patterns to match standard pattern names
1409 ;; and those for intrinsics.
1411 ;; Float widening operations.
;; Widen lanes 0 and 1 of a V4SF vector to V2DF.  Emits FCVTL, reading the
;; source through the .2s arrangement.
1413 (define_insn "vec_unpacks_lo_v4sf"
1414 [(set (match_operand:V2DF 0 "register_operand" "=w")
1417 (match_operand:V4SF 1 "register_operand" "w")
1418 (parallel [(const_int 0) (const_int 1)])
1421 "fcvtl\\t%0.2d, %1.2s"
1422 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen a whole V2SF vector to V2DF.  Emits the same FCVTL form as
;; vec_unpacks_lo_v4sf, but the source operand is a 64-bit V2SF register.
1425 (define_insn "aarch64_float_extend_lo_v2df"
1426 [(set (match_operand:V2DF 0 "register_operand" "=w")
1428 (match_operand:V2SF 1 "register_operand" "w")))]
1430 "fcvtl\\t%0.2d, %1.2s"
1431 [(set_attr "type" "neon_fp_cvt_widen_s")]
;; Widen lanes 2 and 3 of a V4SF vector to V2DF.  Emits FCVTL2, reading the
;; source through the .4s arrangement.
1434 (define_insn "vec_unpacks_hi_v4sf"
1435 [(set (match_operand:V2DF 0 "register_operand" "=w")
1438 (match_operand:V4SF 1 "register_operand" "w")
1439 (parallel [(const_int 2) (const_int 3)])
1442 "fcvtl2\\t%0.2d, %1.4s"
1443 [(set_attr "type" "neon_fp_cvt_widen_s")]
1446 ;; Float narrowing operations.
;; Narrow a V2DF vector to V2SF with float_truncate.  Emits FCVTN.
;; Operand 1 must be a V2DF register.
1448 (define_insn "aarch64_float_truncate_lo_v2sf"
1449 [(set (match_operand:V2SF 0 "register_operand" "=w")
1450 (float_truncate:V2SF
1451 (match_operand:V2DF 1 "register_operand" "w")))]
1453 "fcvtn\\t%0.2s, %1.2d"
1454 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Build a V4SF from an existing V2SF (operand 1, tied to the destination)
;; and the float_truncate of a V2DF (operand 2).  Emits FCVTN2, which names
;; only the destination and operand 2.
1457 (define_insn "aarch64_float_truncate_hi_v4sf"
1458 [(set (match_operand:V4SF 0 "register_operand" "=w")
1460 (match_operand:V2SF 1 "register_operand" "0")
1461 (float_truncate:V2SF
1462 (match_operand:V2DF 2 "register_operand" "w"))))]
1464 "fcvtn2\\t%0.4s, %2.2d"
1465 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
;; Pack two V2DF vectors into one V4SF by truncation.  The expansion narrows
;; one input into a V2SF temporary with aarch64_float_truncate_lo_v2sf, then
;; narrows the other into the result with aarch64_float_truncate_hi_v4sf.
;; Which input is narrowed first depends on BYTES_BIG_ENDIAN.
1468 (define_expand "vec_pack_trunc_v2df"
1469 [(set (match_operand:V4SF 0 "register_operand")
1471 (float_truncate:V2SF
1472 (match_operand:V2DF 1 "register_operand"))
1473 (float_truncate:V2SF
1474 (match_operand:V2DF 2 "register_operand"))
1478 rtx tmp = gen_reg_rtx (V2SFmode);
1479 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1480 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1482 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1483 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1484 tmp, operands[hi]));
;; Pack two scalar DF values into one V2SF by truncation.  The two inputs are
;; first placed in the low and high halves of a V2DF temporary (the order
;; depends on BYTES_BIG_ENDIAN), which is then narrowed to V2SF.
;; The temporary must be V2DFmode: it is the destination of the
;; move_{lo,hi}_quad_v2df insns and the V2DF source operand of
;; aarch64_float_truncate_lo_v2sf.
1489 (define_expand "vec_pack_trunc_df"
1490 [(set (match_operand:V2SF 0 "register_operand")
1493 (match_operand:DF 1 "register_operand"))
1495 (match_operand:DF 2 "register_operand"))
1499 rtx tmp = gen_reg_rtx (V2DFmode);
1500 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1501 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1503 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1504 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1505 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
;; Floating-point multiply-subtract written as separate minus/mult RTL:
;; op0 = op1 - op2 * op3, with operand 1 tied to the destination.  Emits FMLS.
;; NOTE(review): the type attribute uses the "_scalar" variant although the
;; template has no lane index; fnma<mode>4 emits the same FMLS form with
;; "neon_fp_mla_<Vetype><q>".  Confirm which is intended.
1510 (define_insn "aarch64_vmls<mode>"
1511 [(set (match_operand:VDQF 0 "register_operand" "=w")
1512 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1513 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1514 (match_operand:VDQF 3 "register_operand" "w"))))]
1516 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1517 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1521 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1523 ;; a = (b < c) ? b : c;
1524 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1525 ;; either explicitly or indirectly via -ffast-math.
1527 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1528 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1529 ;; operand will be returned when both operands are zero (i.e. they may not
1530 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1531 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
;; Floating-point vector max/min for the FMAXMIN RTL codes over the VDQF
;; modes.  Emits the "nm" form of the instruction (f<maxmin>nm).
1534 (define_insn "<su><maxmin><mode>3"
1535 [(set (match_operand:VDQF 0 "register_operand" "=w")
1536 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1537 (match_operand:VDQF 2 "register_operand" "w")))]
1539 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1540 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
;; Floating-point vector max/min expressed as an unspec of operands 1 and 2.
;; The mnemonic is taken directly from <maxmin_uns_op>.
1543 (define_insn "<maxmin_uns><mode>3"
1544 [(set (match_operand:VDQF 0 "register_operand" "=w")
1545 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1546 (match_operand:VDQF 2 "register_operand" "w")]
1549 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1550 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1553 ;; 'across lanes' add.
;; Across-lanes add reduction for the VDQV modes.  Emits ADDV; the result is
;; written through the scalar register view %<Vetype>0.
1555 (define_insn "reduc_<sur>plus_<mode>"
1556 [(set (match_operand:VDQV 0 "register_operand" "=w")
1557 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1560 "addv\\t%<Vetype>0, %1.<Vtype>"
1561 [(set_attr "type" "neon_reduc_add<q>")]
;; Add reduction for V2DI, handled by a dedicated pattern separate from the
;; VDQV one.
1564 (define_insn "reduc_<sur>plus_v2di"
1565 [(set (match_operand:V2DI 0 "register_operand" "=w")
1566 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
1570 [(set_attr "type" "neon_reduc_add_q")]
;; Add reduction for V2SI.  Emits a pairwise ADDP with operand 1 as both
;; sources.
1573 (define_insn "reduc_<sur>plus_v2si"
1574 [(set (match_operand:V2SI 0 "register_operand" "=w")
1575 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1578 "addp\\t%0.2s, %1.2s, %1.2s"
1579 [(set_attr "type" "neon_reduc_add")]
;; Add reduction for the two-element floating-point modes (V2F).  Emits the
;; FADDP form that writes a scalar register (%<Vetype>0).
1582 (define_insn "reduc_<sur>plus_<mode>"
1583 [(set (match_operand:V2F 0 "register_operand" "=w")
1584 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1587 "faddp\\t%<Vetype>0, %1.<Vtype>"
1588 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
;; One pairwise-add step on a V4SF vector: FADDP with operand 1 as both
;; sources.  reduc_<sur>plus_v4sf emits this twice.
1591 (define_insn "aarch64_addpv4sf"
1592 [(set (match_operand:V4SF 0 "register_operand" "=w")
1593 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1596 "faddp\\t%0.4s, %1.4s, %1.4s"
1597 [(set_attr "type" "neon_fp_reduc_add_s_q")]
;; Add reduction for V4SF, expanded as two aarch64_addpv4sf steps: operand 1
;; into a fresh V4SF temporary, then the temporary into operand 0.
1600 (define_expand "reduc_<sur>plus_v4sf"
1601 [(set (match_operand:V4SF 0 "register_operand")
1602 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
1606 rtx tmp = gen_reg_rtx (V4SFmode);
1607 emit_insn (gen_aarch64_addpv4sf (tmp, operands[1]));
1608 emit_insn (gen_aarch64_addpv4sf (operands[0], tmp));
;; Per-lane count-leading-zeros over the VDQ_BHSI modes.  Emits CLZ.
1612 (define_insn "clz<mode>2"
1613 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1614 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1616 "clz\\t%0.<Vtype>, %1.<Vtype>"
1617 [(set_attr "type" "neon_cls<q>")]
1620 ;; 'across lanes' max and min ops.
;; Across-lanes max/min reduction for the VDQV modes.  Emits the "v" form of
;; <maxmin_uns_op>, writing the scalar register view %<Vetype>0.
1622 (define_insn "reduc_<maxmin_uns>_<mode>"
1623 [(set (match_operand:VDQV 0 "register_operand" "=w")
1624 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1627 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
1628 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Max/min reduction for V2DI.  Emits the pairwise "p" form of
;; <maxmin_uns_op>, writing the D-register view of operand 0.
1631 (define_insn "reduc_<maxmin_uns>_v2di"
1632 [(set (match_operand:V2DI 0 "register_operand" "=w")
1633 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "w")]
1636 "<maxmin_uns_op>p\\t%d0, %1.2d"
1637 [(set_attr "type" "neon_reduc_minmax_q")]
;; Max/min reduction for V2SI.  Emits the pairwise "p" form of
;; <maxmin_uns_op> with operand 1 as both sources.
1640 (define_insn "reduc_<maxmin_uns>_v2si"
1641 [(set (match_operand:V2SI 0 "register_operand" "=w")
1642 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1645 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
1646 [(set_attr "type" "neon_reduc_minmax")]
;; Max/min reduction for the two-element floating-point modes (V2F).  Emits
;; the pairwise "p" form of <maxmin_uns_op>, writing the scalar register view
;; %<Vetype>0.
1649 (define_insn "reduc_<maxmin_uns>_<mode>"
1650 [(set (match_operand:V2F 0 "register_operand" "=w")
1651 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1654 "<maxmin_uns_op>p\\t%<Vetype>0, %1.<Vtype>"
1655 [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
;; Max/min reduction for V4SF.  Emits the across-lanes "v" form of
;; <maxmin_uns_op>, writing the S-register view of operand 0.
1658 (define_insn "reduc_<maxmin_uns>_v4sf"
1659 [(set (match_operand:V4SF 0 "register_operand" "=w")
1660 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1663 "<maxmin_uns_op>v\\t%s0, %1.4s"
1664 [(set_attr "type" "neon_fp_reduc_minmax_s_q")]
1667 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
1669 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
1672 ;; Thus our BSL is of the form:
1673 ;; op0 = bsl (mask, op2, op3)
1674 ;; We can use any of:
1677 ;; bsl mask, op2, op3
1678 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
1679 ;; bit op0, op2, mask
1680 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
1681 ;; bif op0, op3, mask
;; Bitwise select with three alternatives, chosen by which input register
;; allocation ties to the destination:
;;   alt 0: mask (operand 1) tied to op0 -> bsl %0, %2, %3
;;   alt 1: operand 3 tied to op0        -> bit %0, %2, %1
;;   alt 2: operand 2 tied to op0        -> bif %0, %3, %1
;; The mask has the <V_cmp_result> mode; the data operands are VALL.
1683 (define_insn "aarch64_simd_bsl<mode>_internal"
1684 [(set (match_operand:VALL 0 "register_operand" "=w,w,w")
1687 (match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w")
1688 (match_operand:VALL 2 "register_operand" " w,w,0"))
1691 (match_dup:<V_cmp_result> 1))
1692 (match_operand:VALL 3 "register_operand" " w,0,w"))
1696 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
1697 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
1698 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
1699 [(set_attr "type" "neon_bsl<q>")]
;; Entry point for bitwise select.  It views the mask (operand 1) in the
;; <V_cmp_result> mode via gen_lowpart and forwards all four operands to
;; aarch64_simd_bsl<mode>_internal.
1702 (define_expand "aarch64_simd_bsl<mode>"
1703 [(match_operand:VALL 0 "register_operand")
1704 (match_operand:<V_cmp_result> 1 "register_operand")
1705 (match_operand:VALL 2 "register_operand")
1706 (match_operand:VALL 3 "register_operand")]
1709 /* We can't alias operands together if they have different modes. */
1710 operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
1711 emit_insn (gen_aarch64_simd_bsl<mode>_internal (operands[0], operands[1],
1712 operands[2], operands[3]));
;; Integer vector conditional select: op0 = (op4 <cmp3> op5) ? op1 : op2.
;; The expansion proceeds in three steps:
;;   1. Force operand 5 into a register unless it is the zero vector and the
;;      comparison has a compare-against-zero form (has_zero_imm_form).
;;   2. Emit one of cmge/cmgt/cmgeu/cmgtu/cmeq into a fresh mask register.
;;   3. If the arms are all-ones / all-zeros, just move the mask to the
;;      destination; otherwise force the arms into registers and select
;;      with aarch64_simd_bsl<mode>.
1716 (define_expand "aarch64_vcond_internal<mode><mode>"
1717 [(set (match_operand:VDQ 0 "register_operand")
1719 (match_operator 3 "comparison_operator"
1720 [(match_operand:VDQ 4 "register_operand")
1721 (match_operand:VDQ 5 "nonmemory_operand")])
1722 (match_operand:VDQ 1 "nonmemory_operand")
1723 (match_operand:VDQ 2 "nonmemory_operand")))]
1726 int inverse = 0, has_zero_imm_form = 0;
1727 rtx op1 = operands[1];
1728 rtx op2 = operands[2];
1729 rtx mask = gen_reg_rtx (<MODE>mode);
1731 switch (GET_CODE (operands[3]))
1741 has_zero_imm_form = 1;
1751 if (!REG_P (operands[5])
1752 && (operands[5] != CONST0_RTX (<MODE>mode) || !has_zero_imm_form))
1753 operands[5] = force_reg (<MODE>mode, operands[5]);
1755 switch (GET_CODE (operands[3]))
1759 emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
1764 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
1769 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
1774 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
1779 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
1792 /* If we have (a = (b CMP c) ? -1 : 0);
1793 Then we can simply move the generated mask. */
1795 if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
1796 && op2 == CONST0_RTX (<V_cmp_result>mode))
1797 emit_move_insn (operands[0], mask);
1801 op1 = force_reg (<MODE>mode, op1);
1803 op2 = force_reg (<MODE>mode, op2);
1804 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
;; Floating-point vector conditional select:
;;   op0 = (op4 <cmp3> op5) ? op1 : op2
;; The compared operands (4, 5) are VDQF; the selected values (0, 1, 2) are
;; VDQF_COND.  The expansion emits FCM* comparisons into an integer mask of
;; the <VDQF_COND:V_cmp_result> mode, then either moves the mask directly
;; (arms are -1 / 0) or selects with aarch64_simd_bsl<VDQF_COND:mode>.
;; Comparisons that must be true on unordered lanes are implemented by
;; emitting the inverse comparison and swapping the BSL arms.
;; Because the pattern has two mode iterators, mode attributes that refer to
;; the compared operands are written with an explicit VDQF: qualifier.
1811 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
1812 [(set (match_operand:VDQF_COND 0 "register_operand")
1814 (match_operator 3 "comparison_operator"
1815 [(match_operand:VDQF 4 "register_operand")
1816 (match_operand:VDQF 5 "nonmemory_operand")])
1817 (match_operand:VDQF_COND 1 "nonmemory_operand")
1818 (match_operand:VDQF_COND 2 "nonmemory_operand")))]
1822 int use_zero_form = 0;
1823 int swap_bsl_operands = 0;
1824 rtx op1 = operands[1];
1825 rtx op2 = operands[2];
1826 rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
1827 rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
1829 rtx (*base_comparison) (rtx, rtx, rtx);
1830 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1832 switch (GET_CODE (operands[3]))
1839 if (operands[5] == CONST0_RTX (<VDQF:MODE>mode))
1846 if (!REG_P (operands[5]))
1847 operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
1850 switch (GET_CODE (operands[3]))
1860 base_comparison = gen_aarch64_cmge<VDQF:mode>;
1861 complimentary_comparison = gen_aarch64_cmgt<VDQF:mode>;
1869 base_comparison = gen_aarch64_cmgt<VDQF:mode>;
1870 complimentary_comparison = gen_aarch64_cmge<VDQF:mode>;
1875 base_comparison = gen_aarch64_cmeq<VDQF:mode>;
1876 complimentary_comparison = gen_aarch64_cmeq<VDQF:mode>;
1882 switch (GET_CODE (operands[3]))
1889 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
1890 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1896 Note that there also exist direct comparison against 0 forms,
1897 so catch those as a special case. */
1901 switch (GET_CODE (operands[3]))
1904 base_comparison = gen_aarch64_cmlt<VDQF:mode>;
1907 base_comparison = gen_aarch64_cmle<VDQF:mode>;
1910 /* Do nothing, other zero form cases already have the correct
1917 emit_insn (base_comparison (mask, operands[4], operands[5]));
1919 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1926 /* FCM returns false for lanes which are unordered, so if we use
1927 the inverse of the comparison we actually want to emit, then
1928 swap the operands to BSL, we will end up with the correct result.
1929 Note that a NE NaN and NaN NE b are true for all a, b.
1931 Our transformations are:
1936 a NE b -> !(a EQ b) */
1939 emit_insn (base_comparison (mask, operands[4], operands[5]));
1941 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1943 swap_bsl_operands = 1;
1946 /* We check (a > b || b > a). Combining these comparisons gives us
1947 true iff (a != b && a ORDERED b); swapping the operands to BSL
1948 will then give us (a == b || a UNORDERED b) as intended. */
1950 emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
1951 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
1952 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
1953 swap_bsl_operands = 1;
1956 /* Operands are ORDERED iff (a > b || b >= a).
1957 Swapping the operands to BSL will give the UNORDERED case. */
1958 swap_bsl_operands = 1;
1961 emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
1962 emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
1963 emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
1969 if (swap_bsl_operands)
1975 /* If we have (a = (b CMP c) ? -1 : 0);
1976 Then we can simply move the generated mask. */
1978 if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
1979 && op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
1980 emit_move_insn (operands[0], mask);
1984 op1 = force_reg (<VDQF_COND:MODE>mode, op1);
1986 op2 = force_reg (<VDQF_COND:MODE>mode, op2);
1987 emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
;; Standard-name vcond for the case where the compared and selected vectors
;; share one VALL mode.  Forwards all six operands unchanged to
;; aarch64_vcond_internal<mode><mode>.
1994 (define_expand "vcond<mode><mode>"
1995 [(set (match_operand:VALL 0 "register_operand")
1997 (match_operator 3 "comparison_operator"
1998 [(match_operand:VALL 4 "register_operand")
1999 (match_operand:VALL 5 "nonmemory_operand")])
2000 (match_operand:VALL 1 "nonmemory_operand")
2001 (match_operand:VALL 2 "nonmemory_operand")))]
2004 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2005 operands[2], operands[3],
2006 operands[4], operands[5]));
;; Standard-name vcond that compares VDQF (floating-point) vectors while
;; selecting values of the corresponding <V_cmp_result> mode.  Forwards all
;; six operands to aarch64_vcond_internal<v_cmp_result><mode>.
2010 (define_expand "vcond<v_cmp_result><mode>"
2011 [(set (match_operand:<V_cmp_result> 0 "register_operand")
2012 (if_then_else:<V_cmp_result>
2013 (match_operator 3 "comparison_operator"
2014 [(match_operand:VDQF 4 "register_operand")
2015 (match_operand:VDQF 5 "nonmemory_operand")])
2016 (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2017 (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2020 emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2021 operands[0], operands[1],
2022 operands[2], operands[3],
2023 operands[4], operands[5]));
;; Standard-name vcondu over the VDQ modes.  Shares
;; aarch64_vcond_internal<mode><mode> with vcond; the comparison code in
;; operand 3 selects the unsigned compare there.
2027 (define_expand "vcondu<mode><mode>"
2028 [(set (match_operand:VDQ 0 "register_operand")
2030 (match_operator 3 "comparison_operator"
2031 [(match_operand:VDQ 4 "register_operand")
2032 (match_operand:VDQ 5 "nonmemory_operand")])
2033 (match_operand:VDQ 1 "nonmemory_operand")
2034 (match_operand:VDQ 2 "nonmemory_operand")))]
2037 emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2038 operands[2], operands[3],
2039 operands[4], operands[5]));
2043 ;; Patterns for AArch64 SIMD Intrinsics.
;; Create a VD_RE-mode vector from a DI value.  The DI operand is
;; reinterpreted in the vector mode with gen_lowpart and then moved to the
;; destination.
2045 (define_expand "aarch64_create<mode>"
2046 [(match_operand:VD_RE 0 "register_operand" "")
2047 (match_operand:DI 1 "general_operand" "")]
2050 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2051 emit_move_insn (operands[0], src);
2055 ;; Lane extraction with sign extension to general purpose register.
;; The lane index is remapped through ENDIAN_LANE_N before printing.  Emits
;; SMOV into the <GPI:w>-sized general register.
2056 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2057 [(set (match_operand:GPI 0 "register_operand" "=r")
2060 (match_operand:VDQQH 1 "register_operand" "w")
2061 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2064 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2065 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2067 [(set_attr "type" "neon_to_gp<q>")]
;; Lane extraction with zero extension to an SI general-purpose register.
;; The lane index is remapped through ENDIAN_LANE_N before printing.  Emits
;; UMOV into the W register.
2070 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2071 [(set (match_operand:SI 0 "register_operand" "=r")
2074 (match_operand:VDQQH 1 "register_operand" "w")
2075 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2078 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2079 return "umov\\t%w0, %1.<Vetype>[%2]";
2081 [(set_attr "type" "neon_to_gp<q>")]
2084 ;; Lane extraction of a value, neither sign nor zero extension
2085 ;; is guaranteed so upper bits should be considered undefined.
;; Three alternatives, chosen by the destination:
;;   "r"   general register -> umov
;;   "w"   SIMD register    -> dup (scalar form)
;;   "Utv" memory           -> st1 of the single lane
;; The lane index is remapped through ENDIAN_LANE_N before printing.
2086 (define_insn "aarch64_get_lane<mode>"
2087 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2089 (match_operand:VALL 1 "register_operand" "w, w, w")
2090 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2093 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2094 switch (which_alternative)
2097 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2099 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2101 return "st1\\t{%1.<Vetype>}[%2], %0";
2106 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
;; "Lane" extraction from a DI value.  The lane index (operand 2) is checked
;; with aarch64_simd_lane_bounds (operands[2], 0, 1), after which the
;; operation is a plain move of operand 1 to operand 0.
2109 (define_expand "aarch64_get_lanedi"
2110 [(match_operand:DI 0 "register_operand")
2111 (match_operand:DI 1 "register_operand")
2112 (match_operand:SI 2 "immediate_operand")]
2115 aarch64_simd_lane_bounds (operands[2], 0, 1);
2116 emit_move_insn (operands[0], operands[1]);
;; Reinterpret a VDC-mode register as V8QI by delegating to
;; aarch64_simd_reinterpret.
2120 (define_expand "aarch64_reinterpretv8qi<mode>"
2121 [(match_operand:V8QI 0 "register_operand" "")
2122 (match_operand:VDC 1 "register_operand" "")]
2125 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDC-mode register as V4HI by delegating to
;; aarch64_simd_reinterpret.
2129 (define_expand "aarch64_reinterpretv4hi<mode>"
2130 [(match_operand:V4HI 0 "register_operand" "")
2131 (match_operand:VDC 1 "register_operand" "")]
2134 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDC-mode register as V2SI by delegating to
;; aarch64_simd_reinterpret.
2138 (define_expand "aarch64_reinterpretv2si<mode>"
2139 [(match_operand:V2SI 0 "register_operand" "")
2140 (match_operand:VDC 1 "register_operand" "")]
2143 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VDC-mode register as V2SF by delegating to
;; aarch64_simd_reinterpret.
2147 (define_expand "aarch64_reinterpretv2sf<mode>"
2148 [(match_operand:V2SF 0 "register_operand" "")
2149 (match_operand:VDC 1 "register_operand" "")]
2152 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VD_RE-mode register as DI by delegating to
;; aarch64_simd_reinterpret.  Unlike the other 64-bit reinterprets, the
;; source iterator here is VD_RE rather than VDC.
2156 (define_expand "aarch64_reinterpretdi<mode>"
2157 [(match_operand:DI 0 "register_operand" "")
2158 (match_operand:VD_RE 1 "register_operand" "")]
2161 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQ-mode register as V16QI by delegating to
;; aarch64_simd_reinterpret.
2165 (define_expand "aarch64_reinterpretv16qi<mode>"
2166 [(match_operand:V16QI 0 "register_operand" "")
2167 (match_operand:VQ 1 "register_operand" "")]
2170 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQ-mode register as V8HI by delegating to
;; aarch64_simd_reinterpret.
2174 (define_expand "aarch64_reinterpretv8hi<mode>"
2175 [(match_operand:V8HI 0 "register_operand" "")
2176 (match_operand:VQ 1 "register_operand" "")]
2179 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQ-mode register as V4SI by delegating to
;; aarch64_simd_reinterpret.
2183 (define_expand "aarch64_reinterpretv4si<mode>"
2184 [(match_operand:V4SI 0 "register_operand" "")
2185 (match_operand:VQ 1 "register_operand" "")]
2188 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQ-mode register as V4SF by delegating to
;; aarch64_simd_reinterpret.
2192 (define_expand "aarch64_reinterpretv4sf<mode>"
2193 [(match_operand:V4SF 0 "register_operand" "")
2194 (match_operand:VQ 1 "register_operand" "")]
2197 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQ-mode register as V2DI by delegating to
;; aarch64_simd_reinterpret.
2201 (define_expand "aarch64_reinterpretv2di<mode>"
2202 [(match_operand:V2DI 0 "register_operand" "")
2203 (match_operand:VQ 1 "register_operand" "")]
2206 aarch64_simd_reinterpret (operands[0], operands[1]);
;; Reinterpret a VQ-mode register as V2DF by delegating to
;; aarch64_simd_reinterpret.
2210 (define_expand "aarch64_reinterpretv2df<mode>"
2211 [(match_operand:V2DF 0 "register_operand" "")
2212 (match_operand:VQ 1 "register_operand" "")]
2215 aarch64_simd_reinterpret (operands[0], operands[1]);
2219 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; Combine a VDIC vector (operand 1) with a zero vector (operand 2, which
;; must match aarch64_simd_imm_zero / constraint "Dz") into a <VDBL> result.
;; Emitted as a single 8-byte vector move; the destination is earlyclobber.
;; NOTE(review): presumably relies on the 64-bit move leaving the upper half
;; of the destination zero; confirm against the architecture reference.
2222 (define_insn "*aarch64_combinez<mode>"
2223 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2225 (match_operand:VDIC 1 "register_operand" "w")
2226 (match_operand:VDIC 2 "aarch64_simd_imm_zero" "Dz")))]
2228 "mov\\t%0.8b, %1.8b"
2229 [(set_attr "type" "neon_move<q>")]
;; Concatenate two VDC vectors into a <VDBL> vector (vec_concat).  The insn
;; is kept whole until after reload, when it is split by
;; aarch64_split_simd_combine.  The destination is earlyclobber.
2232 (define_insn_and_split "aarch64_combine<mode>"
2233 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2234 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2235 (match_operand:VDC 2 "register_operand" "w")))]
2238 "&& reload_completed"
2241 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2244 [(set_attr "type" "multiple")]
;; Expander form of the combine.  It writes operand 1 to the low half and
;; operand 2 to the high half of the <VDBL> destination, using
;; move_lo_quad_<Vdbl> followed by move_hi_quad_<Vdbl>.
;; NOTE(review): this is a define_expand, yet it carries operand constraints
;; and a trailing set_attr; confirm whether either has any effect here.
2247 (define_expand "aarch64_simd_combine<mode>"
2248 [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2249 (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2250 (match_operand:VDC 2 "register_operand" "w")))]
2253 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2254 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2257 [(set_attr "type" "multiple")]
2260 ;; <su><addsub>l<q>.
;; Widening add/subtract of the high halves of two VQW vectors.  Each half is
;; selected with vec_select (operand 3 must satisfy vect_par_cnst_hi_half)
;; and extended with ANY_EXTEND before the ADDSUB operation.  Emits the "l2"
;; form of the instruction.
2262 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2263 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2264 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2265 (match_operand:VQW 1 "register_operand" "w")
2266 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2267 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2268 (match_operand:VQW 2 "register_operand" "w")
2271 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2272 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Widening add/subtract of the low halves of two VQW vectors.  Operand 3
;; must satisfy vect_par_cnst_lo_half.  Emits the "l" form of the
;; instruction on the <Vhalftype> arrangement.
2275 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2276 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2277 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2278 (match_operand:VQW 1 "register_operand" "w")
2279 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2280 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2281 (match_operand:VQW 2 "register_operand" "w")
2284 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2285 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; Expander for the saddl2 intrinsic.  It builds the high-half selection
;; parallel with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_saddl<mode>_hi_internal.
2289 (define_expand "aarch64_saddl2<mode>"
2290 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2291 (match_operand:VQW 1 "register_operand" "w")
2292 (match_operand:VQW 2 "register_operand" "w")]
2295 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2296 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
;; Expander for the uaddl2 intrinsic.  It builds the high-half selection
;; parallel with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_uaddl<mode>_hi_internal.
2301 (define_expand "aarch64_uaddl2<mode>"
2302 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2303 (match_operand:VQW 1 "register_operand" "w")
2304 (match_operand:VQW 2 "register_operand" "w")]
2307 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2308 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
;; Expander for the ssubl2 intrinsic.  It builds the high-half selection
;; parallel with aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits
;; aarch64_ssubl<mode>_hi_internal.
2313 (define_expand "aarch64_ssubl2<mode>"
2314 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2315 (match_operand:VQW 1 "register_operand" "w")
2316 (match_operand:VQW 2 "register_operand" "w")]
2319 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2320 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
;; Expander for usubl2: builds the half-selection constant with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits the
;; usubl<mode>_hi_internal insn.
2325 (define_expand "aarch64_usubl2<mode>"
2326 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2327 (match_operand:VQW 1 "register_operand" "w")
2328 (match_operand:VQW 2 "register_operand" "w")]
2331 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2332 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
;; Widening "long" add/subtract of two VDW registers into a <VWIDE> result.
;; The mnemonic and its operands are separated by a tab, matching the
;; _hi_internal/_lo_internal templates of the same family.
2337 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2338 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2339 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2340 (match_operand:VDW 1 "register_operand" "w"))
2342 (match_operand:VDW 2 "register_operand" "w"))))]
2344 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2345 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
;; "Wide" add/subtract: operand 1 is already <VWIDE>, operand 2 is a VDW
;; register printed with its narrower <Vtype> arrangement.
2348 ;; <su><addsub>w<q>.
2350 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2351 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2352 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2354 (match_operand:VDW 2 "register_operand" "w"))))]
2356 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2357 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; "Wide" add/subtract using a VQW second source together with a
;; vect_par_cnst_hi_half selector (operand 3); prints the <su><optab>w2
;; mnemonic.
2360 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2361 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2362 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2365 (match_operand:VQW 2 "register_operand" "w")
2366 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2368 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2369 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
;; Expander for saddw2: builds the half-selection constant with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits the
;; saddw2<mode>_internal insn.
2372 (define_expand "aarch64_saddw2<mode>"
2373 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2374 (match_operand:<VWIDE> 1 "register_operand" "w")
2375 (match_operand:VQW 2 "register_operand" "w")]
2378 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2379 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
;; Expander for uaddw2: builds the half-selection constant with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits the
;; uaddw2<mode>_internal insn.
2384 (define_expand "aarch64_uaddw2<mode>"
2385 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2386 (match_operand:<VWIDE> 1 "register_operand" "w")
2387 (match_operand:VQW 2 "register_operand" "w")]
2390 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2391 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
;; Expander for ssubw2: builds the half-selection constant with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits the
;; ssubw2<mode>_internal insn.
2397 (define_expand "aarch64_ssubw2<mode>"
2398 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2399 (match_operand:<VWIDE> 1 "register_operand" "w")
2400 (match_operand:VQW 2 "register_operand" "w")]
2403 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2404 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
;; Expander for usubw2: builds the half-selection constant with
;; aarch64_simd_vect_par_cnst_half (<MODE>mode, true) and emits the
;; usubw2<mode>_internal insn.
2409 (define_expand "aarch64_usubw2<mode>"
2410 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2411 (match_operand:<VWIDE> 1 "register_operand" "w")
2412 (match_operand:VQW 2 "register_operand" "w")]
2415 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2416 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
;; Halving add/subtract on VQ_S vectors, represented as an unspec of the two
;; register sources and printed as <sur>h<addsub>.
2421 ;; <su><r>h<addsub>.
2423 (define_insn "aarch64_<sur>h<addsub><mode>"
2424 [(set (match_operand:VQ_S 0 "register_operand" "=w")
2425 (unspec:VQ_S [(match_operand:VQ_S 1 "register_operand" "w")
2426 (match_operand:VQ_S 2 "register_operand" "w")]
2429 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2430 [(set_attr "type" "neon_<addsub>_halve<q>")]
;; Narrowing add/subtract: two VQN sources produce a <VNARROWQ> result,
;; printed with the <Vntype> destination arrangement.
2433 ;; <r><addsub>hn<q>.
2435 (define_insn "aarch64_<sur><addsub>hn<mode>"
2436 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2437 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2438 (match_operand:VQN 2 "register_operand" "w")]
2441 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2442 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; "hn2" form of the narrowing add/subtract.  Operand 1 (<VNARROWQ>) is tied
;; to the destination by the "0" constraint, so only operands 2 and 3 appear
;; as sources in the assembler template.
2445 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2446 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2447 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2448 (match_operand:VQN 2 "register_operand" "w")
2449 (match_operand:VQN 3 "register_operand" "w")]
2452 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2453 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
;; pmul on VB vectors, represented as an unspec of the two register sources.
2458 (define_insn "aarch64_pmul<mode>"
2459 [(set (match_operand:VB 0 "register_operand" "=w")
2460 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2461 (match_operand:VB 2 "register_operand" "w")]
2464 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2465 [(set_attr "type" "neon_mul_<Vetype><q>")]
;; One insn per code in the BINQOPS iterator over VSDQ_I modes; the mnemonic
;; is formed from <su_optab><optab>.
2470 (define_insn "aarch64_<su_optab><optab><mode>"
2471 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2472 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2473 (match_operand:VSDQ_I 2 "register_operand" "w")))]
2475 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2476 [(set_attr "type" "neon_<optab><q>")]
;; Accumulating form: operand 1 is tied to the destination ("0"), so the
;; template prints only the destination and operand 2.
2479 ;; suqadd and usqadd
2481 (define_insn "aarch64_<sur>qadd<mode>"
2482 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2483 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2484 (match_operand:VSDQ_I 2 "register_operand" "w")]
2487 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2488 [(set_attr "type" "neon_qadd<q>")]
;; Narrowing move from a VSQN_HSDI source to a <VNARROWQ> result; the
;; pattern is named sqmovun but the emitted mnemonic is sqxtun.
2493 (define_insn "aarch64_sqmovun<mode>"
2494 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2495 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2498 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2499 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Narrowing move from a VSQN_HSDI source to a <VNARROWQ> result; the
;; emitted mnemonic is <sur>qxtn.
2502 ;; sqmovn and uqmovn
2504 (define_insn "aarch64_<sur>qmovn<mode>"
2505 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2506 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2509 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2510 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Single-source pattern over VSDQ_I_BHSI modes, printed as s<optab>.
;; NOTE(review): the rtx code wrapping operand 1 is not visible in this
;; excerpt -- confirm against the full file.
2515 (define_insn "aarch64_s<optab><mode>"
2516 [(set (match_operand:VSDQ_I_BHSI 0 "register_operand" "=w")
2518 (match_operand:VSDQ_I_BHSI 1 "register_operand" "w")))]
2520 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2521 [(set_attr "type" "neon_<optab><q>")]
;; sq<r>dmulh with two register sources over VSDQ_HSI modes.
2526 (define_insn "aarch64_sq<r>dmulh<mode>"
2527 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2529 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2530 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2533 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2534 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
;; By-lane sq<r>dmulh for VDQHS modes.  The lane register has mode <VCOND>;
;; the lane index (operand 3) is checked with aarch64_simd_lane_bounds
;; against GET_MODE_NUNITS (<VCOND>mode) when the insn is output.
2539 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2540 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2542 [(match_operand:VDQHS 1 "register_operand" "w")
2544 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2545 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2549 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCOND>mode));
2550 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2551 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; "laneq" variant of the by-lane sq<r>dmulh: the lane register has mode
;; <VCONQ> and the index is bounded by GET_MODE_NUNITS (<VCONQ>mode).
2554 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2555 [(set (match_operand:VDQHS 0 "register_operand" "=w")
2557 [(match_operand:VDQHS 1 "register_operand" "w")
2559 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2560 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2564 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
2565 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2566 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; By-lane sq<r>dmulh for the SD_HSI modes.  The lane register has mode
;; <VCONQ>, the index is bounded by GET_MODE_NUNITS (<VCONQ>mode), and the
;; operands are printed through the <v> prefix rather than a vector
;; arrangement.
2569 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2570 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2572 [(match_operand:SD_HSI 1 "register_operand" "w")
2574 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2575 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2579 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode));
2580 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2581 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
;; sqdmlal/sqdmlsl: operand 1 is the <VWIDE> accumulator tied to the
;; destination ("0"); operands 2 and 3 are sign-extended VSD_HSI sources.
2586 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2587 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2589 (match_operand:<VWIDE> 1 "register_operand" "0")
2592 (sign_extend:<VWIDE>
2593 (match_operand:VSD_HSI 2 "register_operand" "w"))
2594 (sign_extend:<VWIDE>
2595 (match_operand:VSD_HSI 3 "register_operand" "w")))
2598 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2599 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
;; By-lane sqdmlal/sqdmlsl for VD_HSI modes: the second multiplicand is a
;; vec_duplicate of the lane (operand 4) selected from the <VCON> register
;; in operand 3.  Operand 1 is the accumulator tied to the destination.
2604 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal"
2605 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2607 (match_operand:<VWIDE> 1 "register_operand" "0")
2610 (sign_extend:<VWIDE>
2611 (match_operand:VD_HSI 2 "register_operand" "w"))
2612 (sign_extend:<VWIDE>
2613 (vec_duplicate:VD_HSI
2615 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2616 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2620 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"
2621 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; By-lane sqdmlal/sqdmlsl for the SD_HSI modes: as the VD_HSI pattern of
;; the same name, but the selected lane is used directly (no vec_duplicate
;; is visible here).
2624 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>_internal"
2625 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2627 (match_operand:<VWIDE> 1 "register_operand" "0")
2630 (sign_extend:<VWIDE>
2631 (match_operand:SD_HSI 2 "register_operand" "w"))
2632 (sign_extend:<VWIDE>
2634 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2635 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2639 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"
2640 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal by lane.  The lane index (operand 4) is bounded by
;; half of GET_MODE_NUNITS (<VCON>mode) before the _lane<mode>_internal insn
;; is emitted.
2643 (define_expand "aarch64_sqdmlal_lane<mode>"
2644 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2645 (match_operand:<VWIDE> 1 "register_operand" "0")
2646 (match_operand:VSD_HSI 2 "register_operand" "w")
2647 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2648 (match_operand:SI 4 "immediate_operand" "i")]
2651 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2652 emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
2653 operands[2], operands[3],
;; Expander for sqdmlal by lane, "laneq" flavour.  The lane index may range
;; over all GET_MODE_NUNITS (<VCON>mode) lanes; it emits the same
;; _lane<mode>_internal insn as the non-q expander.
2658 (define_expand "aarch64_sqdmlal_laneq<mode>"
2659 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2660 (match_operand:<VWIDE> 1 "register_operand" "0")
2661 (match_operand:VSD_HSI 2 "register_operand" "w")
2662 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2663 (match_operand:SI 4 "immediate_operand" "i")]
2666 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
2667 emit_insn (gen_aarch64_sqdmlal_lane<mode>_internal (operands[0], operands[1],
2668 operands[2], operands[3],
;; Expander for sqdmlsl by lane.  The lane index (operand 4) is bounded by
;; half of GET_MODE_NUNITS (<VCON>mode) before the _lane<mode>_internal insn
;; is emitted.
2673 (define_expand "aarch64_sqdmlsl_lane<mode>"
2674 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2675 (match_operand:<VWIDE> 1 "register_operand" "0")
2676 (match_operand:VSD_HSI 2 "register_operand" "w")
2677 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2678 (match_operand:SI 4 "immediate_operand" "i")]
2681 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2682 emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
2683 operands[2], operands[3],
;; Expander for sqdmlsl by lane, "laneq" flavour.  The lane index may range
;; over all GET_MODE_NUNITS (<VCON>mode) lanes; it emits the same
;; _lane<mode>_internal insn as the non-q expander.
2688 (define_expand "aarch64_sqdmlsl_laneq<mode>"
2689 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2690 (match_operand:<VWIDE> 1 "register_operand" "0")
2691 (match_operand:VSD_HSI 2 "register_operand" "w")
2692 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2693 (match_operand:SI 4 "immediate_operand" "i")]
2696 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<VCON>mode));
2697 emit_insn (gen_aarch64_sqdmlsl_lane<mode>_internal (operands[0], operands[1],
2698 operands[2], operands[3],
;; "_n" form of sqdmlal/sqdmlsl: the second multiplicand is a vec_duplicate
;; of the <VEL> register in operand 3, printed as lane [0] of that register.
2705 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
2706 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2708 (match_operand:<VWIDE> 1 "register_operand" "0")
2711 (sign_extend:<VWIDE>
2712 (match_operand:VD_HSI 2 "register_operand" "w"))
2713 (sign_extend:<VWIDE>
2714 (vec_duplicate:VD_HSI
2715 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
2718 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
2719 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; sqdmlal2/sqdmlsl2: both VQ_HSI sources are used through a
;; vect_par_cnst_hi_half selector (operand 4); operand 1 is the accumulator
;; tied to the destination.
2724 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
2725 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2727 (match_operand:<VWIDE> 1 "register_operand" "0")
2730 (sign_extend:<VWIDE>
2732 (match_operand:VQ_HSI 2 "register_operand" "w")
2733 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
2734 (sign_extend:<VWIDE>
2736 (match_operand:VQ_HSI 3 "register_operand" "w")
2740 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2741 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal2: creates the half-selection constant and passes it
;; as the fifth argument to the sqdmlal2<mode>_internal insn.
2744 (define_expand "aarch64_sqdmlal2<mode>"
2745 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2746 (match_operand:<VWIDE> 1 "register_operand" "w")
2747 (match_operand:VQ_HSI 2 "register_operand" "w")
2748 (match_operand:VQ_HSI 3 "register_operand" "w")]
2751 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2752 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
2753 operands[2], operands[3], p));
;; Expander for sqdmlsl2: creates the half-selection constant and passes it
;; as the fifth argument to the sqdmlsl2<mode>_internal insn.
2757 (define_expand "aarch64_sqdmlsl2<mode>"
2758 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2759 (match_operand:<VWIDE> 1 "register_operand" "w")
2760 (match_operand:VQ_HSI 2 "register_operand" "w")
2761 (match_operand:VQ_HSI 3 "register_operand" "w")]
2764 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2765 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
2766 operands[2], operands[3], p));
;; By-lane sqdmlal2/sqdmlsl2: operand 2 is used through a
;; vect_par_cnst_hi_half selector (operand 5) and multiplied by a
;; vec_duplicate of lane operand 4 of the <VCON> register in operand 3.
2772 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
2773 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2775 (match_operand:<VWIDE> 1 "register_operand" "0")
2778 (sign_extend:<VWIDE>
2780 (match_operand:VQ_HSI 2 "register_operand" "w")
2781 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
2782 (sign_extend:<VWIDE>
2783 (vec_duplicate:<VHALF>
2785 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2786 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
2790 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"
2791 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal2 by lane.  The lane index (operand 4) is bounded by
;; half of GET_MODE_NUNITS (<MODE>mode); the half-selection constant is
;; created here for the _internal insn.
2794 (define_expand "aarch64_sqdmlal2_lane<mode>"
2795 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2796 (match_operand:<VWIDE> 1 "register_operand" "w")
2797 (match_operand:VQ_HSI 2 "register_operand" "w")
2798 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2799 (match_operand:SI 4 "immediate_operand" "i")]
2802 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2803 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
2804 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
2805 operands[2], operands[3],
;; Expander for sqdmlal2 by lane, "laneq" flavour.  The lane index may range
;; over all GET_MODE_NUNITS (<MODE>mode) lanes; it emits the same
;; sqdmlal2_lane<mode>_internal insn as the non-q expander.
2810 (define_expand "aarch64_sqdmlal2_laneq<mode>"
2811 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2812 (match_operand:<VWIDE> 1 "register_operand" "w")
2813 (match_operand:VQ_HSI 2 "register_operand" "w")
2814 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2815 (match_operand:SI 4 "immediate_operand" "i")]
2818 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2819 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
2820 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
2821 operands[2], operands[3],
;; Expander for sqdmlsl2 by lane.  The lane index (operand 4) is bounded by
;; half of GET_MODE_NUNITS (<MODE>mode); the half-selection constant is
;; created here for the _internal insn.
2826 (define_expand "aarch64_sqdmlsl2_lane<mode>"
2827 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2828 (match_operand:<VWIDE> 1 "register_operand" "w")
2829 (match_operand:VQ_HSI 2 "register_operand" "w")
2830 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2831 (match_operand:SI 4 "immediate_operand" "i")]
2834 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2835 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
2836 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
2837 operands[2], operands[3],
;; Expander for sqdmlsl2 by lane, "laneq" flavour.  The lane index may range
;; over all GET_MODE_NUNITS (<MODE>mode) lanes; it emits the same
;; sqdmlsl2_lane<mode>_internal insn as the non-q expander.
2842 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
2843 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2844 (match_operand:<VWIDE> 1 "register_operand" "w")
2845 (match_operand:VQ_HSI 2 "register_operand" "w")
2846 (match_operand:<VCON> 3 "register_operand" "<vwx>")
2847 (match_operand:SI 4 "immediate_operand" "i")]
2850 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2851 aarch64_simd_lane_bounds (operands[4], 0, GET_MODE_NUNITS (<MODE>mode));
2852 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
2853 operands[2], operands[3],
;; "_n" form of sqdmlal2/sqdmlsl2: operand 2 is used through a
;; vect_par_cnst_hi_half selector (operand 4) and multiplied by a
;; vec_duplicate of the <VEL> register in operand 3, printed as lane [0].
2858 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
2859 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2861 (match_operand:<VWIDE> 1 "register_operand" "0")
2864 (sign_extend:<VWIDE>
2866 (match_operand:VQ_HSI 2 "register_operand" "w")
2867 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
2868 (sign_extend:<VWIDE>
2869 (vec_duplicate:<VHALF>
2870 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
2873 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
2874 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
;; Expander for sqdmlal2_n: creates the half-selection constant and emits
;; the sqdmlal2_n<mode>_internal insn.
2877 (define_expand "aarch64_sqdmlal2_n<mode>"
2878 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2879 (match_operand:<VWIDE> 1 "register_operand" "w")
2880 (match_operand:VQ_HSI 2 "register_operand" "w")
2881 (match_operand:<VEL> 3 "register_operand" "w")]
2884 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2885 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
2886 operands[2], operands[3],
;; Expander for sqdmlsl2_n: creates the half-selection constant and emits
;; the sqdmlsl2_n<mode>_internal insn.
2891 (define_expand "aarch64_sqdmlsl2_n<mode>"
2892 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2893 (match_operand:<VWIDE> 1 "register_operand" "w")
2894 (match_operand:VQ_HSI 2 "register_operand" "w")
2895 (match_operand:<VEL> 3 "register_operand" "w")]
2898 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2899 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
2900 operands[2], operands[3],
;; sqdmull: two sign-extended VSD_HSI sources producing a <VWIDE> result.
2907 (define_insn "aarch64_sqdmull<mode>"
2908 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2911 (sign_extend:<VWIDE>
2912 (match_operand:VSD_HSI 1 "register_operand" "w"))
2913 (sign_extend:<VWIDE>
2914 (match_operand:VSD_HSI 2 "register_operand" "w")))
2917 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2918 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
;; By-lane sqdmull for VD_HSI modes: the second multiplicand is a
;; vec_duplicate of lane operand 3 of the <VCON> register in operand 2.
2923 (define_insn "aarch64_sqdmull_lane<mode>_internal"
2924 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2927 (sign_extend:<VWIDE>
2928 (match_operand:VD_HSI 1 "register_operand" "w"))
2929 (sign_extend:<VWIDE>
2930 (vec_duplicate:VD_HSI
2932 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2933 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
2937 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"
2938 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; By-lane sqdmull for the SD_HSI modes: the lane selected from operand 2 is
;; used directly (no vec_duplicate is visible here).
2941 (define_insn "aarch64_sqdmull_lane<mode>_internal"
2942 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2945 (sign_extend:<VWIDE>
2946 (match_operand:SD_HSI 1 "register_operand" "w"))
2947 (sign_extend:<VWIDE>
2949 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2950 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
2954 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"
2955 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull by lane.  The lane index (operand 3) is bounded by
;; half of GET_MODE_NUNITS (<VCON>mode) before the _internal insn is
;; emitted.
2958 (define_expand "aarch64_sqdmull_lane<mode>"
2959 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2960 (match_operand:VSD_HSI 1 "register_operand" "w")
2961 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2962 (match_operand:SI 3 "immediate_operand" "i")]
2965 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode) / 2);
2966 emit_insn (gen_aarch64_sqdmull_lane<mode>_internal (operands[0], operands[1],
2967 operands[2], operands[3]));
;; Expander for sqdmull by lane, "laneq" flavour, defined for VD_HSI modes
;; only.  The lane index may range over all GET_MODE_NUNITS (<VCON>mode)
;; lanes.
2971 (define_expand "aarch64_sqdmull_laneq<mode>"
2972 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2973 (match_operand:VD_HSI 1 "register_operand" "w")
2974 (match_operand:<VCON> 2 "register_operand" "<vwx>")
2975 (match_operand:SI 3 "immediate_operand" "i")]
2978 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCON>mode));
2979 emit_insn (gen_aarch64_sqdmull_lane<mode>_internal
2980 (operands[0], operands[1], operands[2], operands[3]));
;; "_n" form of sqdmull: the second multiplicand is a vec_duplicate of the
;; <VEL> register in operand 2, printed as lane [0] of that register.
2986 (define_insn "aarch64_sqdmull_n<mode>"
2987 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2990 (sign_extend:<VWIDE>
2991 (match_operand:VD_HSI 1 "register_operand" "w"))
2992 (sign_extend:<VWIDE>
2993 (vec_duplicate:VD_HSI
2994 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
2998 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
2999 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; sqdmull2: both VQ_HSI sources are used through a vect_par_cnst_hi_half
;; selector (operand 3).
3006 (define_insn "aarch64_sqdmull2<mode>_internal"
3007 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3010 (sign_extend:<VWIDE>
3012 (match_operand:VQ_HSI 1 "register_operand" "w")
3013 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3014 (sign_extend:<VWIDE>
3016 (match_operand:VQ_HSI 2 "register_operand" "w")
3021 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3022 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull2: creates the half-selection constant and emits the
;; sqdmull2<mode>_internal insn.
;; NOTE(review): operand 2 is declared with mode <VCON> while the internal
;; insn takes VQ_HSI for both sources -- presumably these coincide for the
;; VQ_HSI modes; confirm against the VCON mode attribute.
3025 (define_expand "aarch64_sqdmull2<mode>"
3026 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3027 (match_operand:VQ_HSI 1 "register_operand" "w")
3028 (match_operand:<VCON> 2 "register_operand" "w")]
3031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3032 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
;; By-lane sqdmull2: operand 1 is used through a vect_par_cnst_hi_half
;; selector (operand 4) and multiplied by a vec_duplicate of lane operand 3
;; of the <VCON> register in operand 2.
3039 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3040 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3043 (sign_extend:<VWIDE>
3045 (match_operand:VQ_HSI 1 "register_operand" "w")
3046 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3047 (sign_extend:<VWIDE>
3048 (vec_duplicate:<VHALF>
3050 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3051 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3055 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"
3056 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull2 by lane.  The lane index (operand 3) is bounded by
;; half of GET_MODE_NUNITS (<MODE>mode); the half-selection constant is
;; created here for the _internal insn.
3059 (define_expand "aarch64_sqdmull2_lane<mode>"
3060 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3061 (match_operand:VQ_HSI 1 "register_operand" "w")
3062 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3063 (match_operand:SI 3 "immediate_operand" "i")]
3066 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3067 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode) / 2);
3068 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3069 operands[2], operands[3],
;; Expander for sqdmull2 by lane, "laneq" flavour.  The lane index may range
;; over all GET_MODE_NUNITS (<MODE>mode) lanes; it emits the same
;; sqdmull2_lane<mode>_internal insn as the non-q expander.
3074 (define_expand "aarch64_sqdmull2_laneq<mode>"
3075 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3076 (match_operand:VQ_HSI 1 "register_operand" "w")
3077 (match_operand:<VCON> 2 "register_operand" "<vwx>")
3078 (match_operand:SI 3 "immediate_operand" "i")]
3081 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3082 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3083 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3084 operands[2], operands[3],
;; "_n" form of sqdmull2: operand 1 is used through a vect_par_cnst_hi_half
;; selector (operand 3) and multiplied by a vec_duplicate of the <VEL>
;; register in operand 2, printed as lane [0].
3091 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3092 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3095 (sign_extend:<VWIDE>
3097 (match_operand:VQ_HSI 1 "register_operand" "w")
3098 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3099 (sign_extend:<VWIDE>
3100 (vec_duplicate:<VHALF>
3101 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3105 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3106 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
;; Expander for sqdmull2_n: creates the half-selection constant and emits
;; the sqdmull2_n<mode>_internal insn.
3109 (define_expand "aarch64_sqdmull2_n<mode>"
3110 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3111 (match_operand:VQ_HSI 1 "register_operand" "w")
3112 (match_operand:<VEL> 2 "register_operand" "w")]
3115 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3116 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
;; Shift by register: both the value (operand 1) and the shift amounts
;; (operand 2) are VSDQ_I_DI registers.  The ';' following the template
;; string starts an (empty) machine-description comment.
3123 (define_insn "aarch64_<sur>shl<mode>"
3124 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3126 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3127 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3130 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3131 [(set_attr "type" "neon_shift_reg<q>")]
;; <sur>q<r>shl by register over VSDQ_I modes.  The ';' following the
;; template string starts an (empty) machine-description comment.
3137 (define_insn "aarch64_<sur>q<r>shl<mode>"
3138 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3140 [(match_operand:VSDQ_I 1 "register_operand" "w")
3141 (match_operand:VSDQ_I 2 "register_operand" "w")]
3144 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
3145 [(set_attr "type" "neon_sat_shift_reg<q>")]
;; Shift-left-long by immediate.  The immediate is checked with
;; aarch64_simd_const_bounds (0, bit_width + 1); when it equals the element
;; width the plain "shll" mnemonic is printed, otherwise "<sur>shll".
3150 (define_insn "aarch64_<sur>shll_n<mode>"
3151 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3152 (unspec:<VWIDE> [(match_operand:VDW 1 "register_operand" "w")
3153 (match_operand:SI 2 "immediate_operand" "i")]
3157 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3158 aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
3159 if (INTVAL (operands[2]) == bit_width)
3161 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3164 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3166 [(set_attr "type" "neon_shift_imm_long")]
;; "2" variant of shift-left-long by immediate, taking a VQW source.  Same
;; immediate check as the non-2 pattern; prints "shll2" when the shift
;; equals the element width, otherwise "<sur>shll2".
3171 (define_insn "aarch64_<sur>shll2_n<mode>"
3172 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3173 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3174 (match_operand:SI 2 "immediate_operand" "i")]
3178 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3179 aarch64_simd_const_bounds (operands[2], 0, bit_width + 1);
3180 if (INTVAL (operands[2]) == bit_width)
3182 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3185 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3187 [(set_attr "type" "neon_shift_imm_long")]
;; <sur>shr by immediate.  The immediate is checked with
;; aarch64_simd_const_bounds (1, bit_width + 1), where bit_width is the
;; element size in bits.
3192 (define_insn "aarch64_<sur>shr_n<mode>"
3193 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3194 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3195 (match_operand:SI 2 "immediate_operand" "i")]
3199 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3200 aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
3201 return \"<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
3202 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; <sur>sra by immediate.  Operand 1 is tied to the destination ("0");
;; operand 2 is the shifted source and operand 3 the immediate, checked with
;; aarch64_simd_const_bounds (1, bit_width + 1).
3207 (define_insn "aarch64_<sur>sra_n<mode>"
3208 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3209 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3210 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3211 (match_operand:SI 3 "immediate_operand" "i")]
3215 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3216 aarch64_simd_const_bounds (operands[3], 1, bit_width + 1);
3217 return \"<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
3218 [(set_attr "type" "neon_shift_acc<q>")]
;; s<lr>i by immediate.  Operand 1 is tied to the destination ("0").  Both
;; immediate bounds are shifted by <VSLRI:offsetlr>, so one pattern serves
;; both <lr> variants with their different valid ranges.
3223 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3224 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3225 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3226 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3227 (match_operand:SI 3 "immediate_operand" "i")]
3231 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3232 aarch64_simd_const_bounds (operands[3], 1 - <VSLRI:offsetlr>,
3233 bit_width - <VSLRI:offsetlr> + 1);
3234 return \"s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3\";"
3235 [(set_attr "type" "neon_shift_imm<q>")]
;; <sur>qshl<u> by immediate.  The immediate is checked with
;; aarch64_simd_const_bounds (0, bit_width).
3240 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3241 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3242 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3243 (match_operand:SI 2 "immediate_operand" "i")]
3247 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3248 aarch64_simd_const_bounds (operands[2], 0, bit_width);
3249 return \"<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2\";"
3250 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Narrowing <sur>q<r>shr<u>n by immediate: a VSQN_HSDI source produces a
;; <VNARROWQ> result.  bit_width is computed from the source mode
;; (<MODE>mode) and the immediate is checked with
;; aarch64_simd_const_bounds (1, bit_width + 1).
3256 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3257 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3258 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3259 (match_operand:SI 2 "immediate_operand" "i")]
3263 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3264 aarch64_simd_const_bounds (operands[2], 1, bit_width + 1);
3265 return \"<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2\";"
3266 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Integer vector compare for the COMPARISONS codes.  Alternative 0 compares
;; two registers (operand order chosen through <cmp_1>/<cmp_2>, mnemonic
;; through <n_optab>); alternative 1 compares operand 1 against #0.
3270 ;; cm(eq|ge|gt|lt|le)
3271 ;; Note, we have constraints for Dz and Z as different expanders
3272 ;; have different ideas of what should be passed to this pattern.
3274 (define_insn "aarch64_cm<optab><mode>"
3275 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3277 (COMPARISONS:<V_cmp_result>
3278 (match_operand:VDQ 1 "register_operand" "w,w")
3279 (match_operand:VDQ 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3283 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3284 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3285 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
;; DImode compare with three alternatives: SIMD reg/reg, SIMD reg/zero and
;; general registers.  The split condition requires operands 0 and 1 to be
;; general registers; the split then builds a flags comparison and emits
;; cstoredi_neg.  The pattern clobbers CC_REGNUM.  The per-alternative
;; "type" values are neon_compare, neon_compare_zero and multiple.
3288 (define_insn_and_split "aarch64_cm<optab>di"
3289 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3292 (match_operand:DI 1 "register_operand" "w,w,r")
3293 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3295 (clobber (reg:CC CC_REGNUM))]
3298 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3299 cm<optab>\t%d0, %d1, #0
3302 /* We need to prevent the split from
3303 happening in the 'w' constraint cases. */
3304 && GP_REGNUM_P (REGNO (operands[0]))
3305 && GP_REGNUM_P (REGNO (operands[1]))"
3308 enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3309 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3310 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3311 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3314 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
;; Integer vector compare for the UCOMPARISONS codes: register/register
;; only, with operand order chosen through <cmp_1>/<cmp_2>.
3319 (define_insn "aarch64_cm<optab><mode>"
3320 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3322 (UCOMPARISONS:<V_cmp_result>
3323 (match_operand:VDQ 1 "register_operand" "w")
3324 (match_operand:VDQ 2 "register_operand" "w")
3327 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3328 [(set_attr "type" "neon_compare<q>")]
;; DImode compare for the UCOMPARISONS codes with two alternatives: SIMD
;; registers ("w") and general registers ("r").  The split condition
;; requires operands 0 and 1 to be general registers; the split then builds
;; a CCmode comparison and emits cstoredi_neg.  The pattern clobbers
;; CC_REGNUM.  The general-register alternative is typed "multiple", like
;; the "r" alternative of the COMPARISONS DImode pattern, because it has no
;; zero-compare form and is replaced by the split sequence.
3331 (define_insn_and_split "aarch64_cm<optab>di"
3332 [(set (match_operand:DI 0 "register_operand" "=w,r")
3335 (match_operand:DI 1 "register_operand" "w,r")
3336 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3338 (clobber (reg:CC CC_REGNUM))]
3341 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3344 /* We need to prevent the split from
3345 happening in the 'w' constraint cases. */
3346 && GP_REGNUM_P (REGNO (operands[0]))
3347 && GP_REGNUM_P (REGNO (operands[1]))"
3350 enum machine_mode mode = CCmode;
3351 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3352 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3353 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3356 [(set_attr "type" "neon_compare, multiple")]
;; cmtst on VDQ vectors: two register sources, with the combined value
;; compared against a vec_duplicate of (const_int 0).
3361 (define_insn "aarch64_cmtst<mode>"
3362 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3366 (match_operand:VDQ 1 "register_operand" "w")
3367 (match_operand:VDQ 2 "register_operand" "w"))
3368 (vec_duplicate:<V_cmp_result> (const_int 0)))))]
3370 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3371 [(set_attr "type" "neon_tst<q>")]
;; DImode cmtst with two alternatives: SIMD registers ("w") and general
;; registers ("r").  The split condition requires operands 0 and 1 to be
;; general registers; the split ANDs the operands, compares the result
;; against zero (NE) and emits cstoredi_neg.  The pattern clobbers
;; CC_REGNUM.  The "type" attribute is given per alternative: the SIMD form
;; is neon_tst, the split general-register form is "multiple".
3374 (define_insn_and_split "aarch64_cmtstdi"
3375 [(set (match_operand:DI 0 "register_operand" "=w,r")
3379 (match_operand:DI 1 "register_operand" "w,r")
3380 (match_operand:DI 2 "register_operand" "w,r"))
3382 (clobber (reg:CC CC_REGNUM))]
3385 cmtst\t%d0, %d1, %d2
3388 /* We need to prevent the split from
3389 happening in the 'w' constraint cases. */
3390 && GP_REGNUM_P (REGNO (operands[0]))
3391 && GP_REGNUM_P (REGNO (operands[1]))"
3394 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3395 enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3396 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3397 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3398 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3401 [(set_attr "type" "neon_tst, multiple")]
3404 ;; fcm(eq|ge|gt|le|lt)
;; Floating-point compare over the VALLF modes.  Alternative 0 takes two
;; "w" registers and prints fcm<n_optab> with its sources ordered by
;; <cmp_1>/<cmp_2>.  Alternative 1 (constraint "YDz" on operand 2) prints
;; fcm<optab> with a literal 0 as the last operand.
3406 (define_insn "aarch64_cm<optab><mode>"
3407 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3409 (COMPARISONS:<V_cmp_result>
3410 (match_operand:VALLF 1 "register_operand" "w,w")
3411 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3415 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3416 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3417 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3421 ;; Note we can also handle what would be fac(le|lt) by
3422 ;; generating fac(ge|gt).
;; Absolute-value compare over the VALLF modes: both register inputs are
;; wrapped in (abs:VALLF ...) and compared with a FAC_COMPARISONS code.
;; The template prints fac<n_optab> with sources ordered by <cmp_1>/<cmp_2>.
3424 (define_insn "*aarch64_fac<optab><mode>"
3425 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3427 (FAC_COMPARISONS:<V_cmp_result>
3428 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3429 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3432 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3433 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
;; ADDP over the VD_BHSI modes: two "w" register inputs and one "w"
;; register result, with "type" attribute neon_reduc_add<q>.
3438 (define_insn "aarch64_addp<mode>"
3439 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3441 [(match_operand:VD_BHSI 1 "register_operand" "w")
3442 (match_operand:VD_BHSI 2 "register_operand" "w")]
3445 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3446 [(set_attr "type" "neon_reduc_add<q>")]
;; DImode result in a "w" register computed from a single V2DI "w"
;; register operand; the "type" attribute is neon_reduc_add.
3449 (define_insn "aarch64_addpdi"
3450 [(set (match_operand:DI 0 "register_operand" "=w")
3452 [(match_operand:V2DI 1 "register_operand" "w")]
3456 [(set_attr "type" "neon_reduc_add")]
;; Square root over the VDQF modes, printed as a single FSQRT from the
;; "w" register in operand 1 to the "w" register in operand 0.
3461 (define_insn "sqrt<mode>2"
3462 [(set (match_operand:VDQF 0 "register_operand" "=w")
3463 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3465 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3466 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3469 ;; Patterns for vector struct loads and stores.
;; Sets the OI-mode "w" register operand 0 from operand 1 (predicate
;; aarch64_simd_struct_operand, constraint "Utv") with one LD2 over the
;; register list %S0..%T0.  The UNSPEC_VSTRUCTDUMMY unspec carries the VQ
;; mode that selects <Vtype> in the template.
3471 (define_insn "vec_load_lanesoi<mode>"
3472 [(set (match_operand:OI 0 "register_operand" "=w")
3473 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3474 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3477 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3478 [(set_attr "type" "neon_load2_2reg<q>")]
;; Sets operand 0 (predicate aarch64_simd_struct_operand, constraint "Utv")
;; from the OI-mode "w" register operand 1 with one ST2 over the register
;; list %S1..%T1.  The UNSPEC_VSTRUCTDUMMY unspec carries the VQ mode that
;; selects <Vtype> in the template.
3481 (define_insn "vec_store_lanesoi<mode>"
3482 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3483 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3484 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3487 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3488 [(set_attr "type" "neon_store2_2reg<q>")]
;; Sets the CI-mode "w" register operand 0 from operand 1 (predicate
;; aarch64_simd_struct_operand, constraint "Utv") with one LD3 over the
;; register list %S0..%U0.  The UNSPEC_VSTRUCTDUMMY unspec carries the VQ
;; mode that selects <Vtype> in the template.
3491 (define_insn "vec_load_lanesci<mode>"
3492 [(set (match_operand:CI 0 "register_operand" "=w")
3493 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3494 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3497 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3498 [(set_attr "type" "neon_load3_3reg<q>")]
;; Sets operand 0 (predicate aarch64_simd_struct_operand, constraint "Utv")
;; from the CI-mode "w" register operand 1 with one ST3 over the register
;; list %S1..%U1.  The UNSPEC_VSTRUCTDUMMY unspec carries the VQ mode that
;; selects <Vtype> in the template.
3501 (define_insn "vec_store_lanesci<mode>"
3502 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
3503 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
3504 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3507 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3508 [(set_attr "type" "neon_store3_3reg<q>")]
;; Sets the XI-mode "w" register operand 0 from operand 1 (predicate
;; aarch64_simd_struct_operand, constraint "Utv") with one LD4 over the
;; register list %S0..%V0.  The UNSPEC_VSTRUCTDUMMY unspec carries the VQ
;; mode that selects <Vtype> in the template.
3511 (define_insn "vec_load_lanesxi<mode>"
3512 [(set (match_operand:XI 0 "register_operand" "=w")
3513 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
3514 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3517 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3518 [(set_attr "type" "neon_load4_4reg<q>")]
;; Sets operand 0 (predicate aarch64_simd_struct_operand, constraint "Utv")
;; from the XI-mode "w" register operand 1 with one ST4 over the register
;; list %S1..%V1.  The UNSPEC_VSTRUCTDUMMY unspec carries the VQ mode that
;; selects <Vtype> in the template.
3521 (define_insn "vec_store_lanesxi<mode>"
3522 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
3523 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
3524 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3527 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
3528 [(set_attr "type" "neon_store4_4reg<q>")]
3531 ;; Reload patterns for AdvSIMD register list operands.
;; Move expander for the VSTRUCT modes.  While new pseudos may still be
;; created (can_create_pseudo_p), a move whose destination is not a REG
;; has its source forced into a register first.
3533 (define_expand "mov<mode>"
3534 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
3535 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
3538 if (can_create_pseudo_p ())
3540 if (GET_CODE (operands[0]) != REG)
3541 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move insn for the VSTRUCT modes with three alternatives:
;;   0: "w"   <- "w"
;;   1: "Utv" <- "w"    (printed as ST1 over %S1..%<Vendreg>1)
;;   2: "w"   <- "Utv"  (printed as LD1 over %S0..%<Vendreg>0)
;; The insn condition requires at least one operand to be a register, and
;; the "length" attribute is computed by aarch64_simd_attr_length_move.
3545 (define_insn "*aarch64_mov<mode>"
3546 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
3547 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
3549 && (register_operand (operands[0], <MODE>mode)
3550 || register_operand (operands[1], <MODE>mode))"
3553 switch (which_alternative)
3556 case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
3557 case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
3558 default: gcc_unreachable ();
3561 [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
3562 neon_load<nregs>_<nregs>reg_q")
3563 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
;; After reload, an OI register-to-register move becomes two sets.  The
;; preparation code builds TFmode REGs for registers rdest, rdest + 1 and
;; rsrc, rsrc + 1, then hands them to aarch64_simd_disambiguate_copy
;; together with the operands array (presumably to pick a copy order that
;; is safe when the register ranges overlap; confirm in its definition).
3567 [(set (match_operand:OI 0 "register_operand" "")
3568 (match_operand:OI 1 "register_operand" ""))]
3569 "TARGET_SIMD && reload_completed"
3570 [(set (match_dup 0) (match_dup 1))
3571 (set (match_dup 2) (match_dup 3))]
3573 int rdest = REGNO (operands[0]);
3574 int rsrc = REGNO (operands[1]);
3575 rtx dest[2], src[2];
3577 dest[0] = gen_rtx_REG (TFmode, rdest);
3578 src[0] = gen_rtx_REG (TFmode, rsrc);
3579 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3580 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3582 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
;; After reload, a CI register-to-register move becomes three sets.  The
;; preparation code builds TFmode REGs for three consecutive destination
;; and source registers, then hands them to aarch64_simd_disambiguate_copy
;; together with the operands array (presumably to pick a copy order that
;; is safe when the register ranges overlap; confirm in its definition).
3586 [(set (match_operand:CI 0 "register_operand" "")
3587 (match_operand:CI 1 "register_operand" ""))]
3588 "TARGET_SIMD && reload_completed"
3589 [(set (match_dup 0) (match_dup 1))
3590 (set (match_dup 2) (match_dup 3))
3591 (set (match_dup 4) (match_dup 5))]
3593 int rdest = REGNO (operands[0]);
3594 int rsrc = REGNO (operands[1]);
3595 rtx dest[3], src[3];
3597 dest[0] = gen_rtx_REG (TFmode, rdest);
3598 src[0] = gen_rtx_REG (TFmode, rsrc);
3599 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3600 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3601 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
3602 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
3604 aarch64_simd_disambiguate_copy (operands, dest, src, 3);
;; After reload, an XI register-to-register move becomes four sets.  The
;; preparation code builds TFmode REGs for four consecutive destination
;; and source registers, then hands them to aarch64_simd_disambiguate_copy
;; together with the operands array (presumably to pick a copy order that
;; is safe when the register ranges overlap; confirm in its definition).
3608 [(set (match_operand:XI 0 "register_operand" "")
3609 (match_operand:XI 1 "register_operand" ""))]
3610 "TARGET_SIMD && reload_completed"
3611 [(set (match_dup 0) (match_dup 1))
3612 (set (match_dup 2) (match_dup 3))
3613 (set (match_dup 4) (match_dup 5))
3614 (set (match_dup 6) (match_dup 7))]
3616 int rdest = REGNO (operands[0]);
3617 int rsrc = REGNO (operands[1]);
3618 rtx dest[4], src[4];
3620 dest[0] = gen_rtx_REG (TFmode, rdest);
3621 src[0] = gen_rtx_REG (TFmode, rsrc);
3622 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
3623 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
3624 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
3625 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
3626 dest[3] = gen_rtx_REG (TFmode, rdest + 3);
3627 src[3] = gen_rtx_REG (TFmode, rsrc + 3);
3629 aarch64_simd_disambiguate_copy (operands, dest, src, 4);
;; LD2 into an OI "w" register from a TI-mode struct operand ("Utv"), for
;; the VD modes.  Each unspec:VD of operand 1 appears alongside a zero
;; vector (vec_duplicate of const_int 0) in the RTL.
3632 (define_insn "aarch64_ld2<mode>_dreg"
3633 [(set (match_operand:OI 0 "register_operand" "=w")
3637 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
3639 (vec_duplicate:VD (const_int 0)))
3641 (unspec:VD [(match_dup 1)]
3643 (vec_duplicate:VD (const_int 0)))) 0))]
3645 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3646 [(set_attr "type" "neon_load2_2reg<q>")]
;; DX-mode variant of the two-register D-reg load: the same OI destination
;; and TI struct operand, but printed as LD1 with the .1d arrangement over
;; %S0..%T0, with "type" attribute neon_load1_2reg<q>.
3649 (define_insn "aarch64_ld2<mode>_dreg"
3650 [(set (match_operand:OI 0 "register_operand" "=w")
3654 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
3658 (unspec:DX [(match_dup 1)]
3660 (const_int 0))) 0))]
3662 "ld1\\t{%S0.1d - %T0.1d}, %1"
3663 [(set_attr "type" "neon_load1_2reg<q>")]
;; LD3 into a CI "w" register from an EI-mode struct operand ("Utv"), for
;; the VD modes.  Each of the three unspec:VD uses of operand 1 appears
;; alongside a zero vector (vec_duplicate of const_int 0) in the RTL.
3666 (define_insn "aarch64_ld3<mode>_dreg"
3667 [(set (match_operand:CI 0 "register_operand" "=w")
3672 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
3674 (vec_duplicate:VD (const_int 0)))
3676 (unspec:VD [(match_dup 1)]
3678 (vec_duplicate:VD (const_int 0))))
3680 (unspec:VD [(match_dup 1)]
3682 (vec_duplicate:VD (const_int 0)))) 0))]
3684 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3685 [(set_attr "type" "neon_load3_3reg<q>")]
;; DX-mode variant of the three-register D-reg load: the same CI
;; destination and EI struct operand, but printed as LD1 with the .1d
;; arrangement over %S0..%U0, with "type" attribute neon_load1_3reg<q>.
3688 (define_insn "aarch64_ld3<mode>_dreg"
3689 [(set (match_operand:CI 0 "register_operand" "=w")
3694 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
3698 (unspec:DX [(match_dup 1)]
3702 (unspec:DX [(match_dup 1)]
3704 (const_int 0))) 0))]
3706 "ld1\\t{%S0.1d - %U0.1d}, %1"
3707 [(set_attr "type" "neon_load1_3reg<q>")]
;; LD4 into an XI "w" register from an OI-mode struct operand ("Utv"), for
;; the VD modes.  Each of the four unspec:VD uses of operand 1 appears
;; alongside a zero vector (vec_duplicate of const_int 0) in the RTL.
3710 (define_insn "aarch64_ld4<mode>_dreg"
3711 [(set (match_operand:XI 0 "register_operand" "=w")
3716 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
3718 (vec_duplicate:VD (const_int 0)))
3720 (unspec:VD [(match_dup 1)]
3722 (vec_duplicate:VD (const_int 0))))
3725 (unspec:VD [(match_dup 1)]
3727 (vec_duplicate:VD (const_int 0)))
3729 (unspec:VD [(match_dup 1)]
3731 (vec_duplicate:VD (const_int 0))))) 0))]
3733 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3734 [(set_attr "type" "neon_load4_4reg<q>")]
;; DX-mode variant of the four-register D-reg load: the same XI
;; destination and OI struct operand, but printed as LD1 with the .1d
;; arrangement over %S0..%V0, with "type" attribute neon_load1_4reg<q>.
3737 (define_insn "aarch64_ld4<mode>_dreg"
3738 [(set (match_operand:XI 0 "register_operand" "=w")
3743 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
3747 (unspec:DX [(match_dup 1)]
3752 (unspec:DX [(match_dup 1)]
3756 (unspec:DX [(match_dup 1)]
3758 (const_int 0)))) 0))]
3760 "ld1\\t{%S0.1d - %V0.1d}, %1"
3761 [(set_attr "type" "neon_load1_4reg<q>")]
;; Expander for struct loads with VDC element modes.  It wraps the DI
;; register in operand 1 in a MEM of mode <VSTRUCT:VSTRUCT_DREG> and emits
;; the matching aarch64_ld<nregs><mode>_dreg insn into operand 0.
3764 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
3765 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3766 (match_operand:DI 1 "register_operand" "r")
3767 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3770 enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
3771 rtx mem = gen_rtx_MEM (mode, operands[1]);
3773 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
;; Expander for a single-vector load.  It wraps the DI register in operand
;; 1 in a MEM of the vector mode and emits an ordinary move from that MEM
;; into operand 0.
3777 (define_expand "aarch64_ld1<VALL:mode>"
3778 [(match_operand:VALL 0 "register_operand")
3779 (match_operand:DI 1 "register_operand")]
3782 enum machine_mode mode = <VALL:MODE>mode;
3783 rtx mem = gen_rtx_MEM (mode, operands[1]);
3784 emit_move_insn (operands[0], mem);
;; Expander for struct loads with VQ element modes.  It wraps the DI
;; register in operand 1 in a MEM of the full <VSTRUCT:MODE> and emits the
;; matching vec_load_lanes<VSTRUCT:mode><VQ:mode> insn into operand 0.
3788 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
3789 [(match_operand:VSTRUCT 0 "register_operand" "=w")
3790 (match_operand:DI 1 "register_operand" "r")
3791 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3794 enum machine_mode mode = <VSTRUCT:MODE>mode;
3795 rtx mem = gen_rtx_MEM (mode, operands[1]);
3797 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
3801 ;; Expanders for builtins to extract vector registers from large
3802 ;; opaque integer modes.
;; Extracts part number operand 2 of the VSTRUCT register in operand 1 as
;; a VDC value.  A <VDC:VDBL>-mode SUBREG at offset part * 16 is first
;; moved into a fresh pseudo, then its low part (gen_lowpart) in
;; <VDC:MODE> is moved into operand 0.
3806 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
3807 [(match_operand:VDC 0 "register_operand" "=w")
3808 (match_operand:VSTRUCT 1 "register_operand" "w")
3809 (match_operand:SI 2 "immediate_operand" "i")]
3812 int part = INTVAL (operands[2]);
3813 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
3814 int offset = part * 16;
3816 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
3817 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
;; Extracts part number operand 2 of the VSTRUCT register in operand 1 as
;; a VQ value, by moving a <VQ:MODE> SUBREG at offset part * 16 into
;; operand 0.
3823 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
3824 [(match_operand:VQ 0 "register_operand" "=w")
3825 (match_operand:VSTRUCT 1 "register_operand" "w")
3826 (match_operand:SI 2 "immediate_operand" "i")]
3829 int part = INTVAL (operands[2]);
3830 int offset = part * 16;
3832 emit_move_insn (operands[0],
3833 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
3837 ;; Permuted-store expanders for neon intrinsics.
3839 ;; Permute instructions
;; Constant-selector permute over the VALL modes.  All four operands go to
;; aarch64_expand_vec_perm_const, and the expander's outcome depends on
;; that function's return value.
3843 (define_expand "vec_perm_const<mode>"
3844 [(match_operand:VALL 0 "register_operand")
3845 (match_operand:VALL 1 "register_operand")
3846 (match_operand:VALL 2 "register_operand")
3847 (match_operand:<V_cmp_result> 3)]
3850 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
3851 operands[2], operands[3]))
;; Register-selector permute over the VB modes.  All four register
;; operands are handed to aarch64_expand_vec_perm, which performs the
;; expansion.
3857 (define_expand "vec_perm<mode>"
3858 [(match_operand:VB 0 "register_operand")
3859 (match_operand:VB 1 "register_operand")
3860 (match_operand:VB 2 "register_operand")
3861 (match_operand:VB 3 "register_operand")]
3864 aarch64_expand_vec_perm (operands[0], operands[1],
3865 operands[2], operands[3]);
;; TBL with a one-register list: the V16QI operand 1 is printed as
;; {%1.16b}, and operand 2 and the result use the VB-mode arrangement
;; <Vtype>.
3869 (define_insn "aarch64_tbl1<mode>"
3870 [(set (match_operand:VB 0 "register_operand" "=w")
3871 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
3872 (match_operand:VB 2 "register_operand" "w")]
3875 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
3876 [(set_attr "type" "neon_tbl1<q>")]
3879 ;; Two source registers.
;; TBL with a two-register list: the OI operand 1 is printed as the
;; register pair {%S1.16b - %T1.16b}, and operand 2 and the result are
;; V16QI registers.
3881 (define_insn "aarch64_tbl2v16qi"
3882 [(set (match_operand:V16QI 0 "register_operand" "=w")
3883 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
3884 (match_operand:V16QI 2 "register_operand" "w")]
3887 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
3888 [(set_attr "type" "neon_tbl2_q")]
;; Forms an OI "w" register from two V16QI "w" registers.  The insn is
;; split once reload has completed, with the split performed by
;; aarch64_split_combinev16qi; the "type" attribute is "multiple".
3891 (define_insn_and_split "aarch64_combinev16qi"
3892 [(set (match_operand:OI 0 "register_operand" "=w")
3893 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
3894 (match_operand:V16QI 2 "register_operand" "w")]
3898 "&& reload_completed"
3901 aarch64_split_combinev16qi (operands);
3904 [(set_attr "type" "multiple")]
;; Two-input permute instructions over the VALL modes.  The PERMUTE
;; iterator supplies the mnemonic as <perm_insn> followed by <perm_hilo>;
;; all three operands are "w" registers printed with <Vtype>.
3907 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
3908 [(set (match_operand:VALL 0 "register_operand" "=w")
3909 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
3910 (match_operand:VALL 2 "register_operand" "w")]
3913 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3914 [(set_attr "type" "neon_permute<q>")]
;; ST2 of the OI "w" register operand 1 to a TI-mode struct operand
;; ("Utv"), for the VD modes.  The UNSPEC_VSTRUCTDUMMY unspec carries the
;; VD mode that selects <Vtype> in the template.
3917 (define_insn "aarch64_st2<mode>_dreg"
3918 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
3919 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
3920 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3923 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3924 [(set_attr "type" "neon_store2_2reg")]
;; DX-mode variant of the two-register D-reg store: the same TI
;; destination and OI source, but printed as ST1 with the .1d arrangement
;; over %S1..%T1, with "type" attribute neon_store1_2reg.
3927 (define_insn "aarch64_st2<mode>_dreg"
3928 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
3929 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
3930 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3933 "st1\\t{%S1.1d - %T1.1d}, %0"
3934 [(set_attr "type" "neon_store1_2reg")]
;; ST3 of the CI "w" register operand 1 to an EI-mode struct operand
;; ("Utv"), for the VD modes.  The UNSPEC_VSTRUCTDUMMY unspec carries the
;; VD mode that selects <Vtype> in the template.
3937 (define_insn "aarch64_st3<mode>_dreg"
3938 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
3939 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
3940 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3943 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3944 [(set_attr "type" "neon_store3_3reg")]
;; DX-mode variant of the three-register D-reg store: the same EI
;; destination and CI source, but printed as ST1 with the .1d arrangement
;; over %S1..%U1, with "type" attribute neon_store1_3reg.
3947 (define_insn "aarch64_st3<mode>_dreg"
3948 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
3949 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
3950 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3953 "st1\\t{%S1.1d - %U1.1d}, %0"
3954 [(set_attr "type" "neon_store1_3reg")]
;; ST4 of the XI "w" register operand 1 to an OI-mode struct operand
;; ("Utv"), for the VD modes.  The UNSPEC_VSTRUCTDUMMY unspec carries the
;; VD mode that selects <Vtype> in the template.
3957 (define_insn "aarch64_st4<mode>_dreg"
3958 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3959 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
3960 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3963 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
3964 [(set_attr "type" "neon_store4_4reg")]
;; DX-mode variant of the four-register D-reg store: the same OI
;; destination and XI source, but printed as ST1 with the .1d arrangement
;; over %S1..%V1, with "type" attribute neon_store1_4reg.
3967 (define_insn "aarch64_st4<mode>_dreg"
3968 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3969 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
3970 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3973 "st1\\t{%S1.1d - %V1.1d}, %0"
3974 [(set_attr "type" "neon_store1_4reg")]
;; Expander for struct stores with VDC element modes.  It wraps the DI
;; register in operand 0 in a MEM of mode <VSTRUCT:VSTRUCT_DREG> and emits
;; the matching aarch64_st<nregs><mode>_dreg insn from operand 1.
3977 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
3978 [(match_operand:DI 0 "register_operand" "r")
3979 (match_operand:VSTRUCT 1 "register_operand" "w")
3980 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3983 enum machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
3984 rtx mem = gen_rtx_MEM (mode, operands[0]);
3986 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
;; Expander for struct stores with VQ element modes.  It wraps the DI
;; register in operand 0 in a MEM of the full <VSTRUCT:MODE> and emits the
;; matching vec_store_lanes<VSTRUCT:mode><VQ:mode> insn from operand 1.
3990 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
3991 [(match_operand:DI 0 "register_operand" "r")
3992 (match_operand:VSTRUCT 1 "register_operand" "w")
3993 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3996 enum machine_mode mode = <VSTRUCT:MODE>mode;
3997 rtx mem = gen_rtx_MEM (mode, operands[0]);
3999 emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
;; Expander for a single-vector store.  It wraps the DI register in
;; operand 0 in a MEM of the vector mode and emits an ordinary move of
;; operand 1 into that MEM.
4003 (define_expand "aarch64_st1<VALL:mode>"
4004 [(match_operand:DI 0 "register_operand")
4005 (match_operand:VALL 1 "register_operand")]
4008 enum machine_mode mode = <VALL:MODE>mode;
4009 rtx mem = gen_rtx_MEM (mode, operands[0]);
4010 emit_move_insn (mem, operands[1]);
4014 ;; Expander for builtins to insert vector registers into large
4015 ;; opaque integer modes.
4017 ;; Q-register list. We don't need a D-reg inserter as we zero
4018 ;; extend them in arm_neon.h and insert the resulting Q-regs.
;; Inserts into part number operand 3 of a VSTRUCT register.  Operand 1
;; (tied to operand 0 by the "0" constraint) is first copied to operand 0,
;; then a move is emitted whose destination is the <VQ:MODE> SUBREG of
;; operand 0 at offset part * 16.
4020 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
4021 [(match_operand:VSTRUCT 0 "register_operand" "+w")
4022 (match_operand:VSTRUCT 1 "register_operand" "0")
4023 (match_operand:VQ 2 "register_operand" "w")
4024 (match_operand:SI 3 "immediate_operand" "i")]
4027 int part = INTVAL (operands[3]);
4028 int offset = part * 16;
4030 emit_move_insn (operands[0], operands[1]);
4031 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
4036 ;; Standard pattern name vec_init<mode>.
;; Vector initialisation over the VALL modes; the expansion is delegated
;; to aarch64_expand_vector_init with the destination register (operand 0)
;; and the unconstrained operand 1.
4038 (define_expand "vec_init<mode>"
4039 [(match_operand:VALL 0 "register_operand" "")
4040 (match_operand 1 "" "")]
4043 aarch64_expand_vector_init (operands[0], operands[1]);
;; Matches a vec_duplicate of a <VEL>-mode struct operand ("Utv") into a
;; VALLDI "w" register and prints it as a single LD1R.
4047 (define_insn "*aarch64_simd_ld1r<mode>"
4048 [(set (match_operand:VALLDI 0 "register_operand" "=w")
4049 (vec_duplicate:VALLDI
4050 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
4052 "ld1r\\t{%0.<Vtype>}, %1"
4053 [(set_attr "type" "neon_load1_all_lanes")]
;; FRECPE over the VDQF modes: an unspec of one "w" register input,
;; producing a "w" register result printed with the <Vtype> arrangement.
4056 (define_insn "aarch64_frecpe<mode>"
4057 [(set (match_operand:VDQF 0 "register_operand" "=w")
4058 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
4061 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
4062 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
;; Scalar (GPF-mode) FRECP<suffix> forms: the FRECP iterator supplies the
;; mnemonic suffix, and both operands are "w" registers printed with the
;; <s> register prefix.
4065 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
4066 [(set (match_operand:GPF 0 "register_operand" "=w")
4067 (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
4070 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
4071 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
;; FRECPS over the VALLF modes: an unspec of two "w" register inputs,
;; producing a "w" register result; operands are printed with the
;; <v>/<Vmtype> modifiers.
4074 (define_insn "aarch64_frecps<mode>"
4075 [(set (match_operand:VALLF 0 "register_operand" "=w")
4076 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
4077 (match_operand:VALLF 2 "register_operand" "w")]
4080 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4081 [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
4084 ;; Standard pattern name vec_extract<mode>.
4086 (define_expand "vec_extract<mode>"
4087 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
4088 (match_operand:VALL 1 "register_operand" "")
4089 (match_operand:SI 2 "immediate_operand" "")]
4093 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));