[AArch64] Implement usadv16qi and ssadv16qi standard names
[gcc.git] gcc/config/aarch64/aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 /* Force the operand into a register if it is not an
27 immediate whose use can be replaced with xzr.
28 If the mode is 16 bytes wide, then we will be doing
29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will be doing a
31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 "
39 )
40
41 (define_expand "movmisalign<mode>"
42 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
43 (match_operand:VALL 1 "general_operand" ""))]
44 "TARGET_SIMD"
45 {
46 /* This pattern is not permitted to fail during expansion: if both arguments
47 are non-registers (e.g. memory := constant, which can be created by the
48 auto-vectorizer), force operand 1 into a register. */
49 if (!register_operand (operands[0], <MODE>mode)
50 && !register_operand (operands[1], <MODE>mode))
51 operands[1] = force_reg (<MODE>mode, operands[1]);
52 })
53
54 (define_insn "aarch64_simd_dup<mode>"
55 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
56 (vec_duplicate:VDQ_I
57 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
58 "TARGET_SIMD"
59 "@
60 dup\\t%0.<Vtype>, %1.<Vetype>[0]
61 dup\\t%0.<Vtype>, %<vw>1"
62 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
63 )
64
65 (define_insn "aarch64_simd_dup<mode>"
66 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
67 (vec_duplicate:VDQF_F16
68 (match_operand:<VEL> 1 "register_operand" "w")))]
69 "TARGET_SIMD"
70 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
71 [(set_attr "type" "neon_dup<q>")]
72 )
73
74 (define_insn "aarch64_dup_lane<mode>"
75 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
76 (vec_duplicate:VALL_F16
77 (vec_select:<VEL>
78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))]
81 "TARGET_SIMD"
82 {
83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 }
86 [(set_attr "type" "neon_dup<q>")]
87 )
88
89 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
90 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
91 (vec_duplicate:VALL_F16_NO_V2Q
92 (vec_select:<VEL>
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))]
96 "TARGET_SIMD"
97 {
98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
100 }
101 [(set_attr "type" "neon_dup<q>")]
102 )
103
104 (define_insn "*aarch64_simd_mov<VD:mode>"
105 [(set (match_operand:VD 0 "nonimmediate_operand"
106 "=w, m, m, w, ?r, ?w, ?r, w")
107 (match_operand:VD 1 "general_operand"
108 "m, Dz, w, w, w, r, r, Dn"))]
109 "TARGET_SIMD
110 && (register_operand (operands[0], <MODE>mode)
111 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
112 {
113 switch (which_alternative)
114 {
115 case 0: return "ldr\t%d0, %1";
116 case 1: return "str\txzr, %0";
117 case 2: return "str\t%d1, %0";
118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
119 case 4: return "umov\t%0, %1.d[0]";
120 case 5: return "fmov\t%d0, %1";
121 case 6: return "mov\t%0, %1";
122 case 7:
123 return aarch64_output_simd_mov_immediate (operands[1], 64);
124 default: gcc_unreachable ();
125 }
126 }
127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
129 mov_reg, neon_move<q>")]
130 )
131
132 (define_insn "*aarch64_simd_mov<VQ:mode>"
133 [(set (match_operand:VQ 0 "nonimmediate_operand"
134 "=w, Umq, m, w, ?r, ?w, ?r, w")
135 (match_operand:VQ 1 "general_operand"
136 "m, Dz, w, w, w, r, r, Dn"))]
137 "TARGET_SIMD
138 && (register_operand (operands[0], <MODE>mode)
139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
140 {
141 switch (which_alternative)
142 {
143 case 0:
144 return "ldr\t%q0, %1";
145 case 1:
146 return "stp\txzr, xzr, %0";
147 case 2:
148 return "str\t%q1, %0";
149 case 3:
150 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
151 case 4:
152 case 5:
153 case 6:
154 return "#";
155 case 7:
156 return aarch64_output_simd_mov_immediate (operands[1], 128);
157 default:
158 gcc_unreachable ();
159 }
160 }
161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
162 neon_logic<q>, multiple, multiple,\
163 multiple, neon_move<q>")
164 (set_attr "length" "4,4,4,4,8,8,8,4")]
165 )
166
167 ;; When storing lane zero we can use the normal STR and its more permissive
168 ;; addressing modes.
169
170 (define_insn "aarch64_store_lane0<mode>"
171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
174 "TARGET_SIMD
175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
176 "str\\t%<Vetype>1, %0"
177 [(set_attr "type" "neon_store1_1reg<q>")]
178 )
179
180 (define_insn "load_pair<mode>"
181 [(set (match_operand:VD 0 "register_operand" "=w")
182 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
183 (set (match_operand:VD 2 "register_operand" "=w")
184 (match_operand:VD 3 "memory_operand" "m"))]
185 "TARGET_SIMD
186 && rtx_equal_p (XEXP (operands[3], 0),
187 plus_constant (Pmode,
188 XEXP (operands[1], 0),
189 GET_MODE_SIZE (<MODE>mode)))"
190 "ldp\\t%d0, %d2, %1"
191 [(set_attr "type" "neon_ldp")]
192 )
193
194 (define_insn "store_pair<mode>"
195 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
196 (match_operand:VD 1 "register_operand" "w"))
197 (set (match_operand:VD 2 "memory_operand" "=m")
198 (match_operand:VD 3 "register_operand" "w"))]
199 "TARGET_SIMD
200 && rtx_equal_p (XEXP (operands[2], 0),
201 plus_constant (Pmode,
202 XEXP (operands[0], 0),
203 GET_MODE_SIZE (<MODE>mode)))"
204 "stp\\t%d1, %d3, %0"
205 [(set_attr "type" "neon_stp")]
206 )
207
208 (define_split
209 [(set (match_operand:VQ 0 "register_operand" "")
210 (match_operand:VQ 1 "register_operand" ""))]
211 "TARGET_SIMD && reload_completed
212 && GP_REGNUM_P (REGNO (operands[0]))
213 && GP_REGNUM_P (REGNO (operands[1]))"
214 [(const_int 0)]
215 {
216 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
217 DONE;
218 })
219
220 (define_split
221 [(set (match_operand:VQ 0 "register_operand" "")
222 (match_operand:VQ 1 "register_operand" ""))]
223 "TARGET_SIMD && reload_completed
224 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
225 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
226 [(const_int 0)]
227 {
228 aarch64_split_simd_move (operands[0], operands[1]);
229 DONE;
230 })
231
232 (define_expand "aarch64_split_simd_mov<mode>"
233 [(set (match_operand:VQ 0)
234 (match_operand:VQ 1))]
235 "TARGET_SIMD"
236 {
237 rtx dst = operands[0];
238 rtx src = operands[1];
239
240 if (GP_REGNUM_P (REGNO (src)))
241 {
242 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
243 rtx src_high_part = gen_highpart (<VHALF>mode, src);
244
245 emit_insn
246 (gen_move_lo_quad_<mode> (dst, src_low_part));
247 emit_insn
248 (gen_move_hi_quad_<mode> (dst, src_high_part));
249 }
250
251 else
252 {
253 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
254 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
255 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
256 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
257
258 emit_insn
259 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
260 emit_insn
261 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
262 }
263 DONE;
264 }
265 )
266
267 (define_insn "aarch64_simd_mov_from_<mode>low"
268 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
269 (vec_select:<VHALF>
270 (match_operand:VQ 1 "register_operand" "w")
271 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
272 "TARGET_SIMD && reload_completed"
273 "umov\t%0, %1.d[0]"
274 [(set_attr "type" "neon_to_gp<q>")
275 (set_attr "length" "4")
276 ])
277
278 (define_insn "aarch64_simd_mov_from_<mode>high"
279 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
280 (vec_select:<VHALF>
281 (match_operand:VQ 1 "register_operand" "w")
282 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
283 "TARGET_SIMD && reload_completed"
284 "umov\t%0, %1.d[1]"
285 [(set_attr "type" "neon_to_gp<q>")
286 (set_attr "length" "4")
287 ])
288
289 (define_insn "orn<mode>3"
290 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
291 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
292 (match_operand:VDQ_I 2 "register_operand" "w")))]
293 "TARGET_SIMD"
294 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
295 [(set_attr "type" "neon_logic<q>")]
296 )
297
298 (define_insn "bic<mode>3"
299 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
300 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
301 (match_operand:VDQ_I 2 "register_operand" "w")))]
302 "TARGET_SIMD"
303 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
304 [(set_attr "type" "neon_logic<q>")]
305 )
306
307 (define_insn "add<mode>3"
308 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
309 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
310 (match_operand:VDQ_I 2 "register_operand" "w")))]
311 "TARGET_SIMD"
312 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
313 [(set_attr "type" "neon_add<q>")]
314 )
315
316 (define_insn "sub<mode>3"
317 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
318 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
319 (match_operand:VDQ_I 2 "register_operand" "w")))]
320 "TARGET_SIMD"
321 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
322 [(set_attr "type" "neon_sub<q>")]
323 )
324
325 (define_insn "mul<mode>3"
326 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
327 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
328 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
329 "TARGET_SIMD"
330 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
331 [(set_attr "type" "neon_mul_<Vetype><q>")]
332 )
333
334 (define_insn "bswap<mode>2"
335 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
336 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
337 "TARGET_SIMD"
338 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
339 [(set_attr "type" "neon_rev<q>")]
340 )
341
342 (define_insn "aarch64_rbit<mode>"
343 [(set (match_operand:VB 0 "register_operand" "=w")
344 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
345 UNSPEC_RBIT))]
346 "TARGET_SIMD"
347 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
348 [(set_attr "type" "neon_rbit")]
349 )
350
351 (define_expand "ctz<mode>2"
352 [(set (match_operand:VS 0 "register_operand")
353 (ctz:VS (match_operand:VS 1 "register_operand")))]
354 "TARGET_SIMD"
355 {
356 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
357 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
358 <MODE>mode, 0);
359 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
360 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
361 DONE;
362 }
363 )
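;; The expansion above uses the identity ctz (x) == clz (bit_reverse (x)).
;; There is no full per-element bit-reverse instruction, so the reversal is
;; composed from REV<n> (the bswap pattern, reversing the bytes) followed by
;; RBIT (reversing the bits within each byte).  An illustrative scalar model,
;; where bit_reverse is a hypothetical helper and x is nonzero:
;;
;;   unsigned int
;;   ctz_model (unsigned int x)
;;   {
;;     return __builtin_clz (bit_reverse (x));
;;   }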
364
365 (define_expand "xorsign<mode>3"
366 [(match_operand:VHSDF 0 "register_operand")
367 (match_operand:VHSDF 1 "register_operand")
368 (match_operand:VHSDF 2 "register_operand")]
369 "TARGET_SIMD"
370 {
371
372 machine_mode imode = <V_INT_EQUIV>mode;
373 rtx v_bitmask = gen_reg_rtx (imode);
374 rtx op1x = gen_reg_rtx (imode);
375 rtx op2x = gen_reg_rtx (imode);
376
377 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
378 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
379
380 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
381
382 emit_move_insn (v_bitmask,
383 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
384 HOST_WIDE_INT_M1U << bits));
385
386 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
387 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
388 emit_move_insn (operands[0],
389 lowpart_subreg (<MODE>mode, op1x, imode));
390 DONE;
391 }
392 )
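;; That is, xorsign (x, y) computes x ^ (y & sign-bit-mask), flipping the
;; sign of x whenever y is negative.  An illustrative scalar model, where
;; float_to_bits and bits_to_float are hypothetical bit-reinterpretation
;; helpers:
;;
;;   float
;;   xorsign_model (float x, float y)
;;   {
;;     return bits_to_float (float_to_bits (x)
;;                           ^ (float_to_bits (y) & 0x80000000u));
;;   }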
393
394 ;; These instructions map to the __builtins for the Dot Product operations.
395 (define_insn "aarch64_<sur>dot<vsi2qi>"
396 [(set (match_operand:VS 0 "register_operand" "=w")
397 (plus:VS (match_operand:VS 1 "register_operand" "0")
398 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
399 (match_operand:<VSI2QI> 3 "register_operand" "w")]
400 DOTPROD)))]
401 "TARGET_DOTPROD"
402 "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
403 [(set_attr "type" "neon_dot")]
404 )
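;; For example, the ACLE intrinsic vdotq_u32 maps onto the unsigned V16QI
;; form of this pattern.  A minimal sketch, assuming compilation with a
;; -march option that includes +dotprod:
;;
;;   #include <arm_neon.h>
;;
;;   uint32x4_t
;;   dot (uint32x4_t acc, uint8x16_t a, uint8x16_t b)
;;   {
;;     return vdotq_u32 (acc, a, b);   /* udot acc.4s, a.16b, b.16b  */
;;   }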
405
406 ;; These expands map to the Dot Product optab the vectorizer checks for.
407 ;; The auto-vectorizer expects a dot product builtin that also does an
408 ;; accumulation into the provided register.
409 ;; Given the following pattern
410 ;;
411 ;; for (i=0; i<len; i++) {
412 ;; c = a[i] * b[i];
413 ;; r += c;
414 ;; }
415 ;; return r;
416 ;;
417 ;; This can be auto-vectorized to
418 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
419 ;;
420 ;; given enough iterations.  However, the vectorizer can keep unrolling the loop:
421 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
422 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
423 ;; ...
424 ;;
425 ;; and so the vectorizer provides r, in which the result has to be accumulated.
426 (define_expand "<sur>dot_prod<vsi2qi>"
427 [(set (match_operand:VS 0 "register_operand")
428 (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
429 (match_operand:<VSI2QI> 2 "register_operand")]
430 DOTPROD)
431 (match_operand:VS 3 "register_operand")))]
432 "TARGET_DOTPROD"
433 {
434 emit_insn (
435 gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
436 operands[2]));
437 emit_insn (gen_rtx_SET (operands[0], operands[3]));
438 DONE;
439 })
440
441 ;; These instructions map to the __builtins for the Dot Product
442 ;; indexed operations.
443 (define_insn "aarch64_<sur>dot_lane<vsi2qi>"
444 [(set (match_operand:VS 0 "register_operand" "=w")
445 (plus:VS (match_operand:VS 1 "register_operand" "0")
446 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
447 (match_operand:V8QI 3 "register_operand" "<h_con>")
448 (match_operand:SI 4 "immediate_operand" "i")]
449 DOTPROD)))]
450 "TARGET_DOTPROD"
451 {
452 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
453 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
454 }
455 [(set_attr "type" "neon_dot")]
456 )
457
458 (define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
459 [(set (match_operand:VS 0 "register_operand" "=w")
460 (plus:VS (match_operand:VS 1 "register_operand" "0")
461 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
462 (match_operand:V16QI 3 "register_operand" "<h_con>")
463 (match_operand:SI 4 "immediate_operand" "i")]
464 DOTPROD)))]
465 "TARGET_DOTPROD"
466 {
467 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
468 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
469 }
470 [(set_attr "type" "neon_dot")]
471 )
472
473 (define_expand "copysign<mode>3"
474 [(match_operand:VHSDF 0 "register_operand")
475 (match_operand:VHSDF 1 "register_operand")
476 (match_operand:VHSDF 2 "register_operand")]
477 "TARGET_FLOAT && TARGET_SIMD"
478 {
479 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
480 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
481
482 emit_move_insn (v_bitmask,
483 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
484 HOST_WIDE_INT_M1U << bits));
485 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
486 operands[2], operands[1]));
487 DONE;
488 }
489 )
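;; The BSL selects the sign bit from operands[2] and all remaining bits
;; from operands[1].  An illustrative scalar model, where float_to_bits and
;; bits_to_float are hypothetical bit-reinterpretation helpers:
;;
;;   float
;;   copysign_model (float x, float y)
;;   {
;;     unsigned int m = 0x80000000u;
;;     return bits_to_float ((float_to_bits (y) & m)
;;                           | (float_to_bits (x) & ~m));
;;   }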
490
491 (define_insn "*aarch64_mul3_elt<mode>"
492 [(set (match_operand:VMUL 0 "register_operand" "=w")
493 (mult:VMUL
494 (vec_duplicate:VMUL
495 (vec_select:<VEL>
496 (match_operand:VMUL 1 "register_operand" "<h_con>")
497 (parallel [(match_operand:SI 2 "immediate_operand")])))
498 (match_operand:VMUL 3 "register_operand" "w")))]
499 "TARGET_SIMD"
500 {
501 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
502 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
503 }
504 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
505 )
506
507 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
508 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
509 (mult:VMUL_CHANGE_NLANES
510 (vec_duplicate:VMUL_CHANGE_NLANES
511 (vec_select:<VEL>
512 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
513 (parallel [(match_operand:SI 2 "immediate_operand")])))
514 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
515 "TARGET_SIMD"
516 {
517 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
518 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
519 }
520 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
521 )
522
523 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
524 [(set (match_operand:VMUL 0 "register_operand" "=w")
525 (mult:VMUL
526 (vec_duplicate:VMUL
527 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
528 (match_operand:VMUL 2 "register_operand" "w")))]
529 "TARGET_SIMD"
530 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
531 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
532 )
533
534 (define_insn "aarch64_rsqrte<mode>"
535 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
536 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
537 UNSPEC_RSQRTE))]
538 "TARGET_SIMD"
539 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
540 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
541
542 (define_insn "aarch64_rsqrts<mode>"
543 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
544 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
545 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
546 UNSPEC_RSQRTS))]
547 "TARGET_SIMD"
548 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
549 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
550
551 (define_expand "rsqrt<mode>2"
552 [(set (match_operand:VALLF 0 "register_operand" "=w")
553 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
554 UNSPEC_RSQRT))]
555 "TARGET_SIMD"
556 {
557 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
558 DONE;
559 })
560
561 (define_insn "*aarch64_mul3_elt_to_64v2df"
562 [(set (match_operand:DF 0 "register_operand" "=w")
563 (mult:DF
564 (vec_select:DF
565 (match_operand:V2DF 1 "register_operand" "w")
566 (parallel [(match_operand:SI 2 "immediate_operand")]))
567 (match_operand:DF 3 "register_operand" "w")))]
568 "TARGET_SIMD"
569 {
570 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
571 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
572 }
573 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
574 )
575
576 (define_insn "neg<mode>2"
577 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
578 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
579 "TARGET_SIMD"
580 "neg\t%0.<Vtype>, %1.<Vtype>"
581 [(set_attr "type" "neon_neg<q>")]
582 )
583
584 (define_insn "abs<mode>2"
585 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
586 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
587 "TARGET_SIMD"
588 "abs\t%0.<Vtype>, %1.<Vtype>"
589 [(set_attr "type" "neon_abs<q>")]
590 )
591
592 ;; The intrinsic version of integer ABS must not be allowed to
593 ;; combine with any operation with an integrated ABS step, such
594 ;; as SABD.
595 (define_insn "aarch64_abs<mode>"
596 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
597 (unspec:VSDQ_I_DI
598 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
599 UNSPEC_ABS))]
600 "TARGET_SIMD"
601 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
602 [(set_attr "type" "neon_abs<q>")]
603 )
604
605 (define_insn "abd<mode>_3"
606 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
607 (abs:VDQ_BHSI (minus:VDQ_BHSI
608 (match_operand:VDQ_BHSI 1 "register_operand" "w")
609 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
610 "TARGET_SIMD"
611 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
612 [(set_attr "type" "neon_abd<q>")]
613 )
614
615 (define_insn "aarch64_<sur>abdl2<mode>_3"
616 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
617 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
618 (match_operand:VDQV_S 2 "register_operand" "w")]
619 ABDL2))]
620 "TARGET_SIMD"
621 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
622 [(set_attr "type" "neon_abd<q>")]
623 )
624
625 (define_insn "aarch64_<sur>abal<mode>_4"
626 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
627 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
628 (match_operand:VDQV_S 2 "register_operand" "w")
629 (match_operand:<VDBLW> 3 "register_operand" "0")]
630 ABAL))]
631 "TARGET_SIMD"
632 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
633 [(set_attr "type" "neon_arith_acc<q>")]
634 )
635
636 (define_insn "aarch64_<sur>adalp<mode>_3"
637 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
638 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
639 (match_operand:<VDBLW> 2 "register_operand" "0")]
640 ADALP))]
641 "TARGET_SIMD"
642 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
643 [(set_attr "type" "neon_reduc_add<q>")]
644 )
645
646 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
647 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
648 ;; reduction of the difference into a V4SI vector and accumulate that into
649 ;; operand 3 before copying that into the result operand 0.
650 ;; Perform that with a sequence of:
651 ;; UABDL2 tmp.8h, op1.16b, op2.16b
652 ;; UABAL tmp.8h, op1.16b, op2.16b
653 ;; UADALP op3.4s, tmp.8h
654 ;; MOV op0, op3 // should be eliminated in later passes.
655 ;; The signed version just uses the signed variants of the above instructions.
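;;
;; For example (illustrative), the vectorizer uses this expansion for
;; sum-of-absolute-differences reductions such as:
;;
;;   int
;;   usad (unsigned char *a, unsigned char *b, int n)
;;   {
;;     int sum = 0;
;;     for (int i = 0; i < n; i++)
;;       sum += __builtin_abs (a[i] - b[i]);
;;     return sum;
;;   }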
656
657 (define_expand "<sur>sadv16qi"
658 [(use (match_operand:V4SI 0 "register_operand"))
659 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
660 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
661 (use (match_operand:V4SI 3 "register_operand"))]
662 "TARGET_SIMD"
663 {
664 rtx reduc = gen_reg_rtx (V8HImode);
665 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
666 operands[2]));
667 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
668 operands[2], reduc));
669 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
670 operands[3]));
671 emit_move_insn (operands[0], operands[3]);
672 DONE;
673 }
674 )
675
676 (define_insn "aba<mode>_3"
677 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
678 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
679 (match_operand:VDQ_BHSI 1 "register_operand" "w")
680 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
681 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
682 "TARGET_SIMD"
683 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
684 [(set_attr "type" "neon_arith_acc<q>")]
685 )
686
687 (define_insn "fabd<mode>3"
688 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
689 (abs:VHSDF_HSDF
690 (minus:VHSDF_HSDF
691 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
692 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
693 "TARGET_SIMD"
694 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
695 [(set_attr "type" "neon_fp_abd_<stype><q>")]
696 )
697
698 ;; For AND (vector, register) and BIC (vector, immediate)
699 (define_insn "and<mode>3"
700 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
701 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
702 (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
703 "TARGET_SIMD"
704 {
705 switch (which_alternative)
706 {
707 case 0:
708 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
709 case 1:
710 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
711 AARCH64_CHECK_BIC);
712 default:
713 gcc_unreachable ();
714 }
715 }
716 [(set_attr "type" "neon_logic<q>")]
717 )
718
719 ;; For ORR (vector, register) and ORR (vector, immediate)
720 (define_insn "ior<mode>3"
721 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
722 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
723 (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
724 "TARGET_SIMD"
725 {
726 switch (which_alternative)
727 {
728 case 0:
729 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
730 case 1:
731 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
732 AARCH64_CHECK_ORR);
733 default:
734 gcc_unreachable ();
735 }
736 }
737 [(set_attr "type" "neon_logic<q>")]
738 )
739
740 (define_insn "xor<mode>3"
741 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
742 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
743 (match_operand:VDQ_I 2 "register_operand" "w")))]
744 "TARGET_SIMD"
745 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
746 [(set_attr "type" "neon_logic<q>")]
747 )
748
749 (define_insn "one_cmpl<mode>2"
750 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
751 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
752 "TARGET_SIMD"
753 "not\t%0.<Vbtype>, %1.<Vbtype>"
754 [(set_attr "type" "neon_logic<q>")]
755 )
756
757 (define_insn "aarch64_simd_vec_set<mode>"
758 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
759 (vec_merge:VALL_F16
760 (vec_duplicate:VALL_F16
761 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
762 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
763 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
764 "TARGET_SIMD"
765 {
766 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
767 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
768 switch (which_alternative)
769 {
770 case 0:
771 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
772 case 1:
773 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
774 case 2:
775 return "ld1\\t{%0.<Vetype>}[%p2], %1";
776 default:
777 gcc_unreachable ();
778 }
779 }
780 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
781 )
782
783 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
784 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
785 (vec_merge:VALL_F16
786 (vec_duplicate:VALL_F16
787 (vec_select:<VEL>
788 (match_operand:VALL_F16 3 "register_operand" "w")
789 (parallel
790 [(match_operand:SI 4 "immediate_operand" "i")])))
791 (match_operand:VALL_F16 1 "register_operand" "0")
792 (match_operand:SI 2 "immediate_operand" "i")))]
793 "TARGET_SIMD"
794 {
795 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
796 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
797 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
798
799 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
800 }
801 [(set_attr "type" "neon_ins<q>")]
802 )
803
804 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
805 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
806 (vec_merge:VALL_F16_NO_V2Q
807 (vec_duplicate:VALL_F16_NO_V2Q
808 (vec_select:<VEL>
809 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
810 (parallel
811 [(match_operand:SI 4 "immediate_operand" "i")])))
812 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
813 (match_operand:SI 2 "immediate_operand" "i")))]
814 "TARGET_SIMD"
815 {
816 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
817 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
818 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
819 INTVAL (operands[4]));
820
821 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
822 }
823 [(set_attr "type" "neon_ins<q>")]
824 )
825
826 (define_insn "aarch64_simd_lshr<mode>"
827 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
828 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
829 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
830 "TARGET_SIMD"
831 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
832 [(set_attr "type" "neon_shift_imm<q>")]
833 )
834
835 (define_insn "aarch64_simd_ashr<mode>"
836 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
837 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
838 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
839 "TARGET_SIMD"
840 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
841 [(set_attr "type" "neon_shift_imm<q>")]
842 )
843
844 (define_insn "aarch64_simd_imm_shl<mode>"
845 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
846 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
847 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
848 "TARGET_SIMD"
849 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
850 [(set_attr "type" "neon_shift_imm<q>")]
851 )
852
853 (define_insn "aarch64_simd_reg_sshl<mode>"
854 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
855 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
856 (match_operand:VDQ_I 2 "register_operand" "w")))]
857 "TARGET_SIMD"
858 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
859 [(set_attr "type" "neon_shift_reg<q>")]
860 )
861
862 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
863 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
864 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
865 (match_operand:VDQ_I 2 "register_operand" "w")]
866 UNSPEC_ASHIFT_UNSIGNED))]
867 "TARGET_SIMD"
868 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
869 [(set_attr "type" "neon_shift_reg<q>")]
870 )
871
872 (define_insn "aarch64_simd_reg_shl<mode>_signed"
873 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
874 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
875 (match_operand:VDQ_I 2 "register_operand" "w")]
876 UNSPEC_ASHIFT_SIGNED))]
877 "TARGET_SIMD"
878 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
879 [(set_attr "type" "neon_shift_reg<q>")]
880 )
881
882 (define_expand "ashl<mode>3"
883 [(match_operand:VDQ_I 0 "register_operand" "")
884 (match_operand:VDQ_I 1 "register_operand" "")
885 (match_operand:SI 2 "general_operand" "")]
886 "TARGET_SIMD"
887 {
888 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
889 int shift_amount;
890
891 if (CONST_INT_P (operands[2]))
892 {
893 shift_amount = INTVAL (operands[2]);
894 if (shift_amount >= 0 && shift_amount < bit_width)
895 {
896 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
897 shift_amount);
898 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
899 operands[1],
900 tmp));
901 DONE;
902 }
903 else
904 {
905 operands[2] = force_reg (SImode, operands[2]);
906 }
907 }
908 else if (MEM_P (operands[2]))
909 {
910 operands[2] = force_reg (SImode, operands[2]);
911 }
912
913 if (REG_P (operands[2]))
914 {
915 rtx tmp = gen_reg_rtx (<MODE>mode);
916 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
917 convert_to_mode (<VEL>mode,
918 operands[2],
919 0)));
920 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
921 tmp));
922 DONE;
923 }
924 else
925 FAIL;
926 }
927 )
928
929 (define_expand "lshr<mode>3"
930 [(match_operand:VDQ_I 0 "register_operand" "")
931 (match_operand:VDQ_I 1 "register_operand" "")
932 (match_operand:SI 2 "general_operand" "")]
933 "TARGET_SIMD"
934 {
935 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
936 int shift_amount;
937
938 if (CONST_INT_P (operands[2]))
939 {
940 shift_amount = INTVAL (operands[2]);
941 if (shift_amount > 0 && shift_amount <= bit_width)
942 {
943 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
944 shift_amount);
945 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
946 operands[1],
947 tmp));
948 DONE;
949 }
950 else
951 operands[2] = force_reg (SImode, operands[2]);
952 }
953 else if (MEM_P (operands[2]))
954 {
955 operands[2] = force_reg (SImode, operands[2]);
956 }
957
958 if (REG_P (operands[2]))
959 {
960 rtx tmp = gen_reg_rtx (SImode);
961 rtx tmp1 = gen_reg_rtx (<MODE>mode);
962 emit_insn (gen_negsi2 (tmp, operands[2]));
963 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
964 convert_to_mode (<VEL>mode,
965 tmp, 0)));
966 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
967 operands[1],
968 tmp1));
969 DONE;
970 }
971 else
972 FAIL;
973 }
974 )
975
976 (define_expand "ashr<mode>3"
977 [(match_operand:VDQ_I 0 "register_operand" "")
978 (match_operand:VDQ_I 1 "register_operand" "")
979 (match_operand:SI 2 "general_operand" "")]
980 "TARGET_SIMD"
981 {
982 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
983 int shift_amount;
984
985 if (CONST_INT_P (operands[2]))
986 {
987 shift_amount = INTVAL (operands[2]);
988 if (shift_amount > 0 && shift_amount <= bit_width)
989 {
990 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
991 shift_amount);
992 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
993 operands[1],
994 tmp));
995 DONE;
996 }
997 else
998 operands[2] = force_reg (SImode, operands[2]);
999 }
1000 else if (MEM_P (operands[2]))
1001 {
1002 operands[2] = force_reg (SImode, operands[2]);
1003 }
1004
1005 if (REG_P (operands[2]))
1006 {
1007 rtx tmp = gen_reg_rtx (SImode);
1008 rtx tmp1 = gen_reg_rtx (<MODE>mode);
1009 emit_insn (gen_negsi2 (tmp, operands[2]));
1010 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
1011 convert_to_mode (<VEL>mode,
1012 tmp, 0)));
1013 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
1014 operands[1],
1015 tmp1));
1016 DONE;
1017 }
1018 else
1019 FAIL;
1020 }
1021 )
1022
1023 (define_expand "vashl<mode>3"
1024 [(match_operand:VDQ_I 0 "register_operand" "")
1025 (match_operand:VDQ_I 1 "register_operand" "")
1026 (match_operand:VDQ_I 2 "register_operand" "")]
1027 "TARGET_SIMD"
1028 {
1029 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
1030 operands[2]));
1031 DONE;
1032 })
1033
1034 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
1035 ;; Negating individual lanes most certainly offsets the
1036 ;; gain from vectorization.
1037 (define_expand "vashr<mode>3"
1038 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1039 (match_operand:VDQ_BHSI 1 "register_operand" "")
1040 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1041 "TARGET_SIMD"
1042 {
1043 rtx neg = gen_reg_rtx (<MODE>mode);
1044 emit (gen_neg<mode>2 (neg, operands[2]));
1045 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
1046 neg));
1047 DONE;
1048 })
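;; SSHL and USHL shift each lane left for positive shift counts and right
;; for negative counts, which is why the vashr expander above (and the
;; vlshr expander below) negates the shift amounts before using them.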
1049
1050 ;; DI vector shift
1051 (define_expand "aarch64_ashr_simddi"
1052 [(match_operand:DI 0 "register_operand" "=w")
1053 (match_operand:DI 1 "register_operand" "w")
1054 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1055 "TARGET_SIMD"
1056 {
1057 /* An arithmetic shift right by 64 fills the result with copies of the sign
1058 bit, just like asr by 63; however, the standard pattern does not handle
1059 a shift by 64. */
1060 if (INTVAL (operands[2]) == 64)
1061 operands[2] = GEN_INT (63);
1062 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
1063 DONE;
1064 }
1065 )
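;; For example, the intrinsic vshr_n_s64 accepts shift counts of 1 to 64;
;; an illustrative use that exercises the shift-by-64 case handled above:
;;
;;   #include <arm_neon.h>
;;
;;   int64x1_t
;;   shr64 (int64x1_t x)
;;   {
;;     return vshr_n_s64 (x, 64);   /* Same result as a shift by 63.  */
;;   }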
1066
1067 (define_expand "vlshr<mode>3"
1068 [(match_operand:VDQ_BHSI 0 "register_operand" "")
1069 (match_operand:VDQ_BHSI 1 "register_operand" "")
1070 (match_operand:VDQ_BHSI 2 "register_operand" "")]
1071 "TARGET_SIMD"
1072 {
1073 rtx neg = gen_reg_rtx (<MODE>mode);
1074 emit (gen_neg<mode>2 (neg, operands[2]));
1075 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
1076 neg));
1077 DONE;
1078 })
1079
1080 (define_expand "aarch64_lshr_simddi"
1081 [(match_operand:DI 0 "register_operand" "=w")
1082 (match_operand:DI 1 "register_operand" "w")
1083 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
1084 "TARGET_SIMD"
1085 {
1086 if (INTVAL (operands[2]) == 64)
1087 emit_move_insn (operands[0], const0_rtx);
1088 else
1089 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1090 DONE;
1091 }
1092 )
1093
1094 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1095 (define_insn "vec_shr_<mode>"
1096 [(set (match_operand:VD 0 "register_operand" "=w")
1097 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1098 (match_operand:SI 2 "immediate_operand" "i")]
1099 UNSPEC_VEC_SHR))]
1100 "TARGET_SIMD"
1101 {
1102 if (BYTES_BIG_ENDIAN)
1103 return "shl %d0, %d1, %2";
1104 else
1105 return "ushr %d0, %d1, %2";
1106 }
1107 [(set_attr "type" "neon_shift_imm")]
1108 )
1109
1110 (define_expand "vec_set<mode>"
1111 [(match_operand:VALL_F16 0 "register_operand" "+w")
1112 (match_operand:<VEL> 1 "register_operand" "w")
1113 (match_operand:SI 2 "immediate_operand" "")]
1114 "TARGET_SIMD"
1115 {
1116 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1117 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1118 GEN_INT (elem), operands[0]));
1119 DONE;
1120 }
1121 )
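;; Note that operands[2] of vec_set is a lane index; the expander converts
;; it to the one-hot vec_merge mask that aarch64_simd_vec_set expects.
;; For example, setting lane 2 of a V4SI vector passes GEN_INT (1 << 2).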
1122
1123
1124 (define_insn "aarch64_mla<mode>"
1125 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1126 (plus:VDQ_BHSI (mult:VDQ_BHSI
1127 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1128 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1129 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1130 "TARGET_SIMD"
1131 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1132 [(set_attr "type" "neon_mla_<Vetype><q>")]
1133 )
1134
1135 (define_insn "*aarch64_mla_elt<mode>"
1136 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1137 (plus:VDQHS
1138 (mult:VDQHS
1139 (vec_duplicate:VDQHS
1140 (vec_select:<VEL>
1141 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1142 (parallel [(match_operand:SI 2 "immediate_operand")])))
1143 (match_operand:VDQHS 3 "register_operand" "w"))
1144 (match_operand:VDQHS 4 "register_operand" "0")))]
1145 "TARGET_SIMD"
1146 {
1147 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1148 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1149 }
1150 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1151 )
1152
1153 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1154 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1155 (plus:VDQHS
1156 (mult:VDQHS
1157 (vec_duplicate:VDQHS
1158 (vec_select:<VEL>
1159 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1160 (parallel [(match_operand:SI 2 "immediate_operand")])))
1161 (match_operand:VDQHS 3 "register_operand" "w"))
1162 (match_operand:VDQHS 4 "register_operand" "0")))]
1163 "TARGET_SIMD"
1164 {
1165 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1166 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1167 }
1168 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1169 )
1170
1171 (define_insn "*aarch64_mla_elt_merge<mode>"
1172 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1173 (plus:VDQHS
1174 (mult:VDQHS (vec_duplicate:VDQHS
1175 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1176 (match_operand:VDQHS 2 "register_operand" "w"))
1177 (match_operand:VDQHS 3 "register_operand" "0")))]
1178 "TARGET_SIMD"
1179 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1180 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1181 )
1182
1183 (define_insn "aarch64_mls<mode>"
1184 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1185 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1186 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1187 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1188 "TARGET_SIMD"
1189 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1190 [(set_attr "type" "neon_mla_<Vetype><q>")]
1191 )
1192
1193 (define_insn "*aarch64_mls_elt<mode>"
1194 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1195 (minus:VDQHS
1196 (match_operand:VDQHS 4 "register_operand" "0")
1197 (mult:VDQHS
1198 (vec_duplicate:VDQHS
1199 (vec_select:<VEL>
1200 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1201 (parallel [(match_operand:SI 2 "immediate_operand")])))
1202 (match_operand:VDQHS 3 "register_operand" "w"))))]
1203 "TARGET_SIMD"
1204 {
1205 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1206 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1207 }
1208 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1209 )
1210
1211 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1212 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1213 (minus:VDQHS
1214 (match_operand:VDQHS 4 "register_operand" "0")
1215 (mult:VDQHS
1216 (vec_duplicate:VDQHS
1217 (vec_select:<VEL>
1218 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1219 (parallel [(match_operand:SI 2 "immediate_operand")])))
1220 (match_operand:VDQHS 3 "register_operand" "w"))))]
1221 "TARGET_SIMD"
1222 {
1223 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1224 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1225 }
1226 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1227 )
1228
1229 (define_insn "*aarch64_mls_elt_merge<mode>"
1230 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1231 (minus:VDQHS
1232 (match_operand:VDQHS 1 "register_operand" "0")
1233 (mult:VDQHS (vec_duplicate:VDQHS
1234 (match_operand:<VEL> 2 "register_operand" "<h_con>"))
1235 (match_operand:VDQHS 3 "register_operand" "w"))))]
1236 "TARGET_SIMD"
1237 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1238 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1239 )
1240
1241 ;; Max/Min operations.
1242 (define_insn "<su><maxmin><mode>3"
1243 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1244 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1245 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1246 "TARGET_SIMD"
1247 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1248 [(set_attr "type" "neon_minmax<q>")]
1249 )
1250
1251 (define_expand "<su><maxmin>v2di3"
1252 [(set (match_operand:V2DI 0 "register_operand" "")
1253 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1254 (match_operand:V2DI 2 "register_operand" "")))]
1255 "TARGET_SIMD"
1256 {
1257 enum rtx_code cmp_operator;
1258 rtx cmp_fmt;
1259
1260 switch (<CODE>)
1261 {
1262 case UMIN:
1263 cmp_operator = LTU;
1264 break;
1265 case SMIN:
1266 cmp_operator = LT;
1267 break;
1268 case UMAX:
1269 cmp_operator = GTU;
1270 break;
1271 case SMAX:
1272 cmp_operator = GT;
1273 break;
1274 default:
1275 gcc_unreachable ();
1276 }
1277
1278 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1279 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1280 operands[2], cmp_fmt, operands[1], operands[2]));
1281 DONE;
1282 })
1283
1284 ;; Pairwise Integer Max/Min operations.
1285 (define_insn "aarch64_<maxmin_uns>p<mode>"
1286 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1287 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1288 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1289 MAXMINV))]
1290 "TARGET_SIMD"
1291 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1292 [(set_attr "type" "neon_minmax<q>")]
1293 )
1294
1295 ;; Pairwise FP Max/Min operations.
1296 (define_insn "aarch64_<maxmin_uns>p<mode>"
1297 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1298 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1299 (match_operand:VHSDF 2 "register_operand" "w")]
1300 FMAXMINV))]
1301 "TARGET_SIMD"
1302 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1303 [(set_attr "type" "neon_minmax<q>")]
1304 )
1305
1306 ;; vec_concat gives a new vector with the low elements from operand 1, and
1307 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1308 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1309 ;; What that means is that the RTL descriptions of the patterns below
1310 ;; need to change depending on endianness.
1311
1312 ;; Move to the low architectural bits of the register.
1313 ;; On little-endian this is { operand, zeroes }
1314 ;; On big-endian this is { zeroes, operand }
1315
1316 (define_insn "move_lo_quad_internal_<mode>"
1317 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1318 (vec_concat:VQ_NO2E
1319 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1320 (vec_duplicate:<VHALF> (const_int 0))))]
1321 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1322 "@
1323 dup\\t%d0, %1.d[0]
1324 fmov\\t%d0, %1
1325 dup\\t%d0, %1"
1326 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1327 (set_attr "simd" "yes,*,yes")
1328 (set_attr "fp" "*,yes,*")
1329 (set_attr "length" "4")]
1330 )
1331
1332 (define_insn "move_lo_quad_internal_<mode>"
1333 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1334 (vec_concat:VQ_2E
1335 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1336 (const_int 0)))]
1337 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1338 "@
1339 dup\\t%d0, %1.d[0]
1340 fmov\\t%d0, %1
1341 dup\\t%d0, %1"
1342 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1343 (set_attr "simd" "yes,*,yes")
1344 (set_attr "fp" "*,yes,*")
1345 (set_attr "length" "4")]
1346 )
1347
1348 (define_insn "move_lo_quad_internal_be_<mode>"
1349 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1350 (vec_concat:VQ_NO2E
1351 (vec_duplicate:<VHALF> (const_int 0))
1352 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1353 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1354 "@
1355 dup\\t%d0, %1.d[0]
1356 fmov\\t%d0, %1
1357 dup\\t%d0, %1"
1358 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1359 (set_attr "simd" "yes,*,yes")
1360 (set_attr "fp" "*,yes,*")
1361 (set_attr "length" "4")]
1362 )
1363
1364 (define_insn "move_lo_quad_internal_be_<mode>"
1365 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1366 (vec_concat:VQ_2E
1367 (const_int 0)
1368 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1369 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1370 "@
1371 dup\\t%d0, %1.d[0]
1372 fmov\\t%d0, %1
1373 dup\\t%d0, %1"
1374 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1375 (set_attr "simd" "yes,*,yes")
1376 (set_attr "fp" "*,yes,*")
1377 (set_attr "length" "4")]
1378 )
1379
1380 (define_expand "move_lo_quad_<mode>"
1381 [(match_operand:VQ 0 "register_operand")
1382 (match_operand:VQ 1 "register_operand")]
1383 "TARGET_SIMD"
1384 {
1385 if (BYTES_BIG_ENDIAN)
1386 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1387 else
1388 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1389 DONE;
1390 }
1391 )
1392
1393 ;; Move operand1 to the high architectural bits of the register, keeping
1394 ;; the low architectural bits of operand2.
1395 ;; For little-endian this is { operand2, operand1 }
1396 ;; For big-endian this is { operand1, operand2 }
1397
1398 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1399 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1400 (vec_concat:VQ
1401 (vec_select:<VHALF>
1402 (match_dup 0)
1403 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1404 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1405 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1406 "@
1407 ins\\t%0.d[1], %1.d[0]
1408 ins\\t%0.d[1], %1"
1409 [(set_attr "type" "neon_ins")]
1410 )
1411
1412 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1413 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1414 (vec_concat:VQ
1415 (match_operand:<VHALF> 1 "register_operand" "w,r")
1416 (vec_select:<VHALF>
1417 (match_dup 0)
1418 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1419 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1420 "@
1421 ins\\t%0.d[1], %1.d[0]
1422 ins\\t%0.d[1], %1"
1423 [(set_attr "type" "neon_ins")]
1424 )
1425
1426 (define_expand "move_hi_quad_<mode>"
1427 [(match_operand:VQ 0 "register_operand" "")
1428 (match_operand:<VHALF> 1 "register_operand" "")]
1429 "TARGET_SIMD"
1430 {
1431 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1432 if (BYTES_BIG_ENDIAN)
1433 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1434 operands[1], p));
1435 else
1436 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1437 operands[1], p));
1438 DONE;
1439 })
1440
1441 ;; Narrowing operations.
1442
1443 ;; For doubles.
1444 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1445 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1446 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1447 "TARGET_SIMD"
1448 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1449 [(set_attr "type" "neon_shift_imm_narrow_q")]
1450 )
1451
1452 (define_expand "vec_pack_trunc_<mode>"
1453 [(match_operand:<VNARROWD> 0 "register_operand" "")
1454 (match_operand:VDN 1 "register_operand" "")
1455 (match_operand:VDN 2 "register_operand" "")]
1456 "TARGET_SIMD"
1457 {
1458 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1459 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1460 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1461
1462 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1463 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1464 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1465 DONE;
1466 })
1467
1468 ;; For quads.
1469
1470 (define_insn "vec_pack_trunc_<mode>"
1471 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1472 (vec_concat:<VNARROWQ2>
1473 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1474 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1475 "TARGET_SIMD"
1476 {
1477 if (BYTES_BIG_ENDIAN)
1478 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1479 else
1480 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1481 }
1482 [(set_attr "type" "multiple")
1483 (set_attr "length" "8")]
1484 )
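;; An illustrative C loop that the vectorizer narrows using the pack
;; patterns above:
;;
;;   void
;;   narrow (short *d, const int *s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (short) s[i];
;;   }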
1485
1486 ;; Widening operations.
1487
1488 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1489 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1490 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491 (match_operand:VQW 1 "register_operand" "w")
1492 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1493 )))]
1494 "TARGET_SIMD"
1495 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1496 [(set_attr "type" "neon_shift_imm_long")]
1497 )
1498
1499 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1500 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1501 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1502 (match_operand:VQW 1 "register_operand" "w")
1503 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1504 )))]
1505 "TARGET_SIMD"
1506 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1507 [(set_attr "type" "neon_shift_imm_long")]
1508 )
1509
1510 (define_expand "vec_unpack<su>_hi_<mode>"
1511 [(match_operand:<VWIDE> 0 "register_operand" "")
1512 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1513 "TARGET_SIMD"
1514 {
1515 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1516 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1517 operands[1], p));
1518 DONE;
1519 }
1520 )
1521
1522 (define_expand "vec_unpack<su>_lo_<mode>"
1523 [(match_operand:<VWIDE> 0 "register_operand" "")
1524 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1525 "TARGET_SIMD"
1526 {
1527 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1528 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1529 operands[1], p));
1530 DONE;
1531 }
1532 )
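;; An illustrative C loop that the vectorizer widens using the unpack
;; patterns above (a <su>shll with a shift of zero):
;;
;;   void
;;   widen (int *d, const short *s, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = s[i];
;;   }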
1533
1534 ;; Widening arithmetic.
1535
1536 (define_insn "*aarch64_<su>mlal_lo<mode>"
1537 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1538 (plus:<VWIDE>
1539 (mult:<VWIDE>
1540 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1541 (match_operand:VQW 2 "register_operand" "w")
1542 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1543 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1544 (match_operand:VQW 4 "register_operand" "w")
1545 (match_dup 3))))
1546 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1547 "TARGET_SIMD"
1548 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1549 [(set_attr "type" "neon_mla_<Vetype>_long")]
1550 )
1551
1552 (define_insn "*aarch64_<su>mlal_hi<mode>"
1553 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1554 (plus:<VWIDE>
1555 (mult:<VWIDE>
1556 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1557 (match_operand:VQW 2 "register_operand" "w")
1558 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1559 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1560 (match_operand:VQW 4 "register_operand" "w")
1561 (match_dup 3))))
1562 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1563 "TARGET_SIMD"
1564 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1565 [(set_attr "type" "neon_mla_<Vetype>_long")]
1566 )
1567
1568 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1569 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1570 (minus:<VWIDE>
1571 (match_operand:<VWIDE> 1 "register_operand" "0")
1572 (mult:<VWIDE>
1573 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1574 (match_operand:VQW 2 "register_operand" "w")
1575 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1576 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1577 (match_operand:VQW 4 "register_operand" "w")
1578 (match_dup 3))))))]
1579 "TARGET_SIMD"
1580 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1581 [(set_attr "type" "neon_mla_<Vetype>_long")]
1582 )
1583
1584 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1585 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1586 (minus:<VWIDE>
1587 (match_operand:<VWIDE> 1 "register_operand" "0")
1588 (mult:<VWIDE>
1589 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1590 (match_operand:VQW 2 "register_operand" "w")
1591 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1592 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1593 (match_operand:VQW 4 "register_operand" "w")
1594 (match_dup 3))))))]
1595 "TARGET_SIMD"
1596 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1597 [(set_attr "type" "neon_mla_<Vetype>_long")]
1598 )
1599
1600 (define_insn "*aarch64_<su>mlal<mode>"
1601 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1602 (plus:<VWIDE>
1603 (mult:<VWIDE>
1604 (ANY_EXTEND:<VWIDE>
1605 (match_operand:VD_BHSI 1 "register_operand" "w"))
1606 (ANY_EXTEND:<VWIDE>
1607 (match_operand:VD_BHSI 2 "register_operand" "w")))
1608 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1609 "TARGET_SIMD"
1610 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1611 [(set_attr "type" "neon_mla_<Vetype>_long")]
1612 )
1613
1614 (define_insn "*aarch64_<su>mlsl<mode>"
1615 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1616 (minus:<VWIDE>
1617 (match_operand:<VWIDE> 1 "register_operand" "0")
1618 (mult:<VWIDE>
1619 (ANY_EXTEND:<VWIDE>
1620 (match_operand:VD_BHSI 2 "register_operand" "w"))
1621 (ANY_EXTEND:<VWIDE>
1622 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1623 "TARGET_SIMD"
1624 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1625 [(set_attr "type" "neon_mla_<Vetype>_long")]
1626 )
1627
1628 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1629 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1630 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1631 (match_operand:VQW 1 "register_operand" "w")
1632 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1633 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1634 (match_operand:VQW 2 "register_operand" "w")
1635 (match_dup 3)))))]
1636 "TARGET_SIMD"
1637 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1638 [(set_attr "type" "neon_mul_<Vetype>_long")]
1639 )
1640
1641 (define_expand "vec_widen_<su>mult_lo_<mode>"
1642 [(match_operand:<VWIDE> 0 "register_operand" "")
1643 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1644 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1645 "TARGET_SIMD"
1646 {
1647 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1648 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1649 operands[1],
1650 operands[2], p));
1651 DONE;
1652 }
1653 )
1654
1655 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1656 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1657 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1658 (match_operand:VQW 1 "register_operand" "w")
1659 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1660 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1661 (match_operand:VQW 2 "register_operand" "w")
1662 (match_dup 3)))))]
1663 "TARGET_SIMD"
1664 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1665 [(set_attr "type" "neon_mul_<Vetype>_long")]
1666 )
1667
1668 (define_expand "vec_widen_<su>mult_hi_<mode>"
1669 [(match_operand:<VWIDE> 0 "register_operand" "")
1670 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1671 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1672 "TARGET_SIMD"
1673 {
1674 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1675 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1676 operands[1],
1677 operands[2], p));
1678 DONE;
1680 }
1681 )
1682
1683 ;; FP vector operations.
1684 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1685 ;; double-precision (64-bit) floating-point data types and arithmetic as
1686 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1687 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1688 ;;
1689 ;; Floating-point operations can raise an exception. Vectorising such
1690 ;; operations is safe for the reasons explained below.
1691 ;;
1692 ;; ARMv8 permits an extension to enable trapped floating-point
1693 ;; exception handling; however, this is an optional feature. In the
1694 ;; event of a floating-point exception being raised by vectorised
1695 ;; code then:
1696 ;; 1. If trapped floating-point exceptions are available, then a trap
1697 ;; will be taken when any lane raises an enabled exception. A trap
1698 ;; handler may determine which lane raised the exception.
1699 ;; 2. Alternatively a sticky exception flag is set in the
1700 ;; floating-point status register (FPSR). Software may explicitly
1701 ;; test the exception flags, in which case the tests will either
1702 ;; prevent vectorisation, allowing precise identification of the
1703 ;; failing operation, or, if tested outside of vectorisable regions,
1704 ;; the specific operation and lane are not of interest.
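;;
;; As an illustrative sketch (not part of this port; names are
;; hypothetical), a plain C loop such as the following can be
;; auto-vectorised without -ffast-math, precisely because the vector
;; instructions follow IEEE 754:
;;
;;   void
;;   vadd (float *restrict a, const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] += b[i];   /* May become "fadd v0.4s, v1.4s, v2.4s".  */
;;   }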
1705
1706 ;; FP arithmetic operations.
1707
1708 (define_insn "add<mode>3"
1709 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1710 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1711 (match_operand:VHSDF 2 "register_operand" "w")))]
1712 "TARGET_SIMD"
1713 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1714 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1715 )
1716
1717 (define_insn "sub<mode>3"
1718 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1719 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1720 (match_operand:VHSDF 2 "register_operand" "w")))]
1721 "TARGET_SIMD"
1722 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1723 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1724 )
1725
1726 (define_insn "mul<mode>3"
1727 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1728 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1729 (match_operand:VHSDF 2 "register_operand" "w")))]
1730 "TARGET_SIMD"
1731 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1732 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1733 )
1734
1735 (define_expand "div<mode>3"
1736 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1737 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1738 (match_operand:VHSDF 2 "register_operand" "w")))]
1739 "TARGET_SIMD"
1740 {
1741 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1742 DONE;
1743
1744 operands[1] = force_reg (<MODE>mode, operands[1]);
1745 })
1746
1747 (define_insn "*div<mode>3"
1748 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1749 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1750 (match_operand:VHSDF 2 "register_operand" "w")))]
1751 "TARGET_SIMD"
1752 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1753 [(set_attr "type" "neon_fp_div_<stype><q>")]
1754 )
1755
1756 (define_insn "neg<mode>2"
1757 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1758 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1759 "TARGET_SIMD"
1760 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1761 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1762 )
1763
1764 (define_insn "abs<mode>2"
1765 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1766 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1767 "TARGET_SIMD"
1768 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1769 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1770 )
1771
1772 (define_insn "fma<mode>4"
1773 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1774 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1775 (match_operand:VHSDF 2 "register_operand" "w")
1776 (match_operand:VHSDF 3 "register_operand" "0")))]
1777 "TARGET_SIMD"
1778 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1779 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1780 )
1781
1782 (define_insn "*aarch64_fma4_elt<mode>"
1783 [(set (match_operand:VDQF 0 "register_operand" "=w")
1784 (fma:VDQF
1785 (vec_duplicate:VDQF
1786 (vec_select:<VEL>
1787 (match_operand:VDQF 1 "register_operand" "<h_con>")
1788 (parallel [(match_operand:SI 2 "immediate_operand")])))
1789 (match_operand:VDQF 3 "register_operand" "w")
1790 (match_operand:VDQF 4 "register_operand" "0")))]
1791 "TARGET_SIMD"
1792 {
1793 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1794 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1795 }
1796 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1797 )
1798
1799 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1800 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1801 (fma:VDQSF
1802 (vec_duplicate:VDQSF
1803 (vec_select:<VEL>
1804 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1805 (parallel [(match_operand:SI 2 "immediate_operand")])))
1806 (match_operand:VDQSF 3 "register_operand" "w")
1807 (match_operand:VDQSF 4 "register_operand" "0")))]
1808 "TARGET_SIMD"
1809 {
1810 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1811 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1812 }
1813 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1814 )
1815
1816 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1817 [(set (match_operand:VMUL 0 "register_operand" "=w")
1818 (fma:VMUL
1819 (vec_duplicate:VMUL
1820 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1821 (match_operand:VMUL 2 "register_operand" "w")
1822 (match_operand:VMUL 3 "register_operand" "0")))]
1823 "TARGET_SIMD"
1824 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1825 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1826 )
1827
1828 (define_insn "*aarch64_fma4_elt_to_64v2df"
1829 [(set (match_operand:DF 0 "register_operand" "=w")
1830 (fma:DF
1831 (vec_select:DF
1832 (match_operand:V2DF 1 "register_operand" "w")
1833 (parallel [(match_operand:SI 2 "immediate_operand")]))
1834 (match_operand:DF 3 "register_operand" "w")
1835 (match_operand:DF 4 "register_operand" "0")))]
1836 "TARGET_SIMD"
1837 {
1838 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1839 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1840 }
1841 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1842 )
1843
1844 (define_insn "fnma<mode>4"
1845 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1846 (fma:VHSDF
1847 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1848 (match_operand:VHSDF 2 "register_operand" "w")
1849 (match_operand:VHSDF 3 "register_operand" "0")))]
1850 "TARGET_SIMD"
1851 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1852 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1853 )
1854
1855 (define_insn "*aarch64_fnma4_elt<mode>"
1856 [(set (match_operand:VDQF 0 "register_operand" "=w")
1857 (fma:VDQF
1858 (neg:VDQF
1859 (match_operand:VDQF 3 "register_operand" "w"))
1860 (vec_duplicate:VDQF
1861 (vec_select:<VEL>
1862 (match_operand:VDQF 1 "register_operand" "<h_con>")
1863 (parallel [(match_operand:SI 2 "immediate_operand")])))
1864 (match_operand:VDQF 4 "register_operand" "0")))]
1865 "TARGET_SIMD"
1866 {
1867 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1868 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1869 }
1870 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1871 )
1872
1873 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1874 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1875 (fma:VDQSF
1876 (neg:VDQSF
1877 (match_operand:VDQSF 3 "register_operand" "w"))
1878 (vec_duplicate:VDQSF
1879 (vec_select:<VEL>
1880 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1881 (parallel [(match_operand:SI 2 "immediate_operand")])))
1882 (match_operand:VDQSF 4 "register_operand" "0")))]
1883 "TARGET_SIMD"
1884 {
1885 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1886 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1887 }
1888 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1889 )
1890
1891 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1892 [(set (match_operand:VMUL 0 "register_operand" "=w")
1893 (fma:VMUL
1894 (neg:VMUL
1895 (match_operand:VMUL 2 "register_operand" "w"))
1896 (vec_duplicate:VMUL
1897 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1898 (match_operand:VMUL 3 "register_operand" "0")))]
1899 "TARGET_SIMD"
1900 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1901 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1902 )
1903
1904 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1905 [(set (match_operand:DF 0 "register_operand" "=w")
1906 (fma:DF
1907 (vec_select:DF
1908 (match_operand:V2DF 1 "register_operand" "w")
1909 (parallel [(match_operand:SI 2 "immediate_operand")]))
1910 (neg:DF
1911 (match_operand:DF 3 "register_operand" "w"))
1912 (match_operand:DF 4 "register_operand" "0")))]
1913 "TARGET_SIMD"
1914 {
1915 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1916 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1917 }
1918 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1919 )
1920
1921 ;; Vector versions of the floating-point frint patterns.
1922 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1923 (define_insn "<frint_pattern><mode>2"
1924 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1925 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1926 FRINT))]
1927 "TARGET_SIMD"
1928 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1929 [(set_attr "type" "neon_fp_round_<stype><q>")]
1930 )
1931
1932 ;; Vector versions of the fcvt standard patterns.
1933 ;; Expands to lbtrunc, lround, lceil, lfloor
1934 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1935 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1936 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1937 [(match_operand:VHSDF 1 "register_operand" "w")]
1938 FCVT)))]
1939 "TARGET_SIMD"
1940 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1941 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1942 )
1943
1944 ;; HF Scalar variants of related SIMD instructions.
1945 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1946 [(set (match_operand:HI 0 "register_operand" "=w")
1947 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1948 FCVT)))]
1949 "TARGET_SIMD_F16INST"
1950 "fcvt<frint_suffix><su>\t%h0, %h1"
1951 [(set_attr "type" "neon_fp_to_int_s")]
1952 )
1953
1954 (define_insn "<optab>_trunchfhi2"
1955 [(set (match_operand:HI 0 "register_operand" "=w")
1956 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1957 "TARGET_SIMD_F16INST"
1958 "fcvtz<su>\t%h0, %h1"
1959 [(set_attr "type" "neon_fp_to_int_s")]
1960 )
1961
1962 (define_insn "<optab>hihf2"
1963 [(set (match_operand:HF 0 "register_operand" "=w")
1964 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1965 "TARGET_SIMD_F16INST"
1966 "<su_optab>cvtf\t%h0, %h1"
1967 [(set_attr "type" "neon_int_to_fp_s")]
1968 )
1969
1970 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1971 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1972 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1973 [(mult:VDQF
1974 (match_operand:VDQF 1 "register_operand" "w")
1975 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1976 UNSPEC_FRINTZ)))]
1977 "TARGET_SIMD
1978 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1979 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1980 {
1981 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1982 char buf[64];
1983 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1984 output_asm_insn (buf, operands);
1985 return "";
1986 }
1987 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1988 )
1989
1990 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1991 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1992 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1993 [(match_operand:VHSDF 1 "register_operand")]
1994 UNSPEC_FRINTZ)))]
1995 "TARGET_SIMD"
1996 {})
1997
1998 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1999 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2000 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2001 [(match_operand:VHSDF 1 "register_operand")]
2002 UNSPEC_FRINTZ)))]
2003 "TARGET_SIMD"
2004 {})
2005
2006 (define_expand "ftrunc<VHSDF:mode>2"
2007 [(set (match_operand:VHSDF 0 "register_operand")
2008 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2009 UNSPEC_FRINTZ))]
2010 "TARGET_SIMD"
2011 {})
2012
2013 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2014 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2015 (FLOATUORS:VHSDF
2016 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2017 "TARGET_SIMD"
2018 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2019 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2020 )
2021
2022 ;; Conversions between vectors of floats and doubles.
2023 ;; Contains a mix of patterns to match standard pattern names
2024 ;; and those for intrinsics.
2025
2026 ;; Float widening operations.
2027
2028 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2029 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2030 (float_extend:<VWIDE> (vec_select:<VHALF>
2031 (match_operand:VQ_HSF 1 "register_operand" "w")
2032 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2033 )))]
2034 "TARGET_SIMD"
2035 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2036 [(set_attr "type" "neon_fp_cvt_widen_s")]
2037 )
2038
2039 ;; Convert between fixed-point and floating-point (vector modes)
2040
2041 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2042 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2043 (unspec:<VHSDF:FCVT_TARGET>
2044 [(match_operand:VHSDF 1 "register_operand" "w")
2045 (match_operand:SI 2 "immediate_operand" "i")]
2046 FCVT_F2FIXED))]
2047 "TARGET_SIMD"
2048 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2049 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2050 )
2051
2052 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2053 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2054 (unspec:<VDQ_HSDI:FCVT_TARGET>
2055 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2056 (match_operand:SI 2 "immediate_operand" "i")]
2057 FCVT_FIXED2F))]
2058 "TARGET_SIMD"
2059 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2060 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2061 )
2062
2063 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2064 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2065 ;; the meaning of HI and LO changes depending on the target endianness.
2066 ;; While elsewhere we map the higher numbered elements of a vector to
2067 ;; the lower architectural lanes of the vector, for these patterns we want
2068 ;; to always treat "hi" as referring to the higher architectural lanes.
2069 ;; Consequently, while the patterns below look inconsistent with our
2070 ;; other big-endian patterns, their behavior is as required.
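;;
;; As an illustrative scalar model (a sketch only, with hypothetical
;; names): for the V8HF->V4SF unpacks, "lo" always converts
;; architectural lanes 0-3 (fcvtl) and "hi" lanes 4-7 (fcvtl2),
;; regardless of the target endianness:
;;
;;   void
;;   unpacks (float lo[4], float hi[4], const _Float16 src[8])
;;   {
;;     for (int i = 0; i < 4; i++)
;;       {
;;         lo[i] = (float) src[i];       /* fcvtl   */
;;         hi[i] = (float) src[i + 4];   /* fcvtl2  */
;;       }
;;   }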
2071
2072 (define_expand "vec_unpacks_lo_<mode>"
2073 [(match_operand:<VWIDE> 0 "register_operand" "")
2074 (match_operand:VQ_HSF 1 "register_operand" "")]
2075 "TARGET_SIMD"
2076 {
2077 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2078 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2079 operands[1], p));
2080 DONE;
2081 }
2082 )
2083
2084 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2085 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2086 (float_extend:<VWIDE> (vec_select:<VHALF>
2087 (match_operand:VQ_HSF 1 "register_operand" "w")
2088 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2089 )))]
2090 "TARGET_SIMD"
2091 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2092 [(set_attr "type" "neon_fp_cvt_widen_s")]
2093 )
2094
2095 (define_expand "vec_unpacks_hi_<mode>"
2096 [(match_operand:<VWIDE> 0 "register_operand" "")
2097 (match_operand:VQ_HSF 1 "register_operand" "")]
2098 "TARGET_SIMD"
2099 {
2100 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2101 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2102 operands[1], p));
2103 DONE;
2104 }
2105 )

2106 (define_insn "aarch64_float_extend_lo_<Vwide>"
2107 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2108 (float_extend:<VWIDE>
2109 (match_operand:VDF 1 "register_operand" "w")))]
2110 "TARGET_SIMD"
2111 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2112 [(set_attr "type" "neon_fp_cvt_widen_s")]
2113 )
2114
2115 ;; Float narrowing operations.
2116
2117 (define_insn "aarch64_float_truncate_lo_<mode>"
2118 [(set (match_operand:VDF 0 "register_operand" "=w")
2119 (float_truncate:VDF
2120 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2121 "TARGET_SIMD"
2122 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2123 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2124 )
2125
2126 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2127 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2128 (vec_concat:<VDBL>
2129 (match_operand:VDF 1 "register_operand" "0")
2130 (float_truncate:VDF
2131 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2132 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2133 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2134 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2135 )
2136
2137 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2138 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2139 (vec_concat:<VDBL>
2140 (float_truncate:VDF
2141 (match_operand:<VWIDE> 2 "register_operand" "w"))
2142 (match_operand:VDF 1 "register_operand" "0")))]
2143 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2144 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2145 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2146 )
2147
2148 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2149 [(match_operand:<VDBL> 0 "register_operand" "=w")
2150 (match_operand:VDF 1 "register_operand" "0")
2151 (match_operand:<VWIDE> 2 "register_operand" "w")]
2152 "TARGET_SIMD"
2153 {
2154 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2155 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2156 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2157 emit_insn (gen (operands[0], operands[1], operands[2]));
2158 DONE;
2159 }
2160 )
2161
2162 (define_expand "vec_pack_trunc_v2df"
2163 [(set (match_operand:V4SF 0 "register_operand")
2164 (vec_concat:V4SF
2165 (float_truncate:V2SF
2166 (match_operand:V2DF 1 "register_operand"))
2167 (float_truncate:V2SF
2168 (match_operand:V2DF 2 "register_operand"))
2169 ))]
2170 "TARGET_SIMD"
2171 {
2172 rtx tmp = gen_reg_rtx (V2SFmode);
2173 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2174 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2175
2176 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2177 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2178 tmp, operands[hi]));
2179 DONE;
2180 }
2181 )
2182
2183 (define_expand "vec_pack_trunc_df"
2184 [(set (match_operand:V2SF 0 "register_operand")
2185 (vec_concat:V2SF
2186 (float_truncate:SF
2187 (match_operand:DF 1 "register_operand"))
2188 (float_truncate:SF
2189 (match_operand:DF 2 "register_operand"))
2190 ))]
2191 "TARGET_SIMD"
2192 {
2193 rtx tmp = gen_reg_rtx (V2DFmode);
2194 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2195 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2196
2197 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2198 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2199 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2200 DONE;
2201 }
2202 )
2203
2204 ;; FP Max/Min
2205 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2206 ;; expression like:
2207 ;; a = (b < c) ? b : c;
2208 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2209 ;; either explicitly or indirectly via -ffast-math.
2210 ;;
2211 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2212 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2213 ;; operand will be returned when both operands are zero (i.e. they may not
2214 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2215 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2216 ;; NaNs.
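;;
;; As an illustrative sketch (hypothetical example), a loop like the
;; one below can be idiom-matched and vectorised to the "fminnm" form
;; of the pattern that follows when compiled with -ffast-math:
;;
;;   void
;;   vmin (float *restrict d, const float *restrict b,
;;         const float *restrict c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = (b[i] < c[i]) ? b[i] : c[i];
;;   }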
2217
2218 (define_insn "<su><maxmin><mode>3"
2219 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2220 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2221 (match_operand:VHSDF 2 "register_operand" "w")))]
2222 "TARGET_SIMD"
2223 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2224 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2225 )
2226
2227 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2228 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2229 ;; which implement the IEEE fmax ()/fmin () functions.
2230 (define_insn "<maxmin_uns><mode>3"
2231 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2232 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2233 (match_operand:VHSDF 2 "register_operand" "w")]
2234 FMAXMIN_UNS))]
2235 "TARGET_SIMD"
2236 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2237 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2238 )
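
;; For example (an illustrative sketch; names are hypothetical), calls
;; to fminf () can be vectorised through the pattern above even without
;; -ffast-math, because fminnm already implements the IEEE semantics:
;;
;;   #include <math.h>
;;   void
;;   vfmin (float *restrict d, const float *restrict a,
;;          const float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       d[i] = fminf (a[i], b[i]);   /* fminnm v0.4s, v1.4s, v2.4s  */
;;   }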
2239
2240 ;; 'across lanes' add.
2241
2242 (define_expand "reduc_plus_scal_<mode>"
2243 [(match_operand:<VEL> 0 "register_operand" "=w")
2244 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2245 UNSPEC_ADDV)]
2246 "TARGET_SIMD"
2247 {
2248 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2249 rtx scratch = gen_reg_rtx (<MODE>mode);
2250 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2251 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2252 DONE;
2253 }
2254 )
2255
2256 (define_insn "aarch64_faddp<mode>"
2257 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2258 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2259 (match_operand:VHSDF 2 "register_operand" "w")]
2260 UNSPEC_FADDV))]
2261 "TARGET_SIMD"
2262 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2263 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2264 )
2265
2266 (define_insn "aarch64_reduc_plus_internal<mode>"
2267 [(set (match_operand:VDQV 0 "register_operand" "=w")
2268 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2269 UNSPEC_ADDV))]
2270 "TARGET_SIMD"
2271 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2272 [(set_attr "type" "neon_reduc_add<q>")]
2273 )
2274
2275 (define_insn "aarch64_reduc_plus_internalv2si"
2276 [(set (match_operand:V2SI 0 "register_operand" "=w")
2277 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2278 UNSPEC_ADDV))]
2279 "TARGET_SIMD"
2280 "addp\\t%0.2s, %1.2s, %1.2s"
2281 [(set_attr "type" "neon_reduc_add")]
2282 )
2283
2284 (define_insn "reduc_plus_scal_<mode>"
2285 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2286 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2287 UNSPEC_FADDV))]
2288 "TARGET_SIMD"
2289 "faddp\\t%<Vetype>0, %1.<Vtype>"
2290 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2291 )
2292
2293 (define_expand "reduc_plus_scal_v4sf"
2294 [(set (match_operand:SF 0 "register_operand")
2295 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2296 UNSPEC_FADDV))]
2297 "TARGET_SIMD"
2298 {
2299 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2300 rtx scratch = gen_reg_rtx (V4SFmode);
2301 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2302 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2303 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2304 DONE;
2305 })
2306
2307 (define_insn "clrsb<mode>2"
2308 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2309 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2310 "TARGET_SIMD"
2311 "cls\\t%0.<Vtype>, %1.<Vtype>"
2312 [(set_attr "type" "neon_cls<q>")]
2313 )
2314
2315 (define_insn "clz<mode>2"
2316 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2317 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2318 "TARGET_SIMD"
2319 "clz\\t%0.<Vtype>, %1.<Vtype>"
2320 [(set_attr "type" "neon_cls<q>")]
2321 )
2322
2323 (define_insn "popcount<mode>2"
2324 [(set (match_operand:VB 0 "register_operand" "=w")
2325 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2326 "TARGET_SIMD"
2327 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2328 [(set_attr "type" "neon_cnt<q>")]
2329 )
2330
2331 ;; 'across lanes' max and min ops.
2332
2333 ;; Template for outputting a scalar, so we can create __builtins which can be
2334 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2335 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2336 [(match_operand:<VEL> 0 "register_operand")
2337 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2338 FMAXMINV)]
2339 "TARGET_SIMD"
2340 {
2341 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2342 rtx scratch = gen_reg_rtx (<MODE>mode);
2343 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2344 operands[1]));
2345 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2346 DONE;
2347 }
2348 )
2349
2350 ;; Likewise for integer cases, signed and unsigned.
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352 [(match_operand:<VEL> 0 "register_operand")
2353 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2354 MAXMINV)]
2355 "TARGET_SIMD"
2356 {
2357 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358 rtx scratch = gen_reg_rtx (<MODE>mode);
2359 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2360 operands[1]));
2361 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2362 DONE;
2363 }
2364 )
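
;; As an illustrative sketch (hypothetical example), an integer
;; max-reduction loop such as the one below can be vectorised through
;; the expander above to a single "smaxv" across-lanes instruction:
;;
;;   int
;;   vmax (const int *a, int n)
;;   {
;;     int m = a[0];
;;     for (int i = 1; i < n; i++)
;;       m = (a[i] > m) ? a[i] : m;
;;     return m;
;;   }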
2365
2366 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2367 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2368 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2369 MAXMINV))]
2370 "TARGET_SIMD"
2371 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2372 [(set_attr "type" "neon_reduc_minmax<q>")]
2373 )
2374
2375 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2376 [(set (match_operand:V2SI 0 "register_operand" "=w")
2377 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2378 MAXMINV))]
2379 "TARGET_SIMD"
2380 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2381 [(set_attr "type" "neon_reduc_minmax")]
2382 )
2383
2384 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2385 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2386 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2387 FMAXMINV))]
2388 "TARGET_SIMD"
2389 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2390 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2391 )
2392
2393 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2394 ;; allocation.
2395 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2396 ;; to select.
2397 ;;
2398 ;; Thus our BSL is of the form:
2399 ;; op0 = bsl (mask, op2, op3)
2400 ;; We can use any of:
2401 ;;
2402 ;; if (op0 = mask)
2403 ;; bsl mask, op2, op3
2404 ;; if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
2405 ;; bit op0, op2, mask
2406 ;; if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
2407 ;; bif op0, op3, mask
2408 ;;
2409 ;; This pattern is generated by the aarch64_simd_bsl<mode> expander.
2410 ;; Some forms of straight-line code may generate the equivalent form
2411 ;; in *aarch64_simd_bsl<mode>_alt.
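;;
;; As a C sketch of the underlying bit arithmetic (illustrative only,
;; with a hypothetical helper name), the xor/and/xor form matched below
;; is equivalent to the usual select expression:
;;
;;   #include <stdint.h>
;;   uint64_t
;;   bsl (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     /* Same value as (a & mask) | (b & ~mask).  */
;;     return ((a ^ b) & mask) ^ b;
;;   }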
2412
2413 (define_insn "aarch64_simd_bsl<mode>_internal"
2414 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2415 (xor:VDQ_I
2416 (and:VDQ_I
2417 (xor:VDQ_I
2418 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2419 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2420 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2421 (match_dup:<V_INT_EQUIV> 3)
2422 ))]
2423 "TARGET_SIMD"
2424 "@
2425 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2426 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2427 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2428 [(set_attr "type" "neon_bsl<q>")]
2429 )
2430
2431 ;; We need this form in addition to the above pattern to match the case
2432 ;; when combine tries merging three insns such that the second operand of
2433 ;; the outer XOR matches the second operand of the inner XOR rather than
2434 ;; the first. The two are equivalent but since recog doesn't try all
2435 ;; permutations of commutative operations, we have to have a separate pattern.
2436
2437 (define_insn "*aarch64_simd_bsl<mode>_alt"
2438 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2439 (xor:VDQ_I
2440 (and:VDQ_I
2441 (xor:VDQ_I
2442 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2443 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2444 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2445 (match_dup:<V_INT_EQUIV> 2)))]
2446 "TARGET_SIMD"
2447 "@
2448 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2449 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2450 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2451 [(set_attr "type" "neon_bsl<q>")]
2452 )
2453
2454 ;; DImode is special, we want to avoid computing operations which are
2455 ;; more naturally computed in general purpose registers in the vector
2456 ;; registers. If we do that, we need to move all three operands from general
2457 ;; purpose registers to vector registers, then back again. However, we
2458 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2459 ;; optimizations based on the component operations of a BSL.
2460 ;;
2461 ;; That means we need a splitter back to the individual operations, if they
2462 ;; would be better calculated on the integer side.
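;;
;; On the general-purpose side the split therefore emits the plain
;; three-instruction sequence; as a C sketch (illustrative only, with a
;; hypothetical helper name):
;;
;;   #include <stdint.h>
;;   uint64_t
;;   bsl_gp (uint64_t mask, uint64_t a, uint64_t b)
;;   {
;;     uint64_t t = a ^ b;   /* eor  */
;;     t &= mask;            /* and  */
;;     return t ^ b;         /* eor  */
;;   }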
2463
2464 (define_insn_and_split "aarch64_simd_bsldi_internal"
2465 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2466 (xor:DI
2467 (and:DI
2468 (xor:DI
2469 (match_operand:DI 3 "register_operand" "w,0,w,r")
2470 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2471 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2472 (match_dup:DI 3)
2473 ))]
2474 "TARGET_SIMD"
2475 "@
2476 bsl\\t%0.8b, %2.8b, %3.8b
2477 bit\\t%0.8b, %2.8b, %1.8b
2478 bif\\t%0.8b, %3.8b, %1.8b
2479 #"
2480 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2481 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2482 {
2483 /* Split back to individual operations. If we're before reload, and
2484 able to create a temporary register, do so. If we're after reload,
2485 we've got an early-clobber destination register, so use that.
2486 Otherwise, we can't create pseudos and we can't yet guarantee that
2487 operands[0] is safe to write, so FAIL to split. */
2488
2489 rtx scratch;
2490 if (reload_completed)
2491 scratch = operands[0];
2492 else if (can_create_pseudo_p ())
2493 scratch = gen_reg_rtx (DImode);
2494 else
2495 FAIL;
2496
2497 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2498 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2499 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2500 DONE;
2501 }
2502 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2503 (set_attr "length" "4,4,4,12")]
2504 )
2505
2506 (define_insn_and_split "aarch64_simd_bsldi_alt"
2507 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2508 (xor:DI
2509 (and:DI
2510 (xor:DI
2511 (match_operand:DI 3 "register_operand" "w,w,0,r")
2512 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2513 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2514 (match_dup:DI 2)
2515 ))]
2516 "TARGET_SIMD"
2517 "@
2518 bsl\\t%0.8b, %3.8b, %2.8b
2519 bit\\t%0.8b, %3.8b, %1.8b
2520 bif\\t%0.8b, %2.8b, %1.8b
2521 #"
2522 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2523 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2524 {
2525 /* Split back to individual operations. If we're before reload, and
2526 able to create a temporary register, do so. If we're after reload,
2527 we've got an early-clobber destination register, so use that.
2528 Otherwise, we can't create pseudos and we can't yet guarantee that
2529 operands[0] is safe to write, so FAIL to split. */
2530
2531 rtx scratch;
2532 if (reload_completed)
2533 scratch = operands[0];
2534 else if (can_create_pseudo_p ())
2535 scratch = gen_reg_rtx (DImode);
2536 else
2537 FAIL;
2538
2539 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2540 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2541 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2542 DONE;
2543 }
2544 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2545 (set_attr "length" "4,4,4,12")]
2546 )
2547
2548 (define_expand "aarch64_simd_bsl<mode>"
2549 [(match_operand:VALLDIF 0 "register_operand")
2550 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2551 (match_operand:VALLDIF 2 "register_operand")
2552 (match_operand:VALLDIF 3 "register_operand")]
2553 "TARGET_SIMD"
2554 {
2555 /* We can't alias operands together if they have different modes. */
2556 rtx tmp = operands[0];
2557 if (FLOAT_MODE_P (<MODE>mode))
2558 {
2559 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2560 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2561 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2562 }
2563 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2564 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2565 operands[1],
2566 operands[2],
2567 operands[3]));
2568 if (tmp != operands[0])
2569 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2570
2571 DONE;
2572 })
2573
2574 (define_expand "vcond_mask_<mode><v_int_equiv>"
2575 [(match_operand:VALLDI 0 "register_operand")
2576 (match_operand:VALLDI 1 "nonmemory_operand")
2577 (match_operand:VALLDI 2 "nonmemory_operand")
2578 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2579 "TARGET_SIMD"
2580 {
2581 /* If we have (a = (P) ? -1 : 0), then we can simply move the
2582 generated mask (result must be int). */
2583 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2584 && operands[2] == CONST0_RTX (<MODE>mode))
2585 emit_move_insn (operands[0], operands[3]);
2586 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2587 else if (operands[1] == CONST0_RTX (<MODE>mode)
2588 && operands[2] == CONSTM1_RTX (<MODE>mode))
2589 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2590 else
2591 {
2592 if (!REG_P (operands[1]))
2593 operands[1] = force_reg (<MODE>mode, operands[1]);
2594 if (!REG_P (operands[2]))
2595 operands[2] = force_reg (<MODE>mode, operands[2]);
2596 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2597 operands[1], operands[2]));
2598 }
2599
2600 DONE;
2601 })
2602
2603 ;; Patterns comparing two vectors to produce a mask.
2604
2605 (define_expand "vec_cmp<mode><mode>"
2606 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2607 (match_operator 1 "comparison_operator"
2608 [(match_operand:VSDQ_I_DI 2 "register_operand")
2609 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2610 "TARGET_SIMD"
2611 {
2612 rtx mask = operands[0];
2613 enum rtx_code code = GET_CODE (operands[1]);
2614
2615 switch (code)
2616 {
2617 case NE:
2618 case LE:
2619 case LT:
2620 case GE:
2621 case GT:
2622 case EQ:
2623 if (operands[3] == CONST0_RTX (<MODE>mode))
2624 break;
2625
2626 /* Fall through. */
2627 default:
2628 if (!REG_P (operands[3]))
2629 operands[3] = force_reg (<MODE>mode, operands[3]);
2630
2631 break;
2632 }
2633
2634 switch (code)
2635 {
2636 case LT:
2637 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2638 break;
2639
2640 case GE:
2641 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2642 break;
2643
2644 case LE:
2645 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2646 break;
2647
2648 case GT:
2649 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2650 break;
2651
2652 case LTU:
2653 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2654 break;
2655
2656 case GEU:
2657 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2658 break;
2659
2660 case LEU:
2661 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2662 break;
2663
2664 case GTU:
2665 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2666 break;
2667
2668 case NE:
2669 /* Handle NE as !EQ. */
2670 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2671 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2672 break;
2673
2674 case EQ:
2675 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2676 break;
2677
2678 default:
2679 gcc_unreachable ();
2680 }
2681
2682 DONE;
2683 })
2684
2685 (define_expand "vec_cmp<mode><v_int_equiv>"
2686 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2687 (match_operator 1 "comparison_operator"
2688 [(match_operand:VDQF 2 "register_operand")
2689 (match_operand:VDQF 3 "nonmemory_operand")]))]
2690 "TARGET_SIMD"
2691 {
2692 int use_zero_form = 0;
2693 enum rtx_code code = GET_CODE (operands[1]);
2694 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2695
2696 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2697
2698 switch (code)
2699 {
2700 case LE:
2701 case LT:
2702 case GE:
2703 case GT:
2704 case EQ:
2705 if (operands[3] == CONST0_RTX (<MODE>mode))
2706 {
2707 use_zero_form = 1;
2708 break;
2709 }
2710 /* Fall through. */
2711 default:
2712 if (!REG_P (operands[3]))
2713 operands[3] = force_reg (<MODE>mode, operands[3]);
2714
2715 break;
2716 }
2717
2718 switch (code)
2719 {
2720 case LT:
2721 if (use_zero_form)
2722 {
2723 comparison = gen_aarch64_cmlt<mode>;
2724 break;
2725 }
2726 /* Fall through. */
2727 case UNLT:
2728 std::swap (operands[2], operands[3]);
2729 /* Fall through. */
2730 case UNGT:
2731 case GT:
2732 comparison = gen_aarch64_cmgt<mode>;
2733 break;
2734 case LE:
2735 if (use_zero_form)
2736 {
2737 comparison = gen_aarch64_cmle<mode>;
2738 break;
2739 }
2740 /* Fall through. */
2741 case UNLE:
2742 std::swap (operands[2], operands[3]);
2743 /* Fall through. */
2744 case UNGE:
2745 case GE:
2746 comparison = gen_aarch64_cmge<mode>;
2747 break;
2748 case NE:
2749 case EQ:
2750 comparison = gen_aarch64_cmeq<mode>;
2751 break;
2752 case UNEQ:
2753 case ORDERED:
2754 case UNORDERED:
2755 case LTGT:
2756 break;
2757 default:
2758 gcc_unreachable ();
2759 }
2760
2761 switch (code)
2762 {
2763 case UNGE:
2764 case UNGT:
2765 case UNLE:
2766 case UNLT:
2767 {
2768 /* All of the above must not raise any FP exceptions. Thus we first
2769 check each operand for NaNs and force any elements containing NaN to
2770 zero before using them in the compare.
2771 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2772 (cm<cc> (isnan (a) ? 0.0 : a,
2773 isnan (b) ? 0.0 : b))
2774 We use the following transformations for doing the comparisons:
2775 a UNGE b -> a GE b
2776 a UNGT b -> a GT b
2777 a UNLE b -> b GE a
2778 a UNLT b -> b GT a. */
2779
2780 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2781 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2782 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2783 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2784 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2785 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2786 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2787 lowpart_subreg (<V_INT_EQUIV>mode,
2788 operands[2],
2789 <MODE>mode)));
2790 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2791 lowpart_subreg (<V_INT_EQUIV>mode,
2792 operands[3],
2793 <MODE>mode)));
2794 gcc_assert (comparison != NULL);
2795 emit_insn (comparison (operands[0],
2796 lowpart_subreg (<MODE>mode,
2797 tmp0, <V_INT_EQUIV>mode),
2798 lowpart_subreg (<MODE>mode,
2799 tmp1, <V_INT_EQUIV>mode)));
2800 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2801 }
2802 break;
2803
2804 case LT:
2805 case LE:
2806 case GT:
2807 case GE:
2808 case EQ:
2809 case NE:
2810 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2811 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
2812 a GE b -> a GE b
2813 a GT b -> a GT b
2814 a LE b -> b GE a
2815 a LT b -> b GT a
2816 a EQ b -> a EQ b
2817 a NE b -> ~(a EQ b) */
2818 gcc_assert (comparison != NULL);
2819 emit_insn (comparison (operands[0], operands[2], operands[3]));
2820 if (code == NE)
2821 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2822 break;
2823
2824 case LTGT:
2825 /* LTGT is not guaranteed not to generate a FP exception. So let's
2826 go the faster way: ((a > b) || (b > a)). */
2827 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2828 operands[2], operands[3]));
2829 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2830 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2831 break;
2832
2833 case ORDERED:
2834 case UNORDERED:
2835 case UNEQ:
2836 /* cmeq (a, a) & cmeq (b, b). */
2837 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2838 operands[2], operands[2]));
2839 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2840 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2841
2842 if (code == UNORDERED)
2843 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2844 else if (code == UNEQ)
2845 {
2846 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2847 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2848 }
2849 break;
2850
2851 default:
2852 gcc_unreachable ();
2853 }
2854
2855 DONE;
2856 })
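
;; As a scalar C model of the UN<cc> lowering used in the expander above
;; (an illustrative sketch only; the real code operates on whole vectors
;; and on the operands' bit patterns):
;;
;;   #include <stdint.h>
;;   uint32_t
;;   unge_lane (float a, float b)
;;   {
;;     uint32_t ord = ((a == a) && (b == b)) ? ~0u : 0u; /* cmeq + and  */
;;     float a0 = (a == a) ? a : 0.0f;  /* NaN lanes forced to zero.  */
;;     float b0 = (b == b) ? b : 0.0f;
;;     uint32_t ge = (a0 >= b0) ? ~0u : 0u;              /* fcmge       */
;;     return ~ord | ge;                                 /* orn         */
;;   }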
2857
2858 (define_expand "vec_cmpu<mode><mode>"
2859 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2860 (match_operator 1 "comparison_operator"
2861 [(match_operand:VSDQ_I_DI 2 "register_operand")
2862 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2863 "TARGET_SIMD"
2864 {
2865 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2866 operands[2], operands[3]));
2867 DONE;
2868 })
2869
2870 (define_expand "vcond<mode><mode>"
2871 [(set (match_operand:VALLDI 0 "register_operand")
2872 (if_then_else:VALLDI
2873 (match_operator 3 "comparison_operator"
2874 [(match_operand:VALLDI 4 "register_operand")
2875 (match_operand:VALLDI 5 "nonmemory_operand")])
2876 (match_operand:VALLDI 1 "nonmemory_operand")
2877 (match_operand:VALLDI 2 "nonmemory_operand")))]
2878 "TARGET_SIMD"
2879 {
2880 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2881 enum rtx_code code = GET_CODE (operands[3]);
2882
2883 /* NE is handled as !EQ in the vec_cmp patterns; we explicitly invert
2884 it and also switch operands 1/2 in order to avoid the additional
2885 NOT instruction. */
2886 if (code == NE)
2887 {
2888 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2889 operands[4], operands[5]);
2890 std::swap (operands[1], operands[2]);
2891 }
2892 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2893 operands[4], operands[5]));
2894 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2895 operands[2], mask));
2896
2897 DONE;
2898 })
2899
2900 (define_expand "vcond<v_cmp_mixed><mode>"
2901 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2902 (if_then_else:<V_cmp_mixed>
2903 (match_operator 3 "comparison_operator"
2904 [(match_operand:VDQF_COND 4 "register_operand")
2905 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2906 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2907 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2908 "TARGET_SIMD"
2909 {
2910 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2911 enum rtx_code code = GET_CODE (operands[3]);
2912
2913 /* NE is handled as !EQ in the vec_cmp patterns; we explicitly invert
2914 it and also switch operands 1/2 in order to avoid the additional
2915 NOT instruction. */
2916 if (code == NE)
2917 {
2918 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2919 operands[4], operands[5]);
2920 std::swap (operands[1], operands[2]);
2921 }
2922 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2923 operands[4], operands[5]));
2924 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2925 operands[0], operands[1],
2926 operands[2], mask));
2927
2928 DONE;
2929 })
2930
2931 (define_expand "vcondu<mode><mode>"
2932 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2933 (if_then_else:VSDQ_I_DI
2934 (match_operator 3 "comparison_operator"
2935 [(match_operand:VSDQ_I_DI 4 "register_operand")
2936 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2937 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2938 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2939 "TARGET_SIMD"
2940 {
2941 rtx mask = gen_reg_rtx (<MODE>mode);
2942 enum rtx_code code = GET_CODE (operands[3]);
2943
2944 /* NE is handled as !EQ in the vec_cmp patterns; we explicitly invert
2945 it and also switch operands 1/2 in order to avoid the additional
2946 NOT instruction. */
2947 if (code == NE)
2948 {
2949 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2950 operands[4], operands[5]);
2951 std::swap (operands[1], operands[2]);
2952 }
2953 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2954 operands[4], operands[5]));
2955 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2956 operands[2], mask));
2957 DONE;
2958 })
2959
2960 (define_expand "vcondu<mode><v_cmp_mixed>"
2961 [(set (match_operand:VDQF 0 "register_operand")
2962 (if_then_else:VDQF
2963 (match_operator 3 "comparison_operator"
2964 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2965 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2966 (match_operand:VDQF 1 "nonmemory_operand")
2967 (match_operand:VDQF 2 "nonmemory_operand")))]
2968 "TARGET_SIMD"
2969 {
2970 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2971 enum rtx_code code = GET_CODE (operands[3]);
2972
2973 /* NE is handled as !EQ in the vec_cmp patterns; we explicitly invert
2974 it and also switch operands 1/2 in order to avoid the additional
2975 NOT instruction. */
2976 if (code == NE)
2977 {
2978 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2979 operands[4], operands[5]);
2980 std::swap (operands[1], operands[2]);
2981 }
2982 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2983 mask, operands[3],
2984 operands[4], operands[5]));
2985 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2986 operands[2], mask));
2987 DONE;
2988 })
2989
2990 ;; Patterns for AArch64 SIMD Intrinsics.
2991
2992 ;; Lane extraction with sign extension to general purpose register.
2993 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2994 [(set (match_operand:GPI 0 "register_operand" "=r")
2995 (sign_extend:GPI
2996 (vec_select:<VEL>
2997 (match_operand:VDQQH 1 "register_operand" "w")
2998 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2999 "TARGET_SIMD"
3000 {
3001 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
3002 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3003 }
3004 [(set_attr "type" "neon_to_gp<q>")]
3005 )
3006
3007 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
3008 [(set (match_operand:SI 0 "register_operand" "=r")
3009 (zero_extend:SI
3010 (vec_select:<VEL>
3011 (match_operand:VDQQH 1 "register_operand" "w")
3012 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3013 "TARGET_SIMD"
3014 {
3015 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3016 return "umov\\t%w0, %1.<Vetype>[%2]";
3017 }
3018 [(set_attr "type" "neon_to_gp<q>")]
3019 )
3020
3021 ;; Lane extraction of a value; neither sign nor zero extension
3022 ;; is guaranteed, so the upper bits should be considered undefined.
3023 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
3024 (define_insn "aarch64_get_lane<mode>"
3025 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3026 (vec_select:<VEL>
3027 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3028 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3029 "TARGET_SIMD"
3030 {
3031 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3032 switch (which_alternative)
3033 {
3034 case 0:
3035 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3036 case 1:
3037 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3038 case 2:
3039 return "st1\\t{%1.<Vetype>}[%2], %0";
3040 default:
3041 gcc_unreachable ();
3042 }
3043 }
3044 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3045 )
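
;; As an illustrative sketch (hypothetical examples), the "umov" and
;; "st1" alternatives above correspond to familiar intrinsic-level lane
;; reads:
;;
;;   #include <arm_neon.h>
;;   uint16_t
;;   get5 (uint16x8_t v)
;;   {
;;     return vgetq_lane_u16 (v, 5);   /* umov w0, v0.h[5]  */
;;   }
;;   void
;;   store5 (uint16_t *p, uint16x8_t v)
;;   {
;;     *p = vgetq_lane_u16 (v, 5);     /* st1 {v0.h}[5], [x0]  */
;;   }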
3046
3047 (define_insn "load_pair_lanes<mode>"
3048 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3049 (vec_concat:<VDBL>
3050 (match_operand:VDC 1 "memory_operand" "Utq")
3051 (match_operand:VDC 2 "memory_operand" "m")))]
3052 "TARGET_SIMD && !STRICT_ALIGNMENT
3053 && rtx_equal_p (XEXP (operands[2], 0),
3054 plus_constant (Pmode,
3055 XEXP (operands[1], 0),
3056 GET_MODE_SIZE (<MODE>mode)))"
3057 "ldr\\t%q0, %1"
3058 [(set_attr "type" "neon_load1_1reg_q")]
3059 )
3060
3061 (define_insn "store_pair_lanes<mode>"
3062 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Uml, Uml")
3063 (vec_concat:<VDBL>
3064 (match_operand:VDC 1 "register_operand" "w, r")
3065 (match_operand:VDC 2 "register_operand" "w, r")))]
3066 "TARGET_SIMD"
3067 "@
3068 stp\\t%d1, %d2, %y0
3069 stp\\t%x1, %x2, %y0"
3070 [(set_attr "type" "neon_stp, store_16")]
3071 )
3072
3073 ;; In this insn, operand 1 supplies the low part and operand 2 the high
3074 ;; part of the destination vector.
3075
3076 (define_insn "*aarch64_combinez<mode>"
3077 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3078 (vec_concat:<VDBL>
3079 (match_operand:VDC 1 "general_operand" "w,?r,m")
3080 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3081 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3082 "@
3083 mov\\t%0.8b, %1.8b
3084 fmov\t%d0, %1
3085 ldr\\t%d0, %1"
3086 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3087 (set_attr "simd" "yes,*,yes")
3088 (set_attr "fp" "*,yes,*")]
3089 )
3090
3091 (define_insn "*aarch64_combinez_be<mode>"
3092 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3093 (vec_concat:<VDBL>
3094 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3095 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3096 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3097 "@
3098 mov\\t%0.8b, %1.8b
3099 fmov\t%d0, %1
3100 ldr\\t%d0, %1"
3101 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3102 (set_attr "simd" "yes,*,yes")
3103 (set_attr "fp" "*,yes,*")]
3104 )
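
;; As an illustrative sketch (hypothetical example), a combine with a
;; zero high half can match the patterns above and so needs no separate
;; zeroing instruction:
;;
;;   #include <arm_neon.h>
;;   float32x4_t
;;   widen (float32x2_t x)
;;   {
;;     return vcombine_f32 (x, vdup_n_f32 (0.0f));   /* mov/fmov/ldr  */
;;   }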
3105
3106 (define_expand "aarch64_combine<mode>"
3107 [(match_operand:<VDBL> 0 "register_operand")
3108 (match_operand:VDC 1 "register_operand")
3109 (match_operand:VDC 2 "register_operand")]
3110 "TARGET_SIMD"
3111 {
3112 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3113
3114 DONE;
3115 }
3116 )
3117
3118 (define_expand "aarch64_simd_combine<mode>"
3119 [(match_operand:<VDBL> 0 "register_operand")
3120 (match_operand:VDC 1 "register_operand")
3121 (match_operand:VDC 2 "register_operand")]
3122 "TARGET_SIMD"
3123 {
3124 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3125 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3126 DONE;
3127 }
3128 [(set_attr "type" "multiple")]
3129 )
3130
3131 ;; <su><addsub>l<q>.
3132
3133 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
3134 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3135 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3136 (match_operand:VQW 1 "register_operand" "w")
3137 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3138 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3139 (match_operand:VQW 2 "register_operand" "w")
3140 (match_dup 3)))))]
3141 "TARGET_SIMD"
3142 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3143 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3144 )
3145
3146 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
3147 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3148 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3149 (match_operand:VQW 1 "register_operand" "w")
3150 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3151 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
3152 (match_operand:VQW 2 "register_operand" "w")
3153 (match_dup 3)))))]
3154 "TARGET_SIMD"
3155 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
3156 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3157 )
3158
3159
3160 (define_expand "aarch64_saddl2<mode>"
3161 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3162 (match_operand:VQW 1 "register_operand" "w")
3163 (match_operand:VQW 2 "register_operand" "w")]
3164 "TARGET_SIMD"
3165 {
3166 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3167 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3168 operands[2], p));
3169 DONE;
3170 })
3171
3172 (define_expand "aarch64_uaddl2<mode>"
3173 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3174 (match_operand:VQW 1 "register_operand" "w")
3175 (match_operand:VQW 2 "register_operand" "w")]
3176 "TARGET_SIMD"
3177 {
3178 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3179 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3180 operands[2], p));
3181 DONE;
3182 })
3183
3184 (define_expand "aarch64_ssubl2<mode>"
3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3186 (match_operand:VQW 1 "register_operand" "w")
3187 (match_operand:VQW 2 "register_operand" "w")]
3188 "TARGET_SIMD"
3189 {
3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3191 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3192 operands[2], p));
3193 DONE;
3194 })
3195
3196 (define_expand "aarch64_usubl2<mode>"
3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3198 (match_operand:VQW 1 "register_operand" "w")
3199 (match_operand:VQW 2 "register_operand" "w")]
3200 "TARGET_SIMD"
3201 {
3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3203 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3204 operands[2], p));
3205 DONE;
3206 })
3207
3208 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
3209 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3210 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
3211 (match_operand:VD_BHSI 1 "register_operand" "w"))
3212 (ANY_EXTEND:<VWIDE>
3213 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3214 "TARGET_SIMD"
3215 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
3216 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
3217 )
3218
3219 ;; <su><addsub>w<q>.
3220
3221 (define_expand "widen_ssum<mode>3"
3222 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3223 (plus:<VDBLW> (sign_extend:<VDBLW>
3224 (match_operand:VQW 1 "register_operand" ""))
3225 (match_operand:<VDBLW> 2 "register_operand" "")))]
3226 "TARGET_SIMD"
3227 {
3228 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3229 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3230
3231 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3232 operands[1], p));
3233 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3234 DONE;
3235 }
3236 )
3237
3238 (define_expand "widen_ssum<mode>3"
3239 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3240 (plus:<VWIDE> (sign_extend:<VWIDE>
3241 (match_operand:VD_BHSI 1 "register_operand" ""))
3242 (match_operand:<VWIDE> 2 "register_operand" "")))]
3243 "TARGET_SIMD"
3244 {
3245 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3246 DONE;
3247 })
3248
3249 (define_expand "widen_usum<mode>3"
3250 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3251 (plus:<VDBLW> (zero_extend:<VDBLW>
3252 (match_operand:VQW 1 "register_operand" ""))
3253 (match_operand:<VDBLW> 2 "register_operand" "")))]
3254 "TARGET_SIMD"
3255 {
3256 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3257 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3258
3259 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3260 operands[1], p));
3261 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3262 DONE;
3263 }
3264 )
3265
3266 (define_expand "widen_usum<mode>3"
3267 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3268 (plus:<VWIDE> (zero_extend:<VWIDE>
3269 (match_operand:VD_BHSI 1 "register_operand" ""))
3270 (match_operand:<VWIDE> 2 "register_operand" "")))]
3271 "TARGET_SIMD"
3272 {
3273 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3274 DONE;
3275 })
3276
3277 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3278 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3279 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3280 (ANY_EXTEND:<VWIDE>
3281 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3282 "TARGET_SIMD"
3283 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3284 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3285 )
3286
3287 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3288 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3289 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3290 (ANY_EXTEND:<VWIDE>
3291 (vec_select:<VHALF>
3292 (match_operand:VQW 2 "register_operand" "w")
3293 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3294 "TARGET_SIMD"
3295 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3296 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3297 )
3298
3299 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3300 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3301 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3302 (ANY_EXTEND:<VWIDE>
3303 (vec_select:<VHALF>
3304 (match_operand:VQW 2 "register_operand" "w")
3305 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3306 "TARGET_SIMD"
3307 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3308 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3309 )
3310
3311 (define_expand "aarch64_saddw2<mode>"
3312 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3313 (match_operand:<VWIDE> 1 "register_operand" "w")
3314 (match_operand:VQW 2 "register_operand" "w")]
3315 "TARGET_SIMD"
3316 {
3317 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3318 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3319 operands[2], p));
3320 DONE;
3321 })
3322
3323 (define_expand "aarch64_uaddw2<mode>"
3324 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3325 (match_operand:<VWIDE> 1 "register_operand" "w")
3326 (match_operand:VQW 2 "register_operand" "w")]
3327 "TARGET_SIMD"
3328 {
3329 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3330 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3331 operands[2], p));
3332 DONE;
3333 })
3334
3335
3336 (define_expand "aarch64_ssubw2<mode>"
3337 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3338 (match_operand:<VWIDE> 1 "register_operand" "w")
3339 (match_operand:VQW 2 "register_operand" "w")]
3340 "TARGET_SIMD"
3341 {
3342 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3343 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3344 operands[2], p));
3345 DONE;
3346 })
3347
3348 (define_expand "aarch64_usubw2<mode>"
3349 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3350 (match_operand:<VWIDE> 1 "register_operand" "w")
3351 (match_operand:VQW 2 "register_operand" "w")]
3352 "TARGET_SIMD"
3353 {
3354 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3355 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3356 operands[2], p));
3357 DONE;
3358 })
3359
3360 ;; <su><r>h<addsub>.
3361
3362 (define_insn "aarch64_<sur>h<addsub><mode>"
3363 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3364 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3365 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3366 HADDSUB))]
3367 "TARGET_SIMD"
3368 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3369 [(set_attr "type" "neon_<addsub>_halve<q>")]
3370 )
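
;; The halving forms compute the full-width sum or difference and shift it
;; right by one, so e.g. shadd on two QI lanes holding 100 yields 100 with
;; no overflow; the <r> (rounding) variants add 1 before the shift.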
3371
3372 ;; <r><addsub>hn<q>.
3373
3374 (define_insn "aarch64_<sur><addsub>hn<mode>"
3375 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3376 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3377 (match_operand:VQN 2 "register_operand" "w")]
3378 ADDSUBHN))]
3379 "TARGET_SIMD"
3380 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3381 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3382 )
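
;; addhn/subhn keep only the most significant half of each full-width sum
;; or difference (a 16-bit lane narrows to its top 8 bits); the <r> forms
;; round first by adding 1 << (half-width - 1).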
3383
3384 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3385 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3386 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3387 (match_operand:VQN 2 "register_operand" "w")
3388 (match_operand:VQN 3 "register_operand" "w")]
3389 ADDSUBHN2))]
3390 "TARGET_SIMD"
3391 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3392 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3393 )
3394
3395 ;; pmul.
3396
3397 (define_insn "aarch64_pmul<mode>"
3398 [(set (match_operand:VB 0 "register_operand" "=w")
3399 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3400 (match_operand:VB 2 "register_operand" "w")]
3401 UNSPEC_PMUL))]
3402 "TARGET_SIMD"
3403 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3404 [(set_attr "type" "neon_mul_<Vetype><q>")]
3405 )
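
;; pmul is a polynomial (carry-less) multiply: partial products are
;; combined with XOR rather than addition, as used for GF(2^8) arithmetic.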
3406
3407 ;; fmulx.
3408
3409 (define_insn "aarch64_fmulx<mode>"
3410 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3411 (unspec:VHSDF_HSDF
3412 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3413 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3414 UNSPEC_FMULX))]
3415 "TARGET_SIMD"
3416 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3417 [(set_attr "type" "neon_fp_mul_<stype>")]
3418 )
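
;; fmulx behaves like fmul except that (+/-)0.0 * (+/-)Inf returns
;; (+/-)2.0 rather than a NaN, which is presumably why it is modelled as
;; an unspec instead of a plain mult.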
3419
3420 ;; vmulxq_lane_f32 and vmulx_laneq_f32
3421
3422 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3423 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3424 (unspec:VDQSF
3425 [(match_operand:VDQSF 1 "register_operand" "w")
3426 (vec_duplicate:VDQSF
3427 (vec_select:<VEL>
3428 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3429 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3430 UNSPEC_FMULX))]
3431 "TARGET_SIMD"
3432 {
3433 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3434 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3435 }
3436 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3437 )
3438
3439 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3440
3441 (define_insn "*aarch64_mulx_elt<mode>"
3442 [(set (match_operand:VDQF 0 "register_operand" "=w")
3443 (unspec:VDQF
3444 [(match_operand:VDQF 1 "register_operand" "w")
3445 (vec_duplicate:VDQF
3446 (vec_select:<VEL>
3447 (match_operand:VDQF 2 "register_operand" "w")
3448 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3449 UNSPEC_FMULX))]
3450 "TARGET_SIMD"
3451 {
3452 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3453 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3454 }
3455 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3456 )
3457
3458 ;; vmulxq_lane
3459
3460 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3461 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3462 (unspec:VHSDF
3463 [(match_operand:VHSDF 1 "register_operand" "w")
3464 (vec_duplicate:VHSDF
3465 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3466 UNSPEC_FMULX))]
3467 "TARGET_SIMD"
3468 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3469 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3470 )
3471
3472 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3473 ;; vmulxd_lane_f64 == vmulx_lane_f64
3474 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3475
3476 (define_insn "*aarch64_vgetfmulx<mode>"
3477 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3478 (unspec:<VEL>
3479 [(match_operand:<VEL> 1 "register_operand" "w")
3480 (vec_select:<VEL>
3481 (match_operand:VDQF 2 "register_operand" "w")
3482 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3483 UNSPEC_FMULX))]
3484 "TARGET_SIMD"
3485 {
3486 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3487 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3488 }
3489 [(set_attr "type" "fmul<Vetype>")]
3490 )
3491 ;; <su>q<addsub>
3492
3493 (define_insn "aarch64_<su_optab><optab><mode>"
3494 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3495 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3496 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3497 "TARGET_SIMD"
3498 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3499 [(set_attr "type" "neon_<optab><q>")]
3500 )
3501
3502 ;; suqadd and usqadd
3503
3504 (define_insn "aarch64_<sur>qadd<mode>"
3505 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3506 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3507 (match_operand:VSDQ_I 2 "register_operand" "w")]
3508 USSUQADD))]
3509 "TARGET_SIMD"
3510 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3511 [(set_attr "type" "neon_qadd<q>")]
3512 )
3513
3514 ;; sqmovun
3515
3516 (define_insn "aarch64_sqmovun<mode>"
3517 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3518 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3519 UNSPEC_SQXTUN))]
3520 "TARGET_SIMD"
3521 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3522 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3523 )
3524
3525 ;; sqmovn and uqmovn
3526
3527 (define_insn "aarch64_<sur>qmovn<mode>"
3528 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3529 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3530 SUQMOVN))]
3531 "TARGET_SIMD"
3532 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3533 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3534 )
3535
3536 ;; <su>q<absneg>
3537
3538 (define_insn "aarch64_s<optab><mode>"
3539 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3540 (UNQOPS:VSDQ_I
3541 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3542 "TARGET_SIMD"
3543 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3544 [(set_attr "type" "neon_<optab><q>")]
3545 )
3546
3547 ;; sq<r>dmulh.
3548
3549 (define_insn "aarch64_sq<r>dmulh<mode>"
3550 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3551 (unspec:VSDQ_HSI
3552 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3553 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3554 VQDMULH))]
3555 "TARGET_SIMD"
3556 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3557 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3558 )
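
;; sqdmulh returns the high half of the saturating doubled product.  As a
;; worked example on 16-bit lanes: 0x4000 * 0x4000 doubled is 0x20000000,
;; whose high half is 0x2000; only -32768 * -32768 overflows and so
;; saturates, giving 0x7fff.  sqrdmulh additionally adds a rounding
;; constant before taking the high half.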
3559
3560 ;; sq<r>dmulh_lane
3561
3562 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3563 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3564 (unspec:VDQHS
3565 [(match_operand:VDQHS 1 "register_operand" "w")
3566 (vec_select:<VEL>
3567 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3568 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3569 VQDMULH))]
3570 "TARGET_SIMD"
3571 "*
3572 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3573 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3574 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3575 )
3576
3577 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3578 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3579 (unspec:VDQHS
3580 [(match_operand:VDQHS 1 "register_operand" "w")
3581 (vec_select:<VEL>
3582 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3583 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3584 VQDMULH))]
3585 "TARGET_SIMD"
3586 "*
3587 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3588 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3589 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3590 )
3591
3592 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3593 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3594 (unspec:SD_HSI
3595 [(match_operand:SD_HSI 1 "register_operand" "w")
3596 (vec_select:<VEL>
3597 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3598 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3599 VQDMULH))]
3600 "TARGET_SIMD"
3601 "*
3602 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3603 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3604 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3605 )
3606
3607 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3608 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3609 (unspec:SD_HSI
3610 [(match_operand:SD_HSI 1 "register_operand" "w")
3611 (vec_select:<VEL>
3612 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3613 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3614 VQDMULH))]
3615 "TARGET_SIMD"
3616 "*
3617 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3618 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<Vetype>[%3]\";"
3619 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3620 )
3621
3622 ;; sqrdml[as]h.
3623
3624 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3625 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3626 (unspec:VSDQ_HSI
3627 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3628 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3629 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3630 SQRDMLH_AS))]
3631 "TARGET_SIMD_RDMA"
3632 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3633 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3634 )
3635
3636 ;; sqrdml[as]h_lane.
3637
3638 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3639 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3640 (unspec:VDQHS
3641 [(match_operand:VDQHS 1 "register_operand" "0")
3642 (match_operand:VDQHS 2 "register_operand" "w")
3643 (vec_select:<VEL>
3644 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3645 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3646 SQRDMLH_AS))]
3647 "TARGET_SIMD_RDMA"
3648 {
3649 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3650 return
3651 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3652 }
3653 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3654 )
3655
3656 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3657 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3658 (unspec:SD_HSI
3659 [(match_operand:SD_HSI 1 "register_operand" "0")
3660 (match_operand:SD_HSI 2 "register_operand" "w")
3661 (vec_select:<VEL>
3662 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3663 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3664 SQRDMLH_AS))]
3665 "TARGET_SIMD_RDMA"
3666 {
3667 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3668 return
3669 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3670 }
3671 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3672 )
3673
3674 ;; sqrdml[as]h_laneq.
3675
3676 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3677 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3678 (unspec:VDQHS
3679 [(match_operand:VDQHS 1 "register_operand" "0")
3680 (match_operand:VDQHS 2 "register_operand" "w")
3681 (vec_select:<VEL>
3682 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3683 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3684 SQRDMLH_AS))]
3685 "TARGET_SIMD_RDMA"
3686 {
3687 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3688 return
3689 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3690 }
3691 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3692 )
3693
3694 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3695 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3696 (unspec:SD_HSI
3697 [(match_operand:SD_HSI 1 "register_operand" "0")
3698 (match_operand:SD_HSI 2 "register_operand" "w")
3699 (vec_select:<VEL>
3700 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3701 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3702 SQRDMLH_AS))]
3703 "TARGET_SIMD_RDMA"
3704 {
3705 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3706 return
3707 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3708 }
3709 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3710 )
3711
3712 ;; vqdml[sa]l
3713
3714 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3715 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3716 (SBINQOPS:<VWIDE>
3717 (match_operand:<VWIDE> 1 "register_operand" "0")
3718 (ss_ashift:<VWIDE>
3719 (mult:<VWIDE>
3720 (sign_extend:<VWIDE>
3721 (match_operand:VSD_HSI 2 "register_operand" "w"))
3722 (sign_extend:<VWIDE>
3723 (match_operand:VSD_HSI 3 "register_operand" "w")))
3724 (const_int 1))))]
3725 "TARGET_SIMD"
3726 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3727 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3728 )
3729
3730 ;; vqdml[sa]l_lane
3731
3732 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3733 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3734 (SBINQOPS:<VWIDE>
3735 (match_operand:<VWIDE> 1 "register_operand" "0")
3736 (ss_ashift:<VWIDE>
3737 (mult:<VWIDE>
3738 (sign_extend:<VWIDE>
3739 (match_operand:VD_HSI 2 "register_operand" "w"))
3740 (sign_extend:<VWIDE>
3741 (vec_duplicate:VD_HSI
3742 (vec_select:<VEL>
3743 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3744 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3745 ))
3746 (const_int 1))))]
3747 "TARGET_SIMD"
3748 {
3749 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3750 return
3751 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3752 }
3753 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3754 )
3755
3756 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3757 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3758 (SBINQOPS:<VWIDE>
3759 (match_operand:<VWIDE> 1 "register_operand" "0")
3760 (ss_ashift:<VWIDE>
3761 (mult:<VWIDE>
3762 (sign_extend:<VWIDE>
3763 (match_operand:VD_HSI 2 "register_operand" "w"))
3764 (sign_extend:<VWIDE>
3765 (vec_duplicate:VD_HSI
3766 (vec_select:<VEL>
3767 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3768 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3769 ))
3770 (const_int 1))))]
3771 "TARGET_SIMD"
3772 {
3773 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3774 return
3775 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3776 }
3777 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3778 )
3779
3780 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3781 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3782 (SBINQOPS:<VWIDE>
3783 (match_operand:<VWIDE> 1 "register_operand" "0")
3784 (ss_ashift:<VWIDE>
3785 (mult:<VWIDE>
3786 (sign_extend:<VWIDE>
3787 (match_operand:SD_HSI 2 "register_operand" "w"))
3788 (sign_extend:<VWIDE>
3789 (vec_select:<VEL>
3790 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3791 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3792 )
3793 (const_int 1))))]
3794 "TARGET_SIMD"
3795 {
3796 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3797 return
3798 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3799 }
3800 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3801 )
3802
3803 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3804 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3805 (SBINQOPS:<VWIDE>
3806 (match_operand:<VWIDE> 1 "register_operand" "0")
3807 (ss_ashift:<VWIDE>
3808 (mult:<VWIDE>
3809 (sign_extend:<VWIDE>
3810 (match_operand:SD_HSI 2 "register_operand" "w"))
3811 (sign_extend:<VWIDE>
3812 (vec_select:<VEL>
3813 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3814 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3815 )
3816 (const_int 1))))]
3817 "TARGET_SIMD"
3818 {
3819 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3820 return
3821 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3822 }
3823 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3824 )
3825
3826 ;; vqdml[sa]l_n
3827
3828 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3829 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3830 (SBINQOPS:<VWIDE>
3831 (match_operand:<VWIDE> 1 "register_operand" "0")
3832 (ss_ashift:<VWIDE>
3833 (mult:<VWIDE>
3834 (sign_extend:<VWIDE>
3835 (match_operand:VD_HSI 2 "register_operand" "w"))
3836 (sign_extend:<VWIDE>
3837 (vec_duplicate:VD_HSI
3838 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3839 (const_int 1))))]
3840 "TARGET_SIMD"
3841 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3842 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3843 )
3844
3845 ;; sqdml[as]l2
3846
3847 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3848 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3849 (SBINQOPS:<VWIDE>
3850 (match_operand:<VWIDE> 1 "register_operand" "0")
3851 (ss_ashift:<VWIDE>
3852 (mult:<VWIDE>
3853 (sign_extend:<VWIDE>
3854 (vec_select:<VHALF>
3855 (match_operand:VQ_HSI 2 "register_operand" "w")
3856 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3857 (sign_extend:<VWIDE>
3858 (vec_select:<VHALF>
3859 (match_operand:VQ_HSI 3 "register_operand" "w")
3860 (match_dup 4))))
3861 (const_int 1))))]
3862 "TARGET_SIMD"
3863 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3864 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3865 )
3866
3867 (define_expand "aarch64_sqdmlal2<mode>"
3868 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3869 (match_operand:<VWIDE> 1 "register_operand" "w")
3870 (match_operand:VQ_HSI 2 "register_operand" "w")
3871 (match_operand:VQ_HSI 3 "register_operand" "w")]
3872 "TARGET_SIMD"
3873 {
3874 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3875 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3876 operands[2], operands[3], p));
3877 DONE;
3878 })
3879
3880 (define_expand "aarch64_sqdmlsl2<mode>"
3881 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3882 (match_operand:<VWIDE> 1 "register_operand" "w")
3883 (match_operand:VQ_HSI 2 "register_operand" "w")
3884 (match_operand:VQ_HSI 3 "register_operand" "w")]
3885 "TARGET_SIMD"
3886 {
3887 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3888 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3889 operands[2], operands[3], p));
3890 DONE;
3891 })
3892
3893 ;; vqdml[sa]l2_lane
3894
3895 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3896 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3897 (SBINQOPS:<VWIDE>
3898 (match_operand:<VWIDE> 1 "register_operand" "0")
3899 (ss_ashift:<VWIDE>
3900 (mult:<VWIDE>
3901 (sign_extend:<VWIDE>
3902 (vec_select:<VHALF>
3903 (match_operand:VQ_HSI 2 "register_operand" "w")
3904 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3905 (sign_extend:<VWIDE>
3906 (vec_duplicate:<VHALF>
3907 (vec_select:<VEL>
3908 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3909 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3910 ))))
3911 (const_int 1))))]
3912 "TARGET_SIMD"
3913 {
3914 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3915 return
3916 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3917 }
3918 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3919 )
3920
3921 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3922 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3923 (SBINQOPS:<VWIDE>
3924 (match_operand:<VWIDE> 1 "register_operand" "0")
3925 (ss_ashift:<VWIDE>
3926 (mult:<VWIDE>
3927 (sign_extend:<VWIDE>
3928 (vec_select:<VHALF>
3929 (match_operand:VQ_HSI 2 "register_operand" "w")
3930 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3931 (sign_extend:<VWIDE>
3932 (vec_duplicate:<VHALF>
3933 (vec_select:<VEL>
3934 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3935 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3936 ))))
3937 (const_int 1))))]
3938 "TARGET_SIMD"
3939 {
3940 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3941 return
3942 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3943 }
3944 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3945 )
3946
3947 (define_expand "aarch64_sqdmlal2_lane<mode>"
3948 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3949 (match_operand:<VWIDE> 1 "register_operand" "w")
3950 (match_operand:VQ_HSI 2 "register_operand" "w")
3951 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3952 (match_operand:SI 4 "immediate_operand" "i")]
3953 "TARGET_SIMD"
3954 {
3955 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3956 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3957 operands[2], operands[3],
3958 operands[4], p));
3959 DONE;
3960 })
3961
3962 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3963 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3964 (match_operand:<VWIDE> 1 "register_operand" "w")
3965 (match_operand:VQ_HSI 2 "register_operand" "w")
3966 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3967 (match_operand:SI 4 "immediate_operand" "i")]
3968 "TARGET_SIMD"
3969 {
3970 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3971 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3972 operands[2], operands[3],
3973 operands[4], p));
3974 DONE;
3975 })
3976
3977 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3978 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3979 (match_operand:<VWIDE> 1 "register_operand" "w")
3980 (match_operand:VQ_HSI 2 "register_operand" "w")
3981 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3982 (match_operand:SI 4 "immediate_operand" "i")]
3983 "TARGET_SIMD"
3984 {
3985 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3986 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3987 operands[2], operands[3],
3988 operands[4], p));
3989 DONE;
3990 })
3991
3992 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3993 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3994 (match_operand:<VWIDE> 1 "register_operand" "w")
3995 (match_operand:VQ_HSI 2 "register_operand" "w")
3996 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3997 (match_operand:SI 4 "immediate_operand" "i")]
3998 "TARGET_SIMD"
3999 {
4000 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4001 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
4002 operands[2], operands[3],
4003 operands[4], p));
4004 DONE;
4005 })
4006
4007 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
4008 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4009 (SBINQOPS:<VWIDE>
4010 (match_operand:<VWIDE> 1 "register_operand" "0")
4011 (ss_ashift:<VWIDE>
4012 (mult:<VWIDE>
4013 (sign_extend:<VWIDE>
4014 (vec_select:<VHALF>
4015 (match_operand:VQ_HSI 2 "register_operand" "w")
4016 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4017 (sign_extend:<VWIDE>
4018 (vec_duplicate:<VHALF>
4019 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
4020 (const_int 1))))]
4021 "TARGET_SIMD"
4022 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
4023 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
4024 )
4025
4026 (define_expand "aarch64_sqdmlal2_n<mode>"
4027 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4028 (match_operand:<VWIDE> 1 "register_operand" "w")
4029 (match_operand:VQ_HSI 2 "register_operand" "w")
4030 (match_operand:<VEL> 3 "register_operand" "w")]
4031 "TARGET_SIMD"
4032 {
4033 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4034 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
4035 operands[2], operands[3],
4036 p));
4037 DONE;
4038 })
4039
4040 (define_expand "aarch64_sqdmlsl2_n<mode>"
4041 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4042 (match_operand:<VWIDE> 1 "register_operand" "w")
4043 (match_operand:VQ_HSI 2 "register_operand" "w")
4044 (match_operand:<VEL> 3 "register_operand" "w")]
4045 "TARGET_SIMD"
4046 {
4047 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4048 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
4049 operands[2], operands[3],
4050 p));
4051 DONE;
4052 })
4053
4054 ;; vqdmull
4055
4056 (define_insn "aarch64_sqdmull<mode>"
4057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4058 (ss_ashift:<VWIDE>
4059 (mult:<VWIDE>
4060 (sign_extend:<VWIDE>
4061 (match_operand:VSD_HSI 1 "register_operand" "w"))
4062 (sign_extend:<VWIDE>
4063 (match_operand:VSD_HSI 2 "register_operand" "w")))
4064 (const_int 1)))]
4065 "TARGET_SIMD"
4066 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4067 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
4068 )
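
;; The RTL above spells out the sqdmull semantics: sign-extend, multiply,
;; then saturating-shift-left by one.  E.g. on 16-bit lanes 0x8000 * 0x8000
;; gives 0x40000000, and doubling that saturates to 0x7fffffff.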
4069
4070 ;; vqdmull_lane
4071
4072 (define_insn "aarch64_sqdmull_lane<mode>"
4073 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4074 (ss_ashift:<VWIDE>
4075 (mult:<VWIDE>
4076 (sign_extend:<VWIDE>
4077 (match_operand:VD_HSI 1 "register_operand" "w"))
4078 (sign_extend:<VWIDE>
4079 (vec_duplicate:VD_HSI
4080 (vec_select:<VEL>
4081 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4082 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4083 ))
4084 (const_int 1)))]
4085 "TARGET_SIMD"
4086 {
4087 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4088 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4089 }
4090 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4091 )
4092
4093 (define_insn "aarch64_sqdmull_laneq<mode>"
4094 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4095 (ss_ashift:<VWIDE>
4096 (mult:<VWIDE>
4097 (sign_extend:<VWIDE>
4098 (match_operand:VD_HSI 1 "register_operand" "w"))
4099 (sign_extend:<VWIDE>
4100 (vec_duplicate:VD_HSI
4101 (vec_select:<VEL>
4102 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4103 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4104 ))
4105 (const_int 1)))]
4106 "TARGET_SIMD"
4107 {
4108 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4109 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4110 }
4111 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4112 )
4113
4114 (define_insn "aarch64_sqdmull_lane<mode>"
4115 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4116 (ss_ashift:<VWIDE>
4117 (mult:<VWIDE>
4118 (sign_extend:<VWIDE>
4119 (match_operand:SD_HSI 1 "register_operand" "w"))
4120 (sign_extend:<VWIDE>
4121 (vec_select:<VEL>
4122 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4123 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4124 ))
4125 (const_int 1)))]
4126 "TARGET_SIMD"
4127 {
4128 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4129 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4130 }
4131 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4132 )
4133
4134 (define_insn "aarch64_sqdmull_laneq<mode>"
4135 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4136 (ss_ashift:<VWIDE>
4137 (mult:<VWIDE>
4138 (sign_extend:<VWIDE>
4139 (match_operand:SD_HSI 1 "register_operand" "w"))
4140 (sign_extend:<VWIDE>
4141 (vec_select:<VEL>
4142 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4143 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4144 ))
4145 (const_int 1)))]
4146 "TARGET_SIMD"
4147 {
4148 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4149 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4150 }
4151 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4152 )
4153
4154 ;; vqdmull_n
4155
4156 (define_insn "aarch64_sqdmull_n<mode>"
4157 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4158 (ss_ashift:<VWIDE>
4159 (mult:<VWIDE>
4160 (sign_extend:<VWIDE>
4161 (match_operand:VD_HSI 1 "register_operand" "w"))
4162 (sign_extend:<VWIDE>
4163 (vec_duplicate:VD_HSI
4164 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4165 )
4166 (const_int 1)))]
4167 "TARGET_SIMD"
4168 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4169 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4170 )
4171
4172 ;; vqdmull2
4173
4174
4175
4176 (define_insn "aarch64_sqdmull2<mode>_internal"
4177 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4178 (ss_ashift:<VWIDE>
4179 (mult:<VWIDE>
4180 (sign_extend:<VWIDE>
4181 (vec_select:<VHALF>
4182 (match_operand:VQ_HSI 1 "register_operand" "w")
4183 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4184 (sign_extend:<VWIDE>
4185 (vec_select:<VHALF>
4186 (match_operand:VQ_HSI 2 "register_operand" "w")
4187 (match_dup 3)))
4188 )
4189 (const_int 1)))]
4190 "TARGET_SIMD"
4191 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4192 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4193 )
4194
4195 (define_expand "aarch64_sqdmull2<mode>"
4196 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4197 (match_operand:VQ_HSI 1 "register_operand" "w")
4198 (match_operand:VQ_HSI 2 "register_operand" "w")]
4199 "TARGET_SIMD"
4200 {
4201 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4202 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4203 operands[2], p));
4204 DONE;
4205 })
4206
4207 ;; vqdmull2_lane
4208
4209 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
4210 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4211 (ss_ashift:<VWIDE>
4212 (mult:<VWIDE>
4213 (sign_extend:<VWIDE>
4214 (vec_select:<VHALF>
4215 (match_operand:VQ_HSI 1 "register_operand" "w")
4216 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4217 (sign_extend:<VWIDE>
4218 (vec_duplicate:<VHALF>
4219 (vec_select:<VEL>
4220 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4221 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4222 ))
4223 (const_int 1)))]
4224 "TARGET_SIMD"
4225 {
4226 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4227 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4228 }
4229 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4230 )
4231
4232 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
4233 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4234 (ss_ashift:<VWIDE>
4235 (mult:<VWIDE>
4236 (sign_extend:<VWIDE>
4237 (vec_select:<VHALF>
4238 (match_operand:VQ_HSI 1 "register_operand" "w")
4239 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4240 (sign_extend:<VWIDE>
4241 (vec_duplicate:<VHALF>
4242 (vec_select:<VEL>
4243 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4244 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4245 ))
4246 (const_int 1)))]
4247 "TARGET_SIMD"
4248 {
4249 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4250 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4251 }
4252 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4253 )
4254
4255 (define_expand "aarch64_sqdmull2_lane<mode>"
4256 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4257 (match_operand:VQ_HSI 1 "register_operand" "w")
4258 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4259 (match_operand:SI 3 "immediate_operand" "i")]
4260 "TARGET_SIMD"
4261 {
4262 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4263 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4264 operands[2], operands[3],
4265 p));
4266 DONE;
4267 })
4268
4269 (define_expand "aarch64_sqdmull2_laneq<mode>"
4270 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4271 (match_operand:VQ_HSI 1 "register_operand" "w")
4272 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4273 (match_operand:SI 3 "immediate_operand" "i")]
4274 "TARGET_SIMD"
4275 {
4276 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4277 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4278 operands[2], operands[3],
4279 p));
4280 DONE;
4281 })
4282
4283 ;; vqdmull2_n
4284
4285 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4286 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4287 (ss_ashift:<VWIDE>
4288 (mult:<VWIDE>
4289 (sign_extend:<VWIDE>
4290 (vec_select:<VHALF>
4291 (match_operand:VQ_HSI 1 "register_operand" "w")
4292 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4293 (sign_extend:<VWIDE>
4294 (vec_duplicate:<VHALF>
4295 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4296 )
4297 (const_int 1)))]
4298 "TARGET_SIMD"
4299 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4300 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4301 )
4302
4303 (define_expand "aarch64_sqdmull2_n<mode>"
4304 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4305 (match_operand:VQ_HSI 1 "register_operand" "w")
4306 (match_operand:<VEL> 2 "register_operand" "w")]
4307 "TARGET_SIMD"
4308 {
4309 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4310 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4311 operands[2], p));
4312 DONE;
4313 })
4314
4315 ;; vshl
4316
4317 (define_insn "aarch64_<sur>shl<mode>"
4318 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4319 (unspec:VSDQ_I_DI
4320 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4321 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4322 VSHL))]
4323 "TARGET_SIMD"
4324 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4325 [(set_attr "type" "neon_shift_reg<q>")]
4326 )
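
;; Unlike the immediate shifts below, these take per-lane shift counts
;; from a register: sshl/ushl treat each count as signed, so a negative
;; count in a lane of operand 2 shifts the corresponding lane of operand 1
;; right instead of left.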
4327
4328
4329 ;; vqshl
4330
4331 (define_insn "aarch64_<sur>q<r>shl<mode>"
4332 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4333 (unspec:VSDQ_I
4334 [(match_operand:VSDQ_I 1 "register_operand" "w")
4335 (match_operand:VSDQ_I 2 "register_operand" "w")]
4336 VQSHL))]
4337 "TARGET_SIMD"
4338 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4339 [(set_attr "type" "neon_sat_shift_reg<q>")]
4340 )
4341
4342 ;; vshll_n
4343
4344 (define_insn "aarch64_<sur>shll_n<mode>"
4345 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4346 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4347 (match_operand:SI 2
4348 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4349 VSHLL))]
4350 "TARGET_SIMD"
4351 {
4352 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4353 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4354 else
4355 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4356 }
4357 [(set_attr "type" "neon_shift_imm_long")]
4358 )
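
;; The special case above exists because sshll/ushll immediates only
;; encode shifts of 0 up to one less than the element width; shifting by
;; exactly the element width has its own encoding, shll.  E.g.
;; vshll_n_u8 (x, 8) assembles to "shll v0.8h, v1.8b, #8" (registers
;; illustrative).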
4359
4360 ;; vshll_high_n
4361
4362 (define_insn "aarch64_<sur>shll2_n<mode>"
4363 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4364 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4365 (match_operand:SI 2 "immediate_operand" "i")]
4366 VSHLL))]
4367 "TARGET_SIMD"
4368 {
4369 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4370 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4371 else
4372 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4373 }
4374 [(set_attr "type" "neon_shift_imm_long")]
4375 )
4376
4377 ;; vrshr_n
4378
4379 (define_insn "aarch64_<sur>shr_n<mode>"
4380 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4381 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4382 (match_operand:SI 2
4383 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4384 VRSHR_N))]
4385 "TARGET_SIMD"
4386 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4387 [(set_attr "type" "neon_sat_shift_imm<q>")]
4388 )
4389
4390 ;; v(r)sra_n
4391
4392 (define_insn "aarch64_<sur>sra_n<mode>"
4393 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4394 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4395 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4396 (match_operand:SI 3
4397 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4398 VSRA))]
4399 "TARGET_SIMD"
4400 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4401 [(set_attr "type" "neon_shift_acc<q>")]
4402 )
4403
4404 ;; vs<lr>i_n
4405
4406 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4407 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4408 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4409 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4410 (match_operand:SI 3
4411 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4412 VSLRI))]
4413 "TARGET_SIMD"
4414 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4415 [(set_attr "type" "neon_shift_imm<q>")]
4416 )
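
;; sli and sri are shift-and-insert: bit positions not covered by the
;; shifted value keep the previous contents of the destination (for
;; "sli #n" the low n bits of each lane survive, for "sri #n" the high n
;; bits), hence the "0" tied-register constraint on operand 1.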
4417
4418 ;; vqshl(u)
4419
4420 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4421 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4422 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4423 (match_operand:SI 2
4424 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4425 VQSHL_N))]
4426 "TARGET_SIMD"
4427 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4428 [(set_attr "type" "neon_sat_shift_imm<q>")]
4429 )
4430
4431
4432 ;; vq(r)shr(u)n_n
4433
4434 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4435 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4436 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4437 (match_operand:SI 2
4438 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4439 VQSHRN_N))]
4440 "TARGET_SIMD"
4441 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4442 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4443 )
4444
4445
4446 ;; cm(eq|ge|gt|lt|le)
4447 ;; Note: we have constraints for Dz and Z as different expanders
4448 ;; have different ideas of what should be passed to this pattern.
4449
4450 (define_insn "aarch64_cm<optab><mode>"
4451 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4452 (neg:<V_INT_EQUIV>
4453 (COMPARISONS:<V_INT_EQUIV>
4454 (match_operand:VDQ_I 1 "register_operand" "w,w")
4455 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4456 )))]
4457 "TARGET_SIMD"
4458 "@
4459 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4460 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4461 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4462 )
4463
4464 (define_insn_and_split "aarch64_cm<optab>di"
4465 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4466 (neg:DI
4467 (COMPARISONS:DI
4468 (match_operand:DI 1 "register_operand" "w,w,r")
4469 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4470 )))
4471 (clobber (reg:CC CC_REGNUM))]
4472 "TARGET_SIMD"
4473 "#"
4474 "&& reload_completed"
4475 [(set (match_operand:DI 0 "register_operand")
4476 (neg:DI
4477 (COMPARISONS:DI
4478 (match_operand:DI 1 "register_operand")
4479 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4480 )))]
4481 {
4482 /* If we are in the general purpose register file,
4483 we split to a sequence of comparison and store. */
4484 if (GP_REGNUM_P (REGNO (operands[0]))
4485 && GP_REGNUM_P (REGNO (operands[1])))
4486 {
4487 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4488 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4489 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4490 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4491 DONE;
4492 }
4493 /* Otherwise, we expand to a similar pattern which does not
4494 clobber CC_REGNUM. */
4495 }
4496 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4497 )
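
;; When the operands end up in general registers, the split above becomes
;; a compare plus a negated store-flag, e.g. for eq (illustrative
;; registers):
;;   cmp   x1, x2
;;   csetm x0, eq
;; csetm writes all-ones when the condition holds, matching the neg.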
4498
4499 (define_insn "*aarch64_cm<optab>di"
4500 [(set (match_operand:DI 0 "register_operand" "=w,w")
4501 (neg:DI
4502 (COMPARISONS:DI
4503 (match_operand:DI 1 "register_operand" "w,w")
4504 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4505 )))]
4506 "TARGET_SIMD && reload_completed"
4507 "@
4508 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4509 cm<optab>\t%d0, %d1, #0"
4510 [(set_attr "type" "neon_compare, neon_compare_zero")]
4511 )
4512
4513 ;; cm(hs|hi)
4514
4515 (define_insn "aarch64_cm<optab><mode>"
4516 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4517 (neg:<V_INT_EQUIV>
4518 (UCOMPARISONS:<V_INT_EQUIV>
4519 (match_operand:VDQ_I 1 "register_operand" "w")
4520 (match_operand:VDQ_I 2 "register_operand" "w")
4521 )))]
4522 "TARGET_SIMD"
4523 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4524 [(set_attr "type" "neon_compare<q>")]
4525 )
4526
4527 (define_insn_and_split "aarch64_cm<optab>di"
4528 [(set (match_operand:DI 0 "register_operand" "=w,r")
4529 (neg:DI
4530 (UCOMPARISONS:DI
4531 (match_operand:DI 1 "register_operand" "w,r")
4532 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4533 )))
4534 (clobber (reg:CC CC_REGNUM))]
4535 "TARGET_SIMD"
4536 "#"
4537 "&& reload_completed"
4538 [(set (match_operand:DI 0 "register_operand")
4539 (neg:DI
4540 (UCOMPARISONS:DI
4541 (match_operand:DI 1 "register_operand")
4542 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4543 )))]
4544 {
4545 /* If we are in the general purpose register file,
4546 we split to a sequence of comparison and store. */
4547 if (GP_REGNUM_P (REGNO (operands[0]))
4548 && GP_REGNUM_P (REGNO (operands[1])))
4549 {
4550 machine_mode mode = CCmode;
4551 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4552 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4553 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4554 DONE;
4555 }
4556 /* Otherwise, we expand to a similar pattern which does not
4557 clobber CC_REGNUM. */
4558 }
4559 [(set_attr "type" "neon_compare,multiple")]
4560 )
4561
4562 (define_insn "*aarch64_cm<optab>di"
4563 [(set (match_operand:DI 0 "register_operand" "=w")
4564 (neg:DI
4565 (UCOMPARISONS:DI
4566 (match_operand:DI 1 "register_operand" "w")
4567 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4568 )))]
4569 "TARGET_SIMD && reload_completed"
4570 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4571 [(set_attr "type" "neon_compare")]
4572 )
4573
4574 ;; cmtst
4575
4576 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4577 ;; we don't have any insns using ne, and aarch64_vcond outputs
4578 ;; not (neg (eq (and x y) 0))
4579 ;; which is rewritten by simplify_rtx as
4580 ;; plus (eq (and x y) 0) -1.
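;; The two agree lane by lane: the eq yields 1 when (x & y) == 0 and 0
;; otherwise, so adding -1 gives 0 or all-ones, exactly the cmtst result;
;; the rewrite itself is the two's-complement identity
;; not (neg X) == plus X -1.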
4581
4582 (define_insn "aarch64_cmtst<mode>"
4583 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4584 (plus:<V_INT_EQUIV>
4585 (eq:<V_INT_EQUIV>
4586 (and:VDQ_I
4587 (match_operand:VDQ_I 1 "register_operand" "w")
4588 (match_operand:VDQ_I 2 "register_operand" "w"))
4589 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4590 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4591 ]
4592 "TARGET_SIMD"
4593 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4594 [(set_attr "type" "neon_tst<q>")]
4595 )
4596
4597 (define_insn_and_split "aarch64_cmtstdi"
4598 [(set (match_operand:DI 0 "register_operand" "=w,r")
4599 (neg:DI
4600 (ne:DI
4601 (and:DI
4602 (match_operand:DI 1 "register_operand" "w,r")
4603 (match_operand:DI 2 "register_operand" "w,r"))
4604 (const_int 0))))
4605 (clobber (reg:CC CC_REGNUM))]
4606 "TARGET_SIMD"
4607 "#"
4608 "&& reload_completed"
4609 [(set (match_operand:DI 0 "register_operand")
4610 (neg:DI
4611 (ne:DI
4612 (and:DI
4613 (match_operand:DI 1 "register_operand")
4614 (match_operand:DI 2 "register_operand"))
4615 (const_int 0))))]
4616 {
4617 /* If we are in the general purpose register file,
4618 we split to a sequence of comparison and store. */
4619 if (GP_REGNUM_P (REGNO (operands[0]))
4620 && GP_REGNUM_P (REGNO (operands[1])))
4621 {
4622 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4623 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4624 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4625 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4626 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4627 DONE;
4628 }
4629 /* Otherwise, we expand to a similar pattern which does not
4630 clobber CC_REGNUM. */
4631 }
4632 [(set_attr "type" "neon_tst,multiple")]
4633 )
4634
4635 (define_insn "*aarch64_cmtstdi"
4636 [(set (match_operand:DI 0 "register_operand" "=w")
4637 (neg:DI
4638 (ne:DI
4639 (and:DI
4640 (match_operand:DI 1 "register_operand" "w")
4641 (match_operand:DI 2 "register_operand" "w"))
4642 (const_int 0))))]
4643 "TARGET_SIMD"
4644 "cmtst\t%d0, %d1, %d2"
4645 [(set_attr "type" "neon_tst")]
4646 )
4647
4648 ;; fcm(eq|ge|gt|le|lt)
4649
4650 (define_insn "aarch64_cm<optab><mode>"
4651 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4652 (neg:<V_INT_EQUIV>
4653 (COMPARISONS:<V_INT_EQUIV>
4654 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4655 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4656 )))]
4657 "TARGET_SIMD"
4658 "@
4659 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4660 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4661 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4662 )
4663
4664 ;; fac(ge|gt)
4665 ;; Note we can also handle what would be fac(le|lt) by
4666 ;; generating fac(ge|gt).
4667
4668 (define_insn "aarch64_fac<optab><mode>"
4669 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4670 (neg:<V_INT_EQUIV>
4671 (FAC_COMPARISONS:<V_INT_EQUIV>
4672 (abs:VHSDF_HSDF
4673 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4674 (abs:VHSDF_HSDF
4675 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4676 )))]
4677 "TARGET_SIMD"
4678 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4679 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4680 )
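
;; E.g. vcale_f32 (a, b) tests |a| <= |b| and is emitted as facge with
;; the operands swapped; the <cmp_1>/<cmp_2> attributes in the template
;; select the operand order appropriate to each comparison code.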
4681
4682 ;; addp
4683
4684 (define_insn "aarch64_addp<mode>"
4685 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4686 (unspec:VD_BHSI
4687 [(match_operand:VD_BHSI 1 "register_operand" "w")
4688 (match_operand:VD_BHSI 2 "register_operand" "w")]
4689 UNSPEC_ADDP))]
4690 "TARGET_SIMD"
4691 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4692 [(set_attr "type" "neon_reduc_add<q>")]
4693 )
4694
4695 (define_insn "aarch64_addpdi"
4696 [(set (match_operand:DI 0 "register_operand" "=w")
4697 (unspec:DI
4698 [(match_operand:V2DI 1 "register_operand" "w")]
4699 UNSPEC_ADDP))]
4700 "TARGET_SIMD"
4701 "addp\t%d0, %1.2d"
4702 [(set_attr "type" "neon_reduc_add")]
4703 )
4704
4705 ;; sqrt
4706
4707 (define_expand "sqrt<mode>2"
4708 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4709 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4710 "TARGET_SIMD"
4711 {
4712 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4713 DONE;
4714 })
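
;; aarch64_emit_approx_sqrt may, when the approximate-math tuning allows
;; it (e.g. under -mlow-precision-sqrt), emit an frsqrte estimate refined
;; by Newton-Raphson steps and return true; otherwise the expander falls
;; through to the fsqrt insn below.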
4715
4716 (define_insn "*sqrt<mode>2"
4717 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4718 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4719 "TARGET_SIMD"
4720 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4721 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4722 )
4723
4724 ;; Patterns for vector struct loads and stores.
4725
4726 (define_insn "aarch64_simd_ld2<mode>"
4727 [(set (match_operand:OI 0 "register_operand" "=w")
4728 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4729 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4730 UNSPEC_LD2))]
4731 "TARGET_SIMD"
4732 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4733 [(set_attr "type" "neon_load2_2reg<q>")]
4734 )
4735
4736 (define_insn "aarch64_simd_ld2r<mode>"
4737 [(set (match_operand:OI 0 "register_operand" "=w")
4738 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4739 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4740 UNSPEC_LD2_DUP))]
4741 "TARGET_SIMD"
4742 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4743 [(set_attr "type" "neon_load2_all_lanes<q>")]
4744 )
4745
4746 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4747 [(set (match_operand:OI 0 "register_operand" "=w")
4748 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4749 (match_operand:OI 2 "register_operand" "0")
4750 (match_operand:SI 3 "immediate_operand" "i")
4751 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4752 UNSPEC_LD2_LANE))]
4753 "TARGET_SIMD"
4754 {
4755 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4756 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4757 }
4758 [(set_attr "type" "neon_load2_one_lane")]
4759 )
4760
4761 (define_expand "vec_load_lanesoi<mode>"
4762 [(set (match_operand:OI 0 "register_operand" "=w")
4763 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4764 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765 UNSPEC_LD2))]
4766 "TARGET_SIMD"
4767 {
4768 if (BYTES_BIG_ENDIAN)
4769 {
4770 rtx tmp = gen_reg_rtx (OImode);
4771 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4772 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4773 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4774 }
4775 else
4776 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4777 DONE;
4778 })
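
;; The big-endian path is needed because ld2 de-interleaves into
;; architectural lane order while GCC numbers vector elements in memory
;; order; reversing each vector of the tuple (aarch64_rev_reglist* with a
;; mask from aarch64_reverse_mask) reconciles the two numberings.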
4779
4780 (define_insn "aarch64_simd_st2<mode>"
4781 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4782 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4783 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4784 UNSPEC_ST2))]
4785 "TARGET_SIMD"
4786 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4787 [(set_attr "type" "neon_store2_2reg<q>")]
4788 )
4789
4790 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4791 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4792 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4793 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4794 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4795 (match_operand:SI 2 "immediate_operand" "i")]
4796 UNSPEC_ST2_LANE))]
4797 "TARGET_SIMD"
4798 {
4799 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4800 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4801 }
4802 [(set_attr "type" "neon_store2_one_lane<q>")]
4803 )
4804
4805 (define_expand "vec_store_lanesoi<mode>"
4806 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4807 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4808 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4809 UNSPEC_ST2))]
4810 "TARGET_SIMD"
4811 {
4812 if (BYTES_BIG_ENDIAN)
4813 {
4814 rtx tmp = gen_reg_rtx (OImode);
4815 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4816 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4817 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4818 }
4819 else
4820 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4821 DONE;
4822 })
4823
4824 (define_insn "aarch64_simd_ld3<mode>"
4825 [(set (match_operand:CI 0 "register_operand" "=w")
4826 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4827 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4828 UNSPEC_LD3))]
4829 "TARGET_SIMD"
4830 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4831 [(set_attr "type" "neon_load3_3reg<q>")]
4832 )
4833
4834 (define_insn "aarch64_simd_ld3r<mode>"
4835 [(set (match_operand:CI 0 "register_operand" "=w")
4836 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4837 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4838 UNSPEC_LD3_DUP))]
4839 "TARGET_SIMD"
4840 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4841 [(set_attr "type" "neon_load3_all_lanes<q>")]
4842 )
4843
4844 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4845 [(set (match_operand:CI 0 "register_operand" "=w")
4846 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4847 (match_operand:CI 2 "register_operand" "0")
4848 (match_operand:SI 3 "immediate_operand" "i")
4849 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4850 UNSPEC_LD3_LANE))]
4851 "TARGET_SIMD"
4852 {
4853 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4854 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4855 }
4856 [(set_attr "type" "neon_load3_one_lane")]
4857 )
4858
4859 (define_expand "vec_load_lanesci<mode>"
4860 [(set (match_operand:CI 0 "register_operand" "=w")
4861 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4862 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4863 UNSPEC_LD3))]
4864 "TARGET_SIMD"
4865 {
4866 if (BYTES_BIG_ENDIAN)
4867 {
4868 rtx tmp = gen_reg_rtx (CImode);
4869 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4870 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4871 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4872 }
4873 else
4874 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4875 DONE;
4876 })
4877
4878 (define_insn "aarch64_simd_st3<mode>"
4879 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4880 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4881 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4882 UNSPEC_ST3))]
4883 "TARGET_SIMD"
4884 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4885 [(set_attr "type" "neon_store3_3reg<q>")]
4886 )
4887
4888 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4889 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4890 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4891 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4892 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4893 (match_operand:SI 2 "immediate_operand" "i")]
4894 UNSPEC_ST3_LANE))]
4895 "TARGET_SIMD"
4896 {
4897 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4898 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4899 }
4900 [(set_attr "type" "neon_store3_one_lane<q>")]
4901 )
4902
4903 (define_expand "vec_store_lanesci<mode>"
4904 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4905 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4906 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4907 UNSPEC_ST3))]
4908 "TARGET_SIMD"
4909 {
4910 if (BYTES_BIG_ENDIAN)
4911 {
4912 rtx tmp = gen_reg_rtx (CImode);
4913 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4914 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4915 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4916 }
4917 else
4918 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4919 DONE;
4920 })
4921
4922 (define_insn "aarch64_simd_ld4<mode>"
4923 [(set (match_operand:XI 0 "register_operand" "=w")
4924 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4925 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4926 UNSPEC_LD4))]
4927 "TARGET_SIMD"
4928 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4929 [(set_attr "type" "neon_load4_4reg<q>")]
4930 )
4931
4932 (define_insn "aarch64_simd_ld4r<mode>"
4933 [(set (match_operand:XI 0 "register_operand" "=w")
4934 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4935 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4936 UNSPEC_LD4_DUP))]
4937 "TARGET_SIMD"
4938 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4939 [(set_attr "type" "neon_load4_all_lanes<q>")]
4940 )
4941
4942 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4943 [(set (match_operand:XI 0 "register_operand" "=w")
4944 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4945 (match_operand:XI 2 "register_operand" "0")
4946 (match_operand:SI 3 "immediate_operand" "i")
4947 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4948 UNSPEC_LD4_LANE))]
4949 "TARGET_SIMD"
4950 {
4951 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4952 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4953 }
4954 [(set_attr "type" "neon_load4_one_lane")]
4955 )
4956
4957 (define_expand "vec_load_lanesxi<mode>"
4958 [(set (match_operand:XI 0 "register_operand" "=w")
4959 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4960 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4961 UNSPEC_LD4))]
4962 "TARGET_SIMD"
4963 {
4964 if (BYTES_BIG_ENDIAN)
4965 {
4966 rtx tmp = gen_reg_rtx (XImode);
4967 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4968 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4969 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4970 }
4971 else
4972 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4973 DONE;
4974 })
4975
4976 (define_insn "aarch64_simd_st4<mode>"
4977 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4978 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4979 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980 UNSPEC_ST4))]
4981 "TARGET_SIMD"
4982 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4983 [(set_attr "type" "neon_store4_4reg<q>")]
4984 )
4985
4986 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4987 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4988 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4989 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4990 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4991 (match_operand:SI 2 "immediate_operand" "i")]
4992 UNSPEC_ST4_LANE))]
4993 "TARGET_SIMD"
4994 {
4995 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4996 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4997 }
4998 [(set_attr "type" "neon_store4_one_lane<q>")]
4999 )
5000
5001 (define_expand "vec_store_lanesxi<mode>"
5002 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
5003 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
5004 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5005 UNSPEC_ST4))]
5006 "TARGET_SIMD"
5007 {
5008 if (BYTES_BIG_ENDIAN)
5009 {
5010 rtx tmp = gen_reg_rtx (XImode);
5011 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
5012 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
5013 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
5014 }
5015 else
5016 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
5017 DONE;
5018 })
5019
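;; Reverse the order of a register list.  After reload this splits into
;; one tbl per constituent 16-byte register; e.g. a CI-mode
;; (three-register) list becomes three tbl1 instructions, each applying
;; the byte mask built by aarch64_reverse_mask.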
5020 (define_insn_and_split "aarch64_rev_reglist<mode>"
5021 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
5022 (unspec:VSTRUCT
5023 [(match_operand:VSTRUCT 1 "register_operand" "w")
5024 (match_operand:V16QI 2 "register_operand" "w")]
5025 UNSPEC_REV_REGLIST))]
5026 "TARGET_SIMD"
5027 "#"
5028 "&& reload_completed"
5029 [(const_int 0)]
5030 {
5031 int i;
5032 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
5033 for (i = 0; i < nregs; i++)
5034 {
5035 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
5036 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
5037 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
5038 }
5039 DONE;
5040 }
5041 [(set_attr "type" "neon_tbl1_q")
5042 (set_attr "length" "<insn_count>")]
5043 )
5044
5045 ;; Reload patterns for AdvSIMD register list operands.
5046
5047 (define_expand "mov<mode>"
5048 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
5049 (match_operand:VSTRUCT 1 "general_operand" ""))]
5050 "TARGET_SIMD"
5051 {
5052 if (can_create_pseudo_p ())
5053 {
5054 if (GET_CODE (operands[0]) != REG)
5055 operands[1] = force_reg (<MODE>mode, operands[1]);
5056 }
5057 })
5058
5059 (define_insn "*aarch64_mov<mode>"
5060 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
5061 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
5062 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5063 && (register_operand (operands[0], <MODE>mode)
5064 || register_operand (operands[1], <MODE>mode))"
5065 "@
5066 #
5067 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
5068 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
5069 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
5070 neon_load<nregs>_<nregs>reg_q")
5071 (set_attr "length" "<insn_count>,4,4")]
5072 )
5073
5074 (define_insn "aarch64_be_ld1<mode>"
5075 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
5076 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
5077 "aarch64_simd_struct_operand" "Utv")]
5078 UNSPEC_LD1))]
5079 "TARGET_SIMD"
5080 "ld1\\t{%0<Vmtype>}, %1"
5081 [(set_attr "type" "neon_load1_1reg<q>")]
5082 )
5083
5084 (define_insn "aarch64_be_st1<mode>"
5085 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
5086 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
5087 UNSPEC_ST1))]
5088 "TARGET_SIMD"
5089 "st1\\t{%1<Vmtype>}, %0"
5090 [(set_attr "type" "neon_store1_1reg<q>")]
5091 )
5092
5093 (define_insn "*aarch64_be_movoi"
5094 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
5095 (match_operand:OI 1 "general_operand" " w,w,m"))]
5096 "TARGET_SIMD && BYTES_BIG_ENDIAN
5097 && (register_operand (operands[0], OImode)
5098 || register_operand (operands[1], OImode))"
5099 "@
5100 #
5101 stp\\t%q1, %R1, %0
5102 ldp\\t%q0, %R0, %1"
5103 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
5104 (set_attr "length" "8,4,4")]
5105 )
5106
5107 (define_insn "*aarch64_be_movci"
5108 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
5109 (match_operand:CI 1 "general_operand" " w,w,o"))]
5110 "TARGET_SIMD && BYTES_BIG_ENDIAN
5111 && (register_operand (operands[0], CImode)
5112 || register_operand (operands[1], CImode))"
5113 "#"
5114 [(set_attr "type" "multiple")
5115 (set_attr "length" "12,4,4")]
5116 )
5117
5118 (define_insn "*aarch64_be_movxi"
5119 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
5120 (match_operand:XI 1 "general_operand" " w,w,o"))]
5121 "TARGET_SIMD && BYTES_BIG_ENDIAN
5122 && (register_operand (operands[0], XImode)
5123 || register_operand (operands[1], XImode))"
5124 "#"
5125 [(set_attr "type" "multiple")
5126 (set_attr "length" "16,4,4")]
5127 )
5128
5129 (define_split
5130 [(set (match_operand:OI 0 "register_operand")
5131 (match_operand:OI 1 "register_operand"))]
5132 "TARGET_SIMD && reload_completed"
5133 [(const_int 0)]
5134 {
5135 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
5136 DONE;
5137 })
5138
5139 (define_split
5140 [(set (match_operand:CI 0 "nonimmediate_operand")
5141 (match_operand:CI 1 "general_operand"))]
5142 "TARGET_SIMD && reload_completed"
5143 [(const_int 0)]
5144 {
5145 if (register_operand (operands[0], CImode)
5146 && register_operand (operands[1], CImode))
5147 {
5148 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
5149 DONE;
5150 }
5151 else if (BYTES_BIG_ENDIAN)
5152 {
5153 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
5154 simplify_gen_subreg (OImode, operands[1], CImode, 0));
5155 emit_move_insn (gen_lowpart (V16QImode,
5156 simplify_gen_subreg (TImode, operands[0],
5157 CImode, 32)),
5158 gen_lowpart (V16QImode,
5159 simplify_gen_subreg (TImode, operands[1],
5160 CImode, 32)));
5161 DONE;
5162 }
5163 else
5164 FAIL;
5165 })
5166
5167 (define_split
5168 [(set (match_operand:XI 0 "nonimmediate_operand")
5169 (match_operand:XI 1 "general_operand"))]
5170 "TARGET_SIMD && reload_completed"
5171 [(const_int 0)]
5172 {
5173 if (register_operand (operands[0], XImode)
5174 && register_operand (operands[1], XImode))
5175 {
5176 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
5177 DONE;
5178 }
5179 else if (BYTES_BIG_ENDIAN)
5180 {
5181 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
5182 simplify_gen_subreg (OImode, operands[1], XImode, 0));
5183 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
5184 simplify_gen_subreg (OImode, operands[1], XImode, 32));
5185 DONE;
5186 }
5187 else
5188 FAIL;
5189 })
5190
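;; Load-and-replicate expanders.  The MEM covers one element per
;; destination register: e.g. aarch64_ld3rv4si reads 3 * 4 = 12 bytes,
;; and ld3r then replicates each element across all lanes of its vector.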
5191 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
5192 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5193 (match_operand:DI 1 "register_operand" "r")
5194 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5195 "TARGET_SIMD"
5196 {
5197 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5198 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5199 * <VSTRUCT:nregs>);
5200
5201 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
5202 mem));
5203 DONE;
5204 })
5205
5206 (define_insn "aarch64_ld2<mode>_dreg"
5207 [(set (match_operand:OI 0 "register_operand" "=w")
5208 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5209 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5210 UNSPEC_LD2_DREG))]
5211 "TARGET_SIMD"
5212 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5213 [(set_attr "type" "neon_load2_2reg<q>")]
5214 )
5215
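;; LD2/LD3/LD4 have no .1d arrangement, so for 64-bit scalar modes (DX)
;; the same memory layout is loaded with ld1 on a register list; with a
;; single element per "vector" there is nothing to de-interleave.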
5216 (define_insn "aarch64_ld2<mode>_dreg"
5217 [(set (match_operand:OI 0 "register_operand" "=w")
5218 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5219 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5220 UNSPEC_LD2_DREG))]
5221 "TARGET_SIMD"
5222 "ld1\\t{%S0.1d - %T0.1d}, %1"
5223 [(set_attr "type" "neon_load1_2reg<q>")]
5224 )
5225
5226 (define_insn "aarch64_ld3<mode>_dreg"
5227 [(set (match_operand:CI 0 "register_operand" "=w")
5228 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5229 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5230 UNSPEC_LD3_DREG))]
5231 "TARGET_SIMD"
5232 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5233 [(set_attr "type" "neon_load3_3reg<q>")]
5234 )
5235
5236 (define_insn "aarch64_ld3<mode>_dreg"
5237 [(set (match_operand:CI 0 "register_operand" "=w")
5238 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5239 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5240 UNSPEC_LD3_DREG))]
5241 "TARGET_SIMD"
5242 "ld1\\t{%S0.1d - %U0.1d}, %1"
5243 [(set_attr "type" "neon_load1_3reg<q>")]
5244 )
5245
5246 (define_insn "aarch64_ld4<mode>_dreg"
5247 [(set (match_operand:XI 0 "register_operand" "=w")
5248 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5249 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5250 UNSPEC_LD4_DREG))]
5251 "TARGET_SIMD"
5252 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5253 [(set_attr "type" "neon_load4_4reg<q>")]
5254 )
5255
5256 (define_insn "aarch64_ld4<mode>_dreg"
5257 [(set (match_operand:XI 0 "register_operand" "=w")
5258 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5259 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5260 UNSPEC_LD4_DREG))]
5261 "TARGET_SIMD"
5262 "ld1\\t{%S0.1d - %V0.1d}, %1"
5263 [(set_attr "type" "neon_load1_4reg<q>")]
5264 )
5265
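;; Structure loads of D registers access <nregs> * 8 bytes of memory;
;; e.g. aarch64_ld3v2si marks its MEM as 24 bytes wide.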
5266 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5267 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5268 (match_operand:DI 1 "register_operand" "r")
5269 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5270 "TARGET_SIMD"
5271 {
5272 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5273 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5274
5275 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5276 DONE;
5277 })
5278
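;; On little-endian, aarch64_ld1 is just an ordinary vector load; on
;; big-endian the UNSPEC-based be_ld1 pattern above is used instead, so
;; the load keeps LD1's element ordering rather than LDR's byte ordering.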
5279 (define_expand "aarch64_ld1<VALL_F16:mode>"
5280 [(match_operand:VALL_F16 0 "register_operand")
5281 (match_operand:DI 1 "register_operand")]
5282 "TARGET_SIMD"
5283 {
5284 machine_mode mode = <VALL_F16:MODE>mode;
5285 rtx mem = gen_rtx_MEM (mode, operands[1]);
5286
5287 if (BYTES_BIG_ENDIAN)
5288 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5289 else
5290 emit_move_insn (operands[0], mem);
5291 DONE;
5292 })
5293
5294 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5295 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5296 (match_operand:DI 1 "register_operand" "r")
5297 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5298 "TARGET_SIMD"
5299 {
5300 machine_mode mode = <VSTRUCT:MODE>mode;
5301 rtx mem = gen_rtx_MEM (mode, operands[1]);
5302
5303 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5304 DONE;
5305 })
5306
5307 (define_expand "aarch64_ld1x2<VQ:mode>"
5308 [(match_operand:OI 0 "register_operand" "=w")
5309 (match_operand:DI 1 "register_operand" "r")
5310 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5311 "TARGET_SIMD"
5312 {
5313 machine_mode mode = OImode;
5314 rtx mem = gen_rtx_MEM (mode, operands[1]);
5315
5316 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5317 DONE;
5318 })
5319
5320 (define_expand "aarch64_ld1x2<VDC:mode>"
5321 [(match_operand:OI 0 "register_operand" "=w")
5322 (match_operand:DI 1 "register_operand" "r")
5323 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 "TARGET_SIMD"
5325 {
5326 machine_mode mode = OImode;
5327 rtx mem = gen_rtx_MEM (mode, operands[1]);
5328
5329 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5330 DONE;
5331 })
5332
5333
5334 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5335 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5336 (match_operand:DI 1 "register_operand" "r")
5337 (match_operand:VSTRUCT 2 "register_operand" "0")
5338 (match_operand:SI 3 "immediate_operand" "i")
5339 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5340 "TARGET_SIMD"
5341 {
5342 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5343 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5344 * <VSTRUCT:nregs>);
5345
5346 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5347 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5348 operands[0], mem, operands[2], operands[3]));
5349 DONE;
5350 })
5351
5352 ;; Expanders for builtins to extract vector registers from large
5353 ;; opaque integer modes.
5354
5355 ;; D-register list.
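;; The part index selects a 16-byte slice of the list: e.g. part 1 of a
;; CI value selects bytes 16..31, and the result is that slice's 64-bit
;; lowpart.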
5356
5357 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5358 [(match_operand:VDC 0 "register_operand" "=w")
5359 (match_operand:VSTRUCT 1 "register_operand" "w")
5360 (match_operand:SI 2 "immediate_operand" "i")]
5361 "TARGET_SIMD"
5362 {
5363 int part = INTVAL (operands[2]);
5364 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5365 int offset = part * 16;
5366
5367 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5368 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5369 DONE;
5370 })
5371
5372 ;; Q-register list.
5373
5374 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5375 [(match_operand:VQ 0 "register_operand" "=w")
5376 (match_operand:VSTRUCT 1 "register_operand" "w")
5377 (match_operand:SI 2 "immediate_operand" "i")]
5378 "TARGET_SIMD"
5379 {
5380 int part = INTVAL (operands[2]);
5381 int offset = part * 16;
5382
5383 emit_move_insn (operands[0],
5384 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5385 DONE;
5386 })
5387
5388 ;; Permuted-store expanders for neon intrinsics.
5389
5390 ;; Permute instructions
5391
5392 ;; vec_perm support
5393
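;; For variable selectors, aarch64_expand_vec_perm lowers the permute to
;; a tbl-based sequence, treating the selector lanes as byte indices
;; (the VB modes are byte vectors).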
5394 (define_expand "vec_perm<mode>"
5395 [(match_operand:VB 0 "register_operand")
5396 (match_operand:VB 1 "register_operand")
5397 (match_operand:VB 2 "register_operand")
5398 (match_operand:VB 3 "register_operand")]
5399 "TARGET_SIMD"
5400 {
5401 aarch64_expand_vec_perm (operands[0], operands[1],
5402 operands[2], operands[3], <nunits>);
5403 DONE;
5404 })
5405
5406 (define_insn "aarch64_tbl1<mode>"
5407 [(set (match_operand:VB 0 "register_operand" "=w")
5408 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5409 (match_operand:VB 2 "register_operand" "w")]
5410 UNSPEC_TBL))]
5411 "TARGET_SIMD"
5412 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5413 [(set_attr "type" "neon_tbl1<q>")]
5414 )
5415
5416 ;; Two source registers.
5417
5418 (define_insn "aarch64_tbl2v16qi"
5419 [(set (match_operand:V16QI 0 "register_operand" "=w")
5420 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5421 (match_operand:V16QI 2 "register_operand" "w")]
5422 UNSPEC_TBL))]
5423 "TARGET_SIMD"
5424 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5425 [(set_attr "type" "neon_tbl2_q")]
5426 )
5427
5428 (define_insn "aarch64_tbl3<mode>"
5429 [(set (match_operand:VB 0 "register_operand" "=w")
5430 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5431 (match_operand:VB 2 "register_operand" "w")]
5432 UNSPEC_TBL))]
5433 "TARGET_SIMD"
5434 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5435 [(set_attr "type" "neon_tbl3")]
5436 )
5437
5438 (define_insn "aarch64_tbx4<mode>"
5439 [(set (match_operand:VB 0 "register_operand" "=w")
5440 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5441 (match_operand:OI 2 "register_operand" "w")
5442 (match_operand:VB 3 "register_operand" "w")]
5443 UNSPEC_TBX))]
5444 "TARGET_SIMD"
5445 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5446 [(set_attr "type" "neon_tbl4")]
5447 )
5448
5449 ;; Three source registers.
5450
5451 (define_insn "aarch64_qtbl3<mode>"
5452 [(set (match_operand:VB 0 "register_operand" "=w")
5453 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5454 (match_operand:VB 2 "register_operand" "w")]
5455 UNSPEC_TBL))]
5456 "TARGET_SIMD"
5457 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5458 [(set_attr "type" "neon_tbl3")]
5459 )
5460
5461 (define_insn "aarch64_qtbx3<mode>"
5462 [(set (match_operand:VB 0 "register_operand" "=w")
5463 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5464 (match_operand:CI 2 "register_operand" "w")
5465 (match_operand:VB 3 "register_operand" "w")]
5466 UNSPEC_TBX))]
5467 "TARGET_SIMD"
5468 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5469 [(set_attr "type" "neon_tbl3")]
5470 )
5471
5472 ;; Four source registers.
5473
5474 (define_insn "aarch64_qtbl4<mode>"
5475 [(set (match_operand:VB 0 "register_operand" "=w")
5476 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5477 (match_operand:VB 2 "register_operand" "w")]
5478 UNSPEC_TBL))]
5479 "TARGET_SIMD"
5480 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5481 [(set_attr "type" "neon_tbl4")]
5482 )
5483
5484 (define_insn "aarch64_qtbx4<mode>"
5485 [(set (match_operand:VB 0 "register_operand" "=w")
5486 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5487 (match_operand:XI 2 "register_operand" "w")
5488 (match_operand:VB 3 "register_operand" "w")]
5489 UNSPEC_TBX))]
5490 "TARGET_SIMD"
5491 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5492 [(set_attr "type" "neon_tbl4")]
5493 )
5494
5495 (define_insn_and_split "aarch64_combinev16qi"
5496 [(set (match_operand:OI 0 "register_operand" "=w")
5497 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5498 (match_operand:V16QI 2 "register_operand" "w")]
5499 UNSPEC_CONCAT))]
5500 "TARGET_SIMD"
5501 "#"
5502 "&& reload_completed"
5503 [(const_int 0)]
5504 {
5505 aarch64_split_combinev16qi (operands);
5506 DONE;
5507 }
5508 [(set_attr "type" "multiple")]
5509 )
5510
5511 ;; This instruction's pattern is generated directly by
5512 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5513 ;; need corresponding changes there.
5514 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5515 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5516 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5517 (match_operand:VALL_F16 2 "register_operand" "w")]
5518 PERMUTE))]
5519 "TARGET_SIMD"
5520 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5521 [(set_attr "type" "neon_permute<q>")]
5522 )
5523
5524 ;; This instruction's pattern is generated directly by
5525 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5526 ;; need corresponding changes there. Note that the immediate (third)
5527 ;; operand is a lane index not a byte index.
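;; For example, with V4SI operands a lane index of 3 is printed as the
;; byte immediate #12 (3 * GET_MODE_UNIT_SIZE), i.e.
;; ext v0.16b, v1.16b, v2.16b, #12.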
5528 (define_insn "aarch64_ext<mode>"
5529 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5530 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5531 (match_operand:VALL_F16 2 "register_operand" "w")
5532 (match_operand:SI 3 "immediate_operand" "i")]
5533 UNSPEC_EXT))]
5534 "TARGET_SIMD"
5535 {
5536 operands[3] = GEN_INT (INTVAL (operands[3])
5537 * GET_MODE_UNIT_SIZE (<MODE>mode));
5538 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5539 }
5540 [(set_attr "type" "neon_ext<q>")]
5541 )
5542
5543 ;; This instruction's pattern is generated directly by
5544 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5545 ;; need corresponding changes there.
5546 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5547 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5548 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5549 REVERSE))]
5550 "TARGET_SIMD"
5551 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5552 [(set_attr "type" "neon_rev<q>")]
5553 )
5554
5555 (define_insn "aarch64_st2<mode>_dreg"
5556 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5557 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5558 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5559 UNSPEC_ST2))]
5560 "TARGET_SIMD"
5561 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5562 [(set_attr "type" "neon_store2_2reg")]
5563 )
5564
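;; As with the loads above, ST2/ST3/ST4 lack a .1d arrangement, so the
;; 64-bit scalar (DX) variants store the register list with st1 instead.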
5565 (define_insn "aarch64_st2<mode>_dreg"
5566 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5567 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5568 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569 UNSPEC_ST2))]
5570 "TARGET_SIMD"
5571 "st1\\t{%S1.1d - %T1.1d}, %0"
5572 [(set_attr "type" "neon_store1_2reg")]
5573 )
5574
5575 (define_insn "aarch64_st3<mode>_dreg"
5576 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5577 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5578 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5579 UNSPEC_ST3))]
5580 "TARGET_SIMD"
5581 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5582 [(set_attr "type" "neon_store3_3reg")]
5583 )
5584
5585 (define_insn "aarch64_st3<mode>_dreg"
5586 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5587 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5588 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5589 UNSPEC_ST3))]
5590 "TARGET_SIMD"
5591 "st1\\t{%S1.1d - %U1.1d}, %0"
5592 [(set_attr "type" "neon_store1_3reg")]
5593 )
5594
5595 (define_insn "aarch64_st4<mode>_dreg"
5596 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5597 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5598 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5599 UNSPEC_ST4))]
5600 "TARGET_SIMD"
5601 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5602 [(set_attr "type" "neon_store4_4reg")]
5603 )
5604
5605 (define_insn "aarch64_st4<mode>_dreg"
5606 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5607 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5608 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5609 UNSPEC_ST4))]
5610 "TARGET_SIMD"
5611 "st1\\t{%S1.1d - %V1.1d}, %0"
5612 [(set_attr "type" "neon_store1_4reg")]
5613 )
5614
5615 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5616 [(match_operand:DI 0 "register_operand" "r")
5617 (match_operand:VSTRUCT 1 "register_operand" "w")
5618 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5619 "TARGET_SIMD"
5620 {
5621 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5622 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5623
5624 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5625 DONE;
5626 })
5627
5628 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5629 [(match_operand:DI 0 "register_operand" "r")
5630 (match_operand:VSTRUCT 1 "register_operand" "w")
5631 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5632 "TARGET_SIMD"
5633 {
5634 machine_mode mode = <VSTRUCT:MODE>mode;
5635 rtx mem = gen_rtx_MEM (mode, operands[0]);
5636
5637 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5638 DONE;
5639 })
5640
5641 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5642 [(match_operand:DI 0 "register_operand" "r")
5643 (match_operand:VSTRUCT 1 "register_operand" "w")
5644 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5645 (match_operand:SI 2 "immediate_operand")]
5646 "TARGET_SIMD"
5647 {
5648 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5649 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5650 * <VSTRUCT:nregs>);
5651
5652 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5653 mem, operands[1], operands[2]));
5654 DONE;
5655 })
5656
5657 (define_expand "aarch64_st1<VALL_F16:mode>"
5658 [(match_operand:DI 0 "register_operand")
5659 (match_operand:VALL_F16 1 "register_operand")]
5660 "TARGET_SIMD"
5661 {
5662 machine_mode mode = <VALL_F16:MODE>mode;
5663 rtx mem = gen_rtx_MEM (mode, operands[0]);
5664
5665 if (BYTES_BIG_ENDIAN)
5666 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5667 else
5668 emit_move_insn (mem, operands[1]);
5669 DONE;
5670 })
5671
5672 ;; Expander for builtins to insert vector registers into large
5673 ;; opaque integer modes.
5674
5675 ;; Q-register list. We don't need a D-reg inserter as we zero-extend
5676 ;; D-regs in arm_neon.h and insert the resulting Q-regs.
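;; The part index selects a 16-byte slice: e.g. setting part 2 of an XI
;; value copies the whole input list and then overwrites bytes 32..47
;; (the third Q register) with the new vector.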
5677
5678 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5679 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5680 (match_operand:VSTRUCT 1 "register_operand" "0")
5681 (match_operand:VQ 2 "register_operand" "w")
5682 (match_operand:SI 3 "immediate_operand" "i")]
5683 "TARGET_SIMD"
5684 {
5685 int part = INTVAL (operands[3]);
5686 int offset = part * 16;
5687
5688 emit_move_insn (operands[0], operands[1]);
5689 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5690 operands[2]);
5691 DONE;
5692 })
5693
5694 ;; Standard pattern name vec_init<mode><Vel>.
5695
5696 (define_expand "vec_init<mode><Vel>"
5697 [(match_operand:VALL_F16 0 "register_operand" "")
5698 (match_operand 1 "" "")]
5699 "TARGET_SIMD"
5700 {
5701 aarch64_expand_vector_init (operands[0], operands[1]);
5702 DONE;
5703 })
5704
5705 (define_insn "*aarch64_simd_ld1r<mode>"
5706 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5707 (vec_duplicate:VALL_F16
5708 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5709 "TARGET_SIMD"
5710 "ld1r\\t{%0.<Vtype>}, %1"
5711 [(set_attr "type" "neon_load1_all_lanes")]
5712 )
5713
5714 (define_insn "aarch64_simd_ld1<mode>_x2"
5715 [(set (match_operand:OI 0 "register_operand" "=w")
5716 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5717 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5718 UNSPEC_LD1))]
5719 "TARGET_SIMD"
5720 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5721 [(set_attr "type" "neon_load1_2reg<q>")]
5722 )
5723
5724 (define_insn "aarch64_simd_ld1<mode>_x2"
5725 [(set (match_operand:OI 0 "register_operand" "=w")
5726 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5727 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728 UNSPEC_LD1))]
5729 "TARGET_SIMD"
5730 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5731 [(set_attr "type" "neon_load1_2reg<q>")]
5732 )
5733
5734
5735 (define_insn "aarch64_frecpe<mode>"
5736 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5737 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5738 UNSPEC_FRECPE))]
5739 "TARGET_SIMD"
5740 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5741 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5742 )
5743
5744 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5745 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5746 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5747 FRECP))]
5748 "TARGET_SIMD"
5749 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5750 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5751 )
5752
5753 (define_insn "aarch64_frecps<mode>"
5754 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5755 (unspec:VHSDF_HSDF
5756 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5757 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5758 UNSPEC_FRECPS))]
5759 "TARGET_SIMD"
5760 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5761 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5762 )
5763
5764 (define_insn "aarch64_urecpe<mode>"
5765 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5766 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5767 UNSPEC_URECPE))]
5768 "TARGET_SIMD"
5769 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5770 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5771
5772 ;; Standard pattern name vec_extract<mode><Vel>.
5773
5774 (define_expand "vec_extract<mode><Vel>"
5775 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5776 (match_operand:VALL_F16 1 "register_operand" "")
5777 (match_operand:SI 2 "immediate_operand" "")]
5778 "TARGET_SIMD"
5779 {
5780 emit_insn
5781 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5782 DONE;
5783 })
5784
5785 ;; aes
5786
5787 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5788 [(set (match_operand:V16QI 0 "register_operand" "=w")
5789 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5790 (match_operand:V16QI 2 "register_operand" "w")]
5791 CRYPTO_AES))]
5792 "TARGET_SIMD && TARGET_AES"
5793 "aes<aes_op>\\t%0.16b, %2.16b"
5794 [(set_attr "type" "crypto_aese")]
5795 )
5796
5797 ;; When AES/AESMC fusion is enabled, we want the register allocation to
5798 ;; look like:
5799 ;; AESE Vn, _
5800 ;; AESMC Vn, Vn
5801 ;; So prefer to tie operand 1 to operand 0 when fusing.
5802
5803 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5804 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5805 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5806 CRYPTO_AESMC))]
5807 "TARGET_SIMD && TARGET_AES"
5808 "aes<aesmc_op>\\t%0.16b, %1.16b"
5809 [(set_attr "type" "crypto_aesmc")
5810 (set_attr_alternative "enabled"
5811 [(if_then_else (match_test
5812 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5813 (const_string "yes" )
5814 (const_string "no"))
5815 (const_string "yes")])]
5816 )
5817
5818 ;; When AESE/AESMC fusion is enabled, we really want to keep the two together
5819 ;; and enforce the register dependency without scheduling or register
5820 ;; allocation messing up the order or introducing moves in between.
5821 ;; Mash the two together during combine.
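;; For example, the arm_neon.h sequence vaesmcq_u8 (vaeseq_u8 (d, k))
;; can be merged by combine into this single fused pattern.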
5822
5823 (define_insn "*aarch64_crypto_aese_fused"
5824 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5825 (unspec:V16QI
5826 [(unspec:V16QI
5827 [(match_operand:V16QI 1 "register_operand" "0")
5828 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5829 ] UNSPEC_AESMC))]
5830 "TARGET_SIMD && TARGET_AES
5831 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5832 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5833 [(set_attr "type" "crypto_aese")
5834 (set_attr "length" "8")]
5835 )
5836
5837 ;; When AESD/AESIMC fusion is enabled, we really want to keep the two together
5838 ;; and enforce the register dependency without scheduling or register
5839 ;; allocation messing up the order or introducing moves in between.
5840 ;; Mash the two together during combine.
5841
5842 (define_insn "*aarch64_crypto_aesd_fused"
5843 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5844 (unspec:V16QI
5845 [(unspec:V16QI
5846 [(match_operand:V16QI 1 "register_operand" "0")
5847 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
5848 ] UNSPEC_AESIMC))]
5849 "TARGET_SIMD && TARGET_AES
5850 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5851 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
5852 [(set_attr "type" "crypto_aese")
5853 (set_attr "length" "8")]
5854 )
5855
5856 ;; sha1
5857
5858 (define_insn "aarch64_crypto_sha1hsi"
5859 [(set (match_operand:SI 0 "register_operand" "=w")
5860 (unspec:SI [(match_operand:SI 1
5861 "register_operand" "w")]
5862 UNSPEC_SHA1H))]
5863 "TARGET_SIMD && TARGET_SHA2"
5864 "sha1h\\t%s0, %s1"
5865 [(set_attr "type" "crypto_sha1_fast")]
5866 )
5867
5868 (define_insn "aarch64_crypto_sha1hv4si"
5869 [(set (match_operand:SI 0 "register_operand" "=w")
5870 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5871 (parallel [(const_int 0)]))]
5872 UNSPEC_SHA1H))]
5873 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5874 "sha1h\\t%s0, %s1"
5875 [(set_attr "type" "crypto_sha1_fast")]
5876 )
5877
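;; On big-endian the architectural lane 0 of a V4SI value is GCC lane 3,
;; hence the vec_select of lane 3 in the big-endian variant below.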
5878 (define_insn "aarch64_be_crypto_sha1hv4si"
5879 [(set (match_operand:SI 0 "register_operand" "=w")
5880 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5881 (parallel [(const_int 3)]))]
5882 UNSPEC_SHA1H))]
5883 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5884 "sha1h\\t%s0, %s1"
5885 [(set_attr "type" "crypto_sha1_fast")]
5886 )
5887
5888 (define_insn "aarch64_crypto_sha1su1v4si"
5889 [(set (match_operand:V4SI 0 "register_operand" "=w")
5890 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5891 (match_operand:V4SI 2 "register_operand" "w")]
5892 UNSPEC_SHA1SU1))]
5893 "TARGET_SIMD && TARGET_SHA2"
5894 "sha1su1\\t%0.4s, %2.4s"
5895 [(set_attr "type" "crypto_sha1_fast")]
5896 )
5897
5898 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5899 [(set (match_operand:V4SI 0 "register_operand" "=w")
5900 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5901 (match_operand:SI 2 "register_operand" "w")
5902 (match_operand:V4SI 3 "register_operand" "w")]
5903 CRYPTO_SHA1))]
5904 "TARGET_SIMD && TARGET_SHA2"
5905 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5906 [(set_attr "type" "crypto_sha1_slow")]
5907 )
5908
5909 (define_insn "aarch64_crypto_sha1su0v4si"
5910 [(set (match_operand:V4SI 0 "register_operand" "=w")
5911 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5912 (match_operand:V4SI 2 "register_operand" "w")
5913 (match_operand:V4SI 3 "register_operand" "w")]
5914 UNSPEC_SHA1SU0))]
5915 "TARGET_SIMD && TARGET_SHA2"
5916 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5917 [(set_attr "type" "crypto_sha1_xor")]
5918 )
5919
5920 ;; sha256
5921
5922 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5923 [(set (match_operand:V4SI 0 "register_operand" "=w")
5924 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5925 (match_operand:V4SI 2 "register_operand" "w")
5926 (match_operand:V4SI 3 "register_operand" "w")]
5927 CRYPTO_SHA256))]
5928 "TARGET_SIMD && TARGET_SHA2"
5929 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5930 [(set_attr "type" "crypto_sha256_slow")]
5931 )
5932
5933 (define_insn "aarch64_crypto_sha256su0v4si"
5934 [(set (match_operand:V4SI 0 "register_operand" "=w")
5935 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5936 (match_operand:V4SI 2 "register_operand" "w")]
5937 UNSPEC_SHA256SU0))]
5938 "TARGET_SIMD && TARGET_SHA2"
5939 "sha256su0\\t%0.4s, %2.4s"
5940 [(set_attr "type" "crypto_sha256_fast")]
5941 )
5942
5943 (define_insn "aarch64_crypto_sha256su1v4si"
5944 [(set (match_operand:V4SI 0 "register_operand" "=w")
5945 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5946 (match_operand:V4SI 2 "register_operand" "w")
5947 (match_operand:V4SI 3 "register_operand" "w")]
5948 UNSPEC_SHA256SU1))]
5949 "TARGET_SIMD && TARGET_SHA2"
5950 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5951 [(set_attr "type" "crypto_sha256_slow")]
5952 )
5953
5954 ;; sha512
5955
5956 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
5957 [(set (match_operand:V2DI 0 "register_operand" "=w")
5958 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5959 (match_operand:V2DI 2 "register_operand" "w")
5960 (match_operand:V2DI 3 "register_operand" "w")]
5961 CRYPTO_SHA512))]
5962 "TARGET_SIMD && TARGET_SHA3"
5963 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
5964 [(set_attr "type" "crypto_sha512")]
5965 )
5966
5967 (define_insn "aarch64_crypto_sha512su0qv2di"
5968 [(set (match_operand:V2DI 0 "register_operand" "=w")
5969 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5970 (match_operand:V2DI 2 "register_operand" "w")]
5971 UNSPEC_SHA512SU0))]
5972 "TARGET_SIMD && TARGET_SHA3"
5973 "sha512su0\\t%0.2d, %2.2d"
5974 [(set_attr "type" "crypto_sha512")]
5975 )
5976
5977 (define_insn "aarch64_crypto_sha512su1qv2di"
5978 [(set (match_operand:V2DI 0 "register_operand" "=w")
5979 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
5980 (match_operand:V2DI 2 "register_operand" "w")
5981 (match_operand:V2DI 3 "register_operand" "w")]
5982 UNSPEC_SHA512SU1))]
5983 "TARGET_SIMD && TARGET_SHA3"
5984 "sha512su1\\t%0.2d, %2.2d, %3.2d"
5985 [(set_attr "type" "crypto_sha512")]
5986 )
5987
5988 ;; sha3
5989
5990 (define_insn "eor3q<mode>4"
5991 [(set (match_operand:VQ_I 0 "register_operand" "=w")
5992 (xor:VQ_I
5993 (xor:VQ_I
5994 (match_operand:VQ_I 2 "register_operand" "w")
5995 (match_operand:VQ_I 3 "register_operand" "w"))
5996 (match_operand:VQ_I 1 "register_operand" "w")))]
5997 "TARGET_SIMD && TARGET_SHA3"
5998 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
5999 [(set_attr "type" "crypto_sha3")]
6000 )
6001
6002 (define_insn "aarch64_rax1qv2di"
6003 [(set (match_operand:V2DI 0 "register_operand" "=w")
6004 (xor:V2DI
6005 (rotate:V2DI
6006 (match_operand:V2DI 2 "register_operand" "w")
6007 (const_int 1))
6008 (match_operand:V2DI 1 "register_operand" "w")))]
6009 "TARGET_SIMD && TARGET_SHA3"
6010 "rax1\\t%0.2d, %1.2d, %2.2d"
6011 [(set_attr "type" "crypto_sha3")]
6012 )
6013
6014 (define_insn "aarch64_xarqv2di"
6015 [(set (match_operand:V2DI 0 "register_operand" "=w")
6016 (rotatert:V2DI
6017 (xor:V2DI
6018 (match_operand:V2DI 1 "register_operand" "%w")
6019 (match_operand:V2DI 2 "register_operand" "w"))
6020 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6021 "TARGET_SIMD && TARGET_SHA3"
6022 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6023 [(set_attr "type" "crypto_sha3")]
6024 )
6025
6026 (define_insn "bcaxq<mode>4"
6027 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6028 (xor:VQ_I
6029 (and:VQ_I
6030 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6031 (match_operand:VQ_I 2 "register_operand" "w"))
6032 (match_operand:VQ_I 1 "register_operand" "w")))]
6033 "TARGET_SIMD && TARGET_SHA3"
6034 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6035 [(set_attr "type" "crypto_sha3")]
6036 )
6037
6038 ;; SM3
6039
6040 (define_insn "aarch64_sm3ss1qv4si"
6041 [(set (match_operand:V4SI 0 "register_operand" "=w")
6042 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6043 (match_operand:V4SI 2 "register_operand" "w")
6044 (match_operand:V4SI 3 "register_operand" "w")]
6045 UNSPEC_SM3SS1))]
6046 "TARGET_SIMD && TARGET_SM4"
6047 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6048 [(set_attr "type" "crypto_sm3")]
6049 )
6050
6051
6052 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6053 [(set (match_operand:V4SI 0 "register_operand" "=w")
6054 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6055 (match_operand:V4SI 2 "register_operand" "w")
6056 (match_operand:V4SI 3 "register_operand" "w")
6057 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6058 CRYPTO_SM3TT))]
6059 "TARGET_SIMD && TARGET_SM4"
6060 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6061 [(set_attr "type" "crypto_sm3")]
6062 )
6063
6064 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6065 [(set (match_operand:V4SI 0 "register_operand" "=w")
6066 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6067 (match_operand:V4SI 2 "register_operand" "w")
6068 (match_operand:V4SI 3 "register_operand" "w")]
6069 CRYPTO_SM3PART))]
6070 "TARGET_SIMD && TARGET_SM4"
6071 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6072 [(set_attr "type" "crypto_sm3")]
6073 )
6074
6075 ;; SM4
6076
6077 (define_insn "aarch64_sm4eqv4si"
6078 [(set (match_operand:V4SI 0 "register_operand" "=w")
6079 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6080 (match_operand:V4SI 2 "register_operand" "w")]
6081 UNSPEC_SM4E))]
6082 "TARGET_SIMD && TARGET_SM4"
6083 "sm4e\\t%0.4s, %2.4s"
6084 [(set_attr "type" "crypto_sm4")]
6085 )
6086
6087 (define_insn "aarch64_sm4ekeyqv4si"
6088 [(set (match_operand:V4SI 0 "register_operand" "=w")
6089 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6090 (match_operand:V4SI 2 "register_operand" "w")]
6091 UNSPEC_SM4EKEY))]
6092 "TARGET_SIMD && TARGET_SM4"
6093 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6094 [(set_attr "type" "crypto_sm4")]
6095 )
6096
6097 ;; fp16fml
6098
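;; The FP16FML expanders pass explicit low/high half selectors to the
;; fmlal/fmlsl patterns; e.g. for the V2SF variant below, p1 and p2 both
;; select lanes 0 and 1 (the low half) of each V4HF multiplicand.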
6099 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6100 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6101 (unspec:VDQSF
6102 [(match_operand:VDQSF 1 "register_operand" "0")
6103 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6104 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6105 VFMLA16_LOW))]
6106 "TARGET_F16FML"
6107 {
6108 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6109 <nunits> * 2, false);
6110 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6111 <nunits> * 2, false);
6112
6113 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6114 operands[1],
6115 operands[2],
6116 operands[3],
6117 p1, p2));
6118 DONE;
6119
6120 })
6121
6122 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6123 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6124 (unspec:VDQSF
6125 [(match_operand:VDQSF 1 "register_operand" "0")
6126 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6127 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6128 VFMLA16_HIGH))]
6129 "TARGET_F16FML"
6130 {
6131 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6132 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6133
6134 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6135 operands[1],
6136 operands[2],
6137 operands[3],
6138 p1, p2));
6139 DONE;
6140 })
6141
6142 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6143 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6144 (fma:VDQSF
6145 (float_extend:VDQSF
6146 (vec_select:<VFMLA_SEL_W>
6147 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6148 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6149 (float_extend:VDQSF
6150 (vec_select:<VFMLA_SEL_W>
6151 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6152 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6153 (match_operand:VDQSF 1 "register_operand" "0")))]
6154 "TARGET_F16FML"
6155 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6156 [(set_attr "type" "neon_fp_mul_s")]
6157 )
6158
6159 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6160 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6161 (fma:VDQSF
6162 (float_extend:VDQSF
6163 (neg:<VFMLA_SEL_W>
6164 (vec_select:<VFMLA_SEL_W>
6165 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6166 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6167 (float_extend:VDQSF
6168 (vec_select:<VFMLA_SEL_W>
6169 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6170 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6171 (match_operand:VDQSF 1 "register_operand" "0")))]
6172 "TARGET_F16FML"
6173 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6174 [(set_attr "type" "neon_fp_mul_s")]
6175 )
6176
6177 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6178 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6179 (fma:VDQSF
6180 (float_extend:VDQSF
6181 (vec_select:<VFMLA_SEL_W>
6182 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6183 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6184 (float_extend:VDQSF
6185 (vec_select:<VFMLA_SEL_W>
6186 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6187 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6188 (match_operand:VDQSF 1 "register_operand" "0")))]
6189 "TARGET_F16FML"
6190 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6191 [(set_attr "type" "neon_fp_mul_s")]
6192 )
6193
6194 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6195 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6196 (fma:VDQSF
6197 (float_extend:VDQSF
6198 (neg:<VFMLA_SEL_W>
6199 (vec_select:<VFMLA_SEL_W>
6200 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6201 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6202 (float_extend:VDQSF
6203 (vec_select:<VFMLA_SEL_W>
6204 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6205 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6206 (match_operand:VDQSF 1 "register_operand" "0")))]
6207 "TARGET_F16FML"
6208 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6209 [(set_attr "type" "neon_fp_mul_s")]
6210 )
6211
6212 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6213 [(set (match_operand:V2SF 0 "register_operand" "")
6214 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6215 (match_operand:V4HF 2 "register_operand" "")
6216 (match_operand:V4HF 3 "register_operand" "")
6217 (match_operand:SI 4 "aarch64_imm2" "")]
6218 VFMLA16_LOW))]
6219 "TARGET_F16FML"
6220 {
6221 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6222 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6223
6224 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6225 operands[1],
6226 operands[2],
6227 operands[3],
6228 p1, lane));
6229 DONE;
6230 }
6231 )
6232
6233 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6234 [(set (match_operand:V2SF 0 "register_operand" "")
6235 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6236 (match_operand:V4HF 2 "register_operand" "")
6237 (match_operand:V4HF 3 "register_operand" "")
6238 (match_operand:SI 4 "aarch64_imm2" "")]
6239 VFMLA16_HIGH))]
6240 "TARGET_F16FML"
6241 {
6242 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6243 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6244
6245 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6246 operands[1],
6247 operands[2],
6248 operands[3],
6249 p1, lane));
6250 DONE;
6251 })
6252
6253 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6254 [(set (match_operand:V2SF 0 "register_operand" "=w")
6255 (fma:V2SF
6256 (float_extend:V2SF
6257 (vec_select:V2HF
6258 (match_operand:V4HF 2 "register_operand" "w")
6259 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6260 (float_extend:V2SF
6261 (vec_duplicate:V2HF
6262 (vec_select:HF
6263 (match_operand:V4HF 3 "register_operand" "x")
6264 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6265 (match_operand:V2SF 1 "register_operand" "0")))]
6266 "TARGET_F16FML"
6267 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6268 [(set_attr "type" "neon_fp_mul_s")]
6269 )
6270
6271 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6272 [(set (match_operand:V2SF 0 "register_operand" "=w")
6273 (fma:V2SF
6274 (float_extend:V2SF
6275 (neg:V2HF
6276 (vec_select:V2HF
6277 (match_operand:V4HF 2 "register_operand" "w")
6278 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6279 (float_extend:V2SF
6280 (vec_duplicate:V2HF
6281 (vec_select:HF
6282 (match_operand:V4HF 3 "register_operand" "x")
6283 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6284 (match_operand:V2SF 1 "register_operand" "0")))]
6285 "TARGET_F16FML"
6286 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6287 [(set_attr "type" "neon_fp_mul_s")]
6288 )
6289
6290 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6291 [(set (match_operand:V2SF 0 "register_operand" "=w")
6292 (fma:V2SF
6293 (float_extend:V2SF
6294 (vec_select:V2HF
6295 (match_operand:V4HF 2 "register_operand" "w")
6296 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6297 (float_extend:V2SF
6298 (vec_duplicate:V2HF
6299 (vec_select:HF
6300 (match_operand:V4HF 3 "register_operand" "x")
6301 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6302 (match_operand:V2SF 1 "register_operand" "0")))]
6303 "TARGET_F16FML"
6304 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6305 [(set_attr "type" "neon_fp_mul_s")]
6306 )
6307
6308 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6309 [(set (match_operand:V2SF 0 "register_operand" "=w")
6310 (fma:V2SF
6311 (float_extend:V2SF
6312 (neg:V2HF
6313 (vec_select:V2HF
6314 (match_operand:V4HF 2 "register_operand" "w")
6315 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6316 (float_extend:V2SF
6317 (vec_duplicate:V2HF
6318 (vec_select:HF
6319 (match_operand:V4HF 3 "register_operand" "x")
6320 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6321 (match_operand:V2SF 1 "register_operand" "0")))]
6322 "TARGET_F16FML"
6323 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6324 [(set_attr "type" "neon_fp_mul_s")]
6325 )
6326
6327 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6328 [(set (match_operand:V4SF 0 "register_operand" "")
6329 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6330 (match_operand:V8HF 2 "register_operand" "")
6331 (match_operand:V8HF 3 "register_operand" "")
6332 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6333 VFMLA16_LOW))]
6334 "TARGET_F16FML"
6335 {
6336 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6337 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6338
6339 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6340 operands[1],
6341 operands[2],
6342 operands[3],
6343 p1, lane));
6344 DONE;
6345 })
6346
6347 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6348 [(set (match_operand:V4SF 0 "register_operand" "")
6349 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6350 (match_operand:V8HF 2 "register_operand" "")
6351 (match_operand:V8HF 3 "register_operand" "")
6352 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6353 VFMLA16_HIGH))]
6354 "TARGET_F16FML"
6355 {
6356 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6357 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6358
6359 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6360 operands[1],
6361 operands[2],
6362 operands[3],
6363 p1, lane));
6364 DONE;
6365 })
6366
6367 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6368 [(set (match_operand:V4SF 0 "register_operand" "=w")
6369 (fma:V4SF
6370 (float_extend:V4SF
6371 (vec_select:V4HF
6372 (match_operand:V8HF 2 "register_operand" "w")
6373 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6374 (float_extend:V4SF
6375 (vec_duplicate:V4HF
6376 (vec_select:HF
6377 (match_operand:V8HF 3 "register_operand" "x")
6378 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6379 (match_operand:V4SF 1 "register_operand" "0")))]
6380 "TARGET_F16FML"
6381 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6382 [(set_attr "type" "neon_fp_mul_s")]
6383 )
6384
6385 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6386 [(set (match_operand:V4SF 0 "register_operand" "=w")
6387 (fma:V4SF
6388 (float_extend:V4SF
6389 (neg:V4HF
6390 (vec_select:V4HF
6391 (match_operand:V8HF 2 "register_operand" "w")
6392 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6393 (float_extend:V4SF
6394 (vec_duplicate:V4HF
6395 (vec_select:HF
6396 (match_operand:V8HF 3 "register_operand" "x")
6397 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6398 (match_operand:V4SF 1 "register_operand" "0")))]
6399 "TARGET_F16FML"
6400 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6401 [(set_attr "type" "neon_fp_mul_s")]
6402 )
6403
6404 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6405 [(set (match_operand:V4SF 0 "register_operand" "=w")
6406 (fma:V4SF
6407 (float_extend:V4SF
6408 (vec_select:V4HF
6409 (match_operand:V8HF 2 "register_operand" "w")
6410 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6411 (float_extend:V4SF
6412 (vec_duplicate:V4HF
6413 (vec_select:HF
6414 (match_operand:V8HF 3 "register_operand" "x")
6415 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6416 (match_operand:V4SF 1 "register_operand" "0")))]
6417 "TARGET_F16FML"
6418 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6419 [(set_attr "type" "neon_fp_mul_s")]
6420 )
6421
6422 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6423 [(set (match_operand:V4SF 0 "register_operand" "=w")
6424 (fma:V4SF
6425 (float_extend:V4SF
6426 (neg:V4HF
6427 (vec_select:V4HF
6428 (match_operand:V8HF 2 "register_operand" "w")
6429 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6430 (float_extend:V4SF
6431 (vec_duplicate:V4HF
6432 (vec_select:HF
6433 (match_operand:V8HF 3 "register_operand" "x")
6434 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6435 (match_operand:V4SF 1 "register_operand" "0")))]
6436 "TARGET_F16FML"
6437 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6438 [(set_attr "type" "neon_fp_mul_s")]
6439 )
6440
6441 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6442 [(set (match_operand:V2SF 0 "register_operand" "")
6443 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6444 (match_operand:V4HF 2 "register_operand" "")
6445 (match_operand:V8HF 3 "register_operand" "")
6446 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6447 VFMLA16_LOW))]
6448 "TARGET_F16FML"
6449 {
6450 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6451 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6452
6453 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6454 operands[1],
6455 operands[2],
6456 operands[3],
6457 p1, lane));
6458 DONE;
6459
6460 })
6461
6462 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6463 [(set (match_operand:V2SF 0 "register_operand" "")
6464 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6465 (match_operand:V4HF 2 "register_operand" "")
6466 (match_operand:V8HF 3 "register_operand" "")
6467 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6468 VFMLA16_HIGH))]
6469 "TARGET_F16FML"
6470 {
6471 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6472 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6473
6474 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6475 operands[1],
6476 operands[2],
6477 operands[3],
6478 p1, lane));
6479 DONE;
6480
6481 })
6482
(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (vec_select:V2HF
	      (match_operand:V4HF 2 "register_operand" "w")
	      (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (neg:V2HF
	      (vec_select:V2HF
		(match_operand:V4HF 2 "register_operand" "w")
		(match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (vec_select:V2HF
	      (match_operand:V4HF 2 "register_operand" "w")
	      (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
	(fma:V2SF
	  (float_extend:V2SF
	    (neg:V2HF
	      (vec_select:V2HF
		(match_operand:V4HF 2 "register_operand" "w")
		(match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
	  (float_extend:V2SF
	    (vec_duplicate:V2HF
	      (vec_select:HF
		(match_operand:V8HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
	  (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

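;; The _lane_ (as opposed to _laneq_) variants below take the element from
;; a 64-bit V4HF vector, so the index predicate is aarch64_imm2 (range 0-3,
;; constraint "Ui2") rather than aarch64_lane_imm3 (range 0-7).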
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
		     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
							   operands[1],
							   operands[2],
							   operands[3],
							   p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
	(unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
		      (match_operand:V8HF 2 "register_operand" "")
		      (match_operand:V4HF 3 "register_operand" "")
		      (match_operand:SI 4 "aarch64_imm2" "")]
		     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
							    operands[1],
							    operands[2],
							    operands[3],
							    p1, lane));
  DONE;
})

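;; Note that operand 3 in all the by-element patterns uses the "x"
;; constraint (V0-V15) rather than "w": the indexed half-precision
;; multiply-accumulate encodings have only four bits for Vm, so the
;; element register cannot come from the upper half of the register file.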
(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (vec_select:V4HF
	      (match_operand:V8HF 2 "register_operand" "w")
	      (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (neg:V4HF
	      (vec_select:V4HF
		(match_operand:V8HF 2 "register_operand" "w")
		(match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (vec_select:V4HF
	      (match_operand:V8HF 2 "register_operand" "w")
	      (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
	(fma:V4SF
	  (float_extend:V4SF
	    (neg:V4HF
	      (vec_select:V4HF
		(match_operand:V8HF 2 "register_operand" "w")
		(match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
	  (float_extend:V4SF
	    (vec_duplicate:V4HF
	      (vec_select:HF
		(match_operand:V4HF 3 "register_operand" "x")
		(parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
	  (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull: 64x64->128-bit polynomial (carry-less) multiply.

(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:DI 1 "register_operand" "w")
		    (match_operand:DI 2 "register_operand" "w")]
		   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
	(unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
		    (match_operand:V2DI 2 "register_operand" "w")]
		   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)
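
;; As a usage sketch only (assuming the standard arm_neon.h crypto
;; intrinsics, which require +aes; the function name here is hypothetical),
;; code such as
;;
;;   #include <arm_neon.h>
;;   poly128_t clmul_both_halves (poly64x2_t a, poly64x2_t b)
;;   {
;;     /* Low 64-bit halves: pmull v0.1q, v1.1d, v2.1d.  */
;;     poly128_t lo = vmull_p64 (vgetq_lane_p64 (a, 0),
;;				 vgetq_lane_p64 (b, 0));
;;     /* High 64-bit halves: pmull2 v0.1q, v1.2d, v2.2d.  */
;;     poly128_t hi = vmull_high_p64 (a, b);
;;     /* XOR of partial products, as in GHASH-style reductions.  */
;;     return lo ^ hi;
;;   }
;;
;; should match aarch64_crypto_pmulldi and aarch64_crypto_pmullv2di
;; respectively.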