;; Machine description for AArch64 AdvSIMD architecture.
;; Copyright (C) 2011-2019 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

(define_expand "mov<mode>"
  [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
        (match_operand:VALL_F16 1 "general_operand" ""))]
  "TARGET_SIMD"
  "
  /* Force the operand into a register if it is not an
     immediate whose use can be replaced with xzr.
     If the mode is 16 bytes wide, then we will be doing
     a stp in DI mode, so we check the validity of that.
     If the mode is 8 bytes wide, then we will be doing a
     normal str, so the check need not apply.  */
  if (GET_CODE (operands[0]) == MEM
      && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
           && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
                && aarch64_mem_pair_operand (operands[0], DImode))
               || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
    operands[1] = force_reg (<MODE>mode, operands[1]);
  "
)

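;; For example, storing a zero vector in a 16-byte mode such as V4SI to a
;; valid pair address needs no SIMD register at all: the
;; *aarch64_simd_mov<VQ:mode> pattern below emits "stp xzr, xzr, %0" for it
;; directly, so the expander above deliberately does not force such a
;; constant into a register.
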
(define_expand "movmisalign<mode>"
  [(set (match_operand:VALL 0 "nonimmediate_operand" "")
        (match_operand:VALL 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  /* This pattern is not permitted to fail during expansion: if both arguments
     are non-registers (e.g. memory := constant, which can be created by the
     auto-vectorizer), force operand 1 into a register.  */
  if (!register_operand (operands[0], <MODE>mode)
      && !register_operand (operands[1], <MODE>mode))
    operands[1] = force_reg (<MODE>mode, operands[1]);
})

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
        (vec_duplicate:VDQ_I
          (match_operand:<VEL> 1 "register_operand" "w,?r")))]
  "TARGET_SIMD"
  "@
   dup\\t%0.<Vtype>, %1.<Vetype>[0]
   dup\\t%0.<Vtype>, %<vw>1"
  [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
)

(define_insn "aarch64_simd_dup<mode>"
  [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
        (vec_duplicate:VDQF_F16
          (match_operand:<VEL> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (vec_select:<VEL>
            (match_operand:VALL_F16 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16_NO_V2Q
          (vec_select:<VEL>
            (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand" "i")])
          )))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon_dup<q>")]
)

(define_insn "*aarch64_simd_mov<VD:mode>"
  [(set (match_operand:VD 0 "nonimmediate_operand"
                "=w, m,  m,  w, ?r, ?w, ?r, w")
        (match_operand:VD 1 "general_operand"
                "m,  Dz, w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
   switch (which_alternative)
     {
     case 0: return "ldr\t%d0, %1";
     case 1: return "str\txzr, %0";
     case 2: return "str\t%d1, %0";
     case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
     case 4: return "umov\t%0, %1.d[0]";
     case 5: return "fmov\t%d0, %1";
     case 6: return "mov\t%0, %1";
     case 7:
       return aarch64_output_simd_mov_immediate (operands[1], 64);
     default: gcc_unreachable ();
     }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
                     neon_logic<q>, neon_to_gp<q>, f_mcr,\
                     mov_reg, neon_move<q>")]
)

(define_insn "*aarch64_simd_mov<VQ:mode>"
  [(set (match_operand:VQ 0 "nonimmediate_operand"
                "=w, Umn, m,  w, ?r, ?w, ?r, w")
        (match_operand:VQ 1 "general_operand"
                "m,  Dz,  w,  w,  w,  r,  r, Dn"))]
  "TARGET_SIMD
   && (register_operand (operands[0], <MODE>mode)
       || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
{
  switch (which_alternative)
    {
    case 0:
      return "ldr\t%q0, %1";
    case 1:
      return "stp\txzr, xzr, %0";
    case 2:
      return "str\t%q1, %0";
    case 3:
      return "mov\t%0.<Vbtype>, %1.<Vbtype>";
    case 4:
    case 5:
    case 6:
      return "#";
    case 7:
      return aarch64_output_simd_mov_immediate (operands[1], 128);
    default:
      gcc_unreachable ();
    }
}
  [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
                     neon_logic<q>, multiple, multiple,\
                     multiple, neon_move<q>")
   (set_attr "length" "4,4,4,4,8,8,8,4")]
)

;; When storing lane zero we can use the normal STR and its more permissive
;; addressing modes.

(define_insn "aarch64_store_lane0<mode>"
  [(set (match_operand:<VEL> 0 "memory_operand" "=m")
        (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
                          (parallel [(match_operand 2 "const_int_operand" "n")])))]
  "TARGET_SIMD
   && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
  "str\\t%<Vetype>1, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "load_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "register_operand" "=w")
        (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:DREG2 2 "register_operand" "=w")
        (match_operand:DREG2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "ldp\\t%d0, %d2, %1"
  [(set_attr "type" "neon_ldp")]
)

(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
  [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:DREG 1 "register_operand" "w"))
   (set (match_operand:DREG2 2 "memory_operand" "=m")
        (match_operand:DREG2 3 "register_operand" "w"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[2], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[0], 0),
                                  GET_MODE_SIZE (<DREG:MODE>mode)))"
  "stp\\t%d1, %d3, %0"
  [(set_attr "type" "neon_stp")]
)

(define_insn "load_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "register_operand" "=w")
        (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
   (set (match_operand:VQ2 2 "register_operand" "=w")
        (match_operand:VQ2 3 "memory_operand" "m"))]
  "TARGET_SIMD
   && rtx_equal_p (XEXP (operands[3], 0),
                   plus_constant (Pmode,
                                  XEXP (operands[1], 0),
                                  GET_MODE_SIZE (<VQ:MODE>mode)))"
  "ldp\\t%q0, %q2, %1"
  [(set_attr "type" "neon_ldp_q")]
)

(define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
  [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
        (match_operand:VQ 1 "register_operand" "w"))
   (set (match_operand:VQ2 2 "memory_operand" "=m")
        (match_operand:VQ2 3 "register_operand" "w"))]
  "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
                               plus_constant (Pmode,
                                              XEXP (operands[0], 0),
                                              GET_MODE_SIZE (<VQ:MODE>mode)))"
  "stp\\t%q1, %q3, %0"
  [(set_attr "type" "neon_stp_q")]
)


(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && GP_REGNUM_P (REGNO (operands[0]))
   && GP_REGNUM_P (REGNO (operands[1]))"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:VQ 0 "register_operand" "")
        (match_operand:VQ 1 "register_operand" ""))]
  "TARGET_SIMD && reload_completed
   && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
       || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
  [(const_int 0)]
{
  aarch64_split_simd_move (operands[0], operands[1]);
  DONE;
})

(define_expand "@aarch64_split_simd_mov<mode>"
  [(set (match_operand:VQ 0)
        (match_operand:VQ 1))]
  "TARGET_SIMD"
  {
    rtx dst = operands[0];
    rtx src = operands[1];

    if (GP_REGNUM_P (REGNO (src)))
      {
        rtx src_low_part = gen_lowpart (<VHALF>mode, src);
        rtx src_high_part = gen_highpart (<VHALF>mode, src);

        emit_insn
          (gen_move_lo_quad_<mode> (dst, src_low_part));
        emit_insn
          (gen_move_hi_quad_<mode> (dst, src_high_part));
      }

    else
      {
        rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
        rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
        rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
        rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);

        emit_insn
          (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
        emit_insn
          (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
      }
    DONE;
  }
)

(define_insn "aarch64_simd_mov_from_<mode>low"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[0]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "aarch64_simd_mov_from_<mode>high"
  [(set (match_operand:<VHALF> 0 "register_operand" "=r")
        (vec_select:<VHALF>
          (match_operand:VQ 1 "register_operand" "w")
          (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
  "TARGET_SIMD && reload_completed"
  "umov\t%0, %1.d[1]"
  [(set_attr "type" "neon_to_gp<q>")
   (set_attr "length" "4")
  ])

(define_insn "orn<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "bic<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "add<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                    (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add<q>")]
)

(define_insn "sub<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                     (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub<q>")]
)

(define_insn "mul<mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                       (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

(define_insn "bswap<mode>2"
  [(set (match_operand:VDQHSD 0 "register_operand" "=w")
        (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rev<q>")]
)

(define_insn "aarch64_rbit<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
                   UNSPEC_RBIT))]
  "TARGET_SIMD"
  "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_rbit")]
)

(define_expand "ctz<mode>2"
  [(set (match_operand:VS 0 "register_operand")
        (ctz:VS (match_operand:VS 1 "register_operand")))]
  "TARGET_SIMD"
  {
     emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
     rtx op0_castsi2qi = simplify_gen_subreg (<VS:VSI2QI>mode, operands[0],
                                              <MODE>mode, 0);
     emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
     emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
     DONE;
  }
)

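;; A worked sketch of why the sequence above is correct: the byte swap
;; reverses the bytes of each element and the RBIT on the QI view reverses
;; the bits within each byte, so together they bit-reverse every element.
;; Hence ctz (x) == clz (bit_reverse (x)), which is what the final CLZ
;; computes.
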
(define_expand "xorsign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode imode = <V_INT_EQUIV>mode;
  rtx v_bitmask = gen_reg_rtx (imode);
  rtx op1x = gen_reg_rtx (imode);
  rtx op2x = gen_reg_rtx (imode);

  rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
  rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);

  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));

  emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
  emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
  emit_move_insn (operands[0],
                  lowpart_subreg (<MODE>mode, op1x, imode));
  DONE;
}
)

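;; In other words, on the integer view of the bits the expansion above
;; computes
;;   xorsign (x, y) = x ^ (y & sign_mask)
;; i.e. x with its sign flipped whenever y is negative; only the sign bit
;; of y survives the AND.
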
;; The fcadd and fcmla patterns are made UNSPEC explicitly because their
;; usage needs to guarantee that the source vectors are contiguous.  It
;; would be wrong to describe the operation without being able to describe
;; the permute that is also required, but even if that were done the
;; permute would have been created as a LOAD_LANES, which means the values
;; in the registers are in the wrong order.
(define_insn "aarch64_fcadd<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FCADD))]
  "TARGET_COMPLEX"
  "fcadd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcadd")]
)

(define_insn "aarch64_fcmla<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")]
                                  FCMLA)))]
  "TARGET_COMPLEX"
  "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>, #<rot>"
  [(set_attr "type" "neon_fcmla")]
)


(define_insn "aarch64_fcmla_lane<rot><mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "0")
                    (unspec:VHSDF [(match_operand:VHSDF 2 "register_operand" "w")
                                   (match_operand:VHSDF 3 "register_operand" "w")
                                   (match_operand:SI 4 "const_int_operand" "n")]
                                  FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (<VHALF>mode, INTVAL (operands[4]));
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmla_laneq<rot>v4hf"
  [(set (match_operand:V4HF 0 "register_operand" "=w")
        (plus:V4HF (match_operand:V4HF 1 "register_operand" "0")
                   (unspec:V4HF [(match_operand:V4HF 2 "register_operand" "w")
                                 (match_operand:V8HF 3 "register_operand" "w")
                                 (match_operand:SI 4 "const_int_operand" "n")]
                                FCMLA)))]
  "TARGET_COMPLEX"
{
  operands[4] = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
  return "fcmla\t%0.4h, %2.4h, %3.h[%4], #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

(define_insn "aarch64_fcmlaq_lane<rot><mode>"
  [(set (match_operand:VQ_HSF 0 "register_operand" "=w")
        (plus:VQ_HSF (match_operand:VQ_HSF 1 "register_operand" "0")
                     (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "register_operand" "w")
                                     (match_operand:<VHALF> 3 "register_operand" "w")
                                     (match_operand:SI 4 "const_int_operand" "n")]
                                    FCMLA)))]
  "TARGET_COMPLEX"
{
  int nunits = GET_MODE_NUNITS (<VHALF>mode).to_constant ();
  operands[4]
    = gen_int_mode (ENDIAN_LANE_N (nunits / 2, INTVAL (operands[4])), SImode);
  return "fcmla\t%0.<Vtype>, %2.<Vtype>, %3.<FCMLA_maybe_lane>, #<rot>";
}
  [(set_attr "type" "neon_fcmla")]
)

;; These instructions map to the __builtins for the Dot Product operations.
(define_insn "aarch64_<sur>dot<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:<VSI2QI> 3 "register_operand" "w")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
  "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.<Vdottype>"
  [(set_attr "type" "neon_dot<q>")]
)

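;; As a concrete instantiation (a sketch, not an exhaustive list): with a
;; V2SI accumulator and V8QI inputs the pattern above emits
;;   udot v0.2s, v1.8b, v2.8b
;; (or sdot for the signed variant).
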
;; These expands map to the Dot Product optab the vectorizer checks for.
;; The auto-vectorizer expects a dot product builtin that also does an
;; accumulation into the provided register.
;; Given the following pattern
;;
;; for (i=0; i<len; i++) {
;;     c = a[i] * b[i];
;;     r += c;
;;   }
;; return r;
;;
;; This can be auto-vectorized to
;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
;;
;; given enough iterations.  However, the vectorizer can keep unrolling
;; the loop:
;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
(define_expand "<sur>dot_prod<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand")
        (plus:VS (unspec:VS [(match_operand:<VSI2QI> 1 "register_operand")
                             (match_operand:<VSI2QI> 2 "register_operand")]
                            DOTPROD)
                 (match_operand:VS 3 "register_operand")))]
  "TARGET_DOTPROD"
{
  emit_insn (
    gen_aarch64_<sur>dot<vsi2qi> (operands[3], operands[3], operands[1],
                                  operands[2]));
  emit_insn (gen_rtx_SET (operands[0], operands[3]));
  DONE;
})

;; These instructions map to the __builtins for the Dot Product
;; indexed operations.
(define_insn "aarch64_<sur>dot_lane<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V8QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_insn "aarch64_<sur>dot_laneq<vsi2qi>"
  [(set (match_operand:VS 0 "register_operand" "=w")
        (plus:VS (match_operand:VS 1 "register_operand" "0")
                 (unspec:VS [(match_operand:<VSI2QI> 2 "register_operand" "w")
                             (match_operand:V16QI 3 "register_operand" "<h_con>")
                             (match_operand:SI 4 "immediate_operand" "i")]
                            DOTPROD)))]
  "TARGET_DOTPROD"
{
  operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
  return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
}
  [(set_attr "type" "neon_dot<q>")]
)

(define_expand "copysign<mode>3"
  [(match_operand:VHSDF 0 "register_operand")
   (match_operand:VHSDF 1 "register_operand")
   (match_operand:VHSDF 2 "register_operand")]
  "TARGET_FLOAT && TARGET_SIMD"
{
  rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
  int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;

  emit_move_insn (v_bitmask,
                  aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
                                                     HOST_WIDE_INT_M1U << bits));
  emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
                                         operands[2], operands[1]));
  DONE;
}
)

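;; The BSL above selects the sign bit from operands[2] and every other bit
;; from operands[1], i.e. on the integer view of the bits
;;   copysign (x, y) = (y & sign_mask) | (x & ~sign_mask)
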
(define_insn "*aarch64_mul3_elt<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (vec_select:<VEL>
              (match_operand:VMUL 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
  [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
        (mult:VMUL_CHANGE_NLANES
          (vec_duplicate:VMUL_CHANGE_NLANES
            (vec_select:<VEL>
              (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
              (parallel [(match_operand:SI 2 "immediate_operand")])))
          (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
  }
  [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mul3_elt_from_dup<mode>"
  [(set (match_operand:VMUL 0 "register_operand" "=w")
        (mult:VMUL
          (vec_duplicate:VMUL
            (match_operand:<VEL> 1 "register_operand" "<h_con>"))
          (match_operand:VMUL 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

(define_insn "@aarch64_rsqrte<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
                           UNSPEC_RSQRTE))]
  "TARGET_SIMD"
  "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])

(define_insn "@aarch64_rsqrts<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
                            (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
                           UNSPEC_RSQRTS))]
  "TARGET_SIMD"
  "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])

(define_expand "rsqrt<mode>2"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
                      UNSPEC_RSQRT))]
  "TARGET_SIMD"
{
  aarch64_emit_approx_sqrt (operands[0], operands[1], true);
  DONE;
})

(define_insn "*aarch64_mul3_elt_to_64v2df"
  [(set (match_operand:DF 0 "register_operand" "=w")
        (mult:DF
          (vec_select:DF
            (match_operand:V2DF 1 "register_operand" "w")
            (parallel [(match_operand:SI 2 "immediate_operand")]))
          (match_operand:DF 3 "register_operand" "w")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
    return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
  }
  [(set_attr "type" "neon_fp_mul_d_scalar_q")]
)

(define_insn "neg<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "neg\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_neg<q>")]
)

(define_insn "abs<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "abs\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; The intrinsic version of integer ABS must not be allowed to
;; combine with any operation with an integrated ABS step, such
;; as SABD.
(define_insn "aarch64_abs<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
          UNSPEC_ABS))]
  "TARGET_SIMD"
  "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_abs<q>")]
)

;; It's tempting to represent SABD as ABS (MINUS op1 op2).
;; This isn't accurate, as ABS always treats its input as a signed value.
;; So (ABS:QI (minus:QI 64 -128)) == (ABS:QI (192 or -64 signed)) == 64,
;; whereas SABD would return 192 (-64 signed) in the above example.
;; Use MINUS ([us]max (op1, op2), [us]min (op1, op2)) instead.
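;; Subtracting min from max gives the same bit pattern as SABD:
;; e.g. smax (64, -128) - smin (64, -128) = 64 - (-128) = 192 (-64 signed),
;; exactly the SABD result in the example above.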
(define_insn "*aarch64_<su>abd<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI
          (USMAX:VDQ_BHSI
            (match_operand:VDQ_BHSI 1 "register_operand" "w")
            (match_operand:VDQ_BHSI 2 "register_operand" "w"))
          (<max_opp>:VDQ_BHSI
            (match_dup 1)
            (match_dup 2))))]
  "TARGET_SIMD"
  "<su>abd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abdl2<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")]
                        ABDL2))]
  "TARGET_SIMD"
  "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_abd<q>")]
)

(define_insn "aarch64_<sur>abal<mode>_4"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:VDQV_S 2 "register_operand" "w")
                         (match_operand:<VDBLW> 3 "register_operand" "0")]
                        ABAL))]
  "TARGET_SIMD"
  "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "aarch64_<sur>adalp<mode>_3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
        (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
                         (match_operand:<VDBLW> 2 "register_operand" "0")]
                        ADALP))]
  "TARGET_SIMD"
  "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
  [(set_attr "type" "neon_reduc_add<q>")]
)

;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
;; inputs in operands 1 and 2.  The sequence also has to perform a widening
;; reduction of the difference into a V4SI vector and accumulate that into
;; operand 3 before copying that into the result operand 0.
;; Perform that with a sequence of:
;; UABDL2	tmp.8h, op1.16b, op2.16b
;; UABAL	tmp.8h, op1.16b, op2.16b
;; UADALP	op3.4s, tmp.8h
;; MOV		op0, op3 // should be eliminated in later passes.
;; The signed version just uses the signed variants of the above instructions.
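;;
;; A typical source loop this expansion vectorizes (a hypothetical sketch,
;; with unsigned char inputs and an int accumulator):
;;   for (i = 0; i < n; i++)
;;     sum += abs (a[i] - b[i]);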

(define_expand "<sur>sadv16qi"
  [(use (match_operand:V4SI 0 "register_operand"))
   (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
                  (use (match_operand:V16QI 2 "register_operand"))] ABAL)
   (use (match_operand:V4SI 3 "register_operand"))]
  "TARGET_SIMD"
  {
    rtx reduc = gen_reg_rtx (V8HImode);
    emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
                                              operands[2]));
    emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
                                             operands[2], reduc));
    emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
                                             operands[3]));
    emit_move_insn (operands[0], operands[3]);
    DONE;
  }
)

(define_insn "aba<mode>_3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
                         (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))
                       (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_arith_acc<q>")]
)

(define_insn "fabd<mode>3"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (abs:VHSDF_HSDF
          (minus:VHSDF_HSDF
            (match_operand:VHSDF_HSDF 1 "register_operand" "w")
            (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_abd_<stype><q>")]
)

;; For AND (vector, register) and BIC (vector, immediate)
(define_insn "and<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm" "w,Db")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_BIC);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

;; For ORR (vector, register) and ORR (vector, immediate)
(define_insn "ior<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w,w")
        (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w,0")
                   (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm" "w,Do")))]
  "TARGET_SIMD"
  {
    switch (which_alternative)
      {
      case 0:
        return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
      case 1:
        return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
                                                  AARCH64_CHECK_ORR);
      default:
        gcc_unreachable ();
      }
  }
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "xor<mode>3"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                   (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "one_cmpl<mode>2"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "not\t%0.<Vbtype>, %1.<Vbtype>"
  [(set_attr "type" "neon_logic<q>")]
)

(define_insn "aarch64_simd_vec_set<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
                (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
            (match_operand:VALL_F16 3 "register_operand" "0,0,0")
            (match_operand:SI 2 "immediate_operand" "i,i,i")))]
  "TARGET_SIMD"
  {
   int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
   operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
   switch (which_alternative)
     {
     case 0:
       return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
     case 1:
       return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
     case 2:
       return "ld1\\t{%0.<Vetype>}[%p2], %1";
     default:
       gcc_unreachable ();
     }
  }
  [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_merge:VALL_F16
            (vec_duplicate:VALL_F16
              (vec_select:<VEL>
                (match_operand:VALL_F16 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
            (match_operand:VALL_F16 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
  [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
        (vec_merge:VALL_F16_NO_V2Q
            (vec_duplicate:VALL_F16_NO_V2Q
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
                (parallel
                  [(match_operand:SI 4 "immediate_operand" "i")])))
            (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
            (match_operand:SI 2 "immediate_operand" "i")))]
  "TARGET_SIMD"
  {
    int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
    operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
    operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
                                           INTVAL (operands[4]));

    return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
  }
  [(set_attr "type" "neon_ins<q>")]
)

(define_insn "aarch64_simd_lshr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_ashr<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                        (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
  "TARGET_SIMD"
  "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_imm_shl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
  "TARGET_SIMD"
  "shl\t%0.<Vtype>, %1.<Vtype>, %2"
  [(set_attr "type" "neon_shift_imm<q>")]
)

(define_insn "aarch64_simd_reg_sshl<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
                      (match_operand:VDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_unsigned"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_UNSIGNED))]
  "TARGET_SIMD"
  "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_insn "aarch64_simd_reg_shl<mode>_signed"
  [(set (match_operand:VDQ_I 0 "register_operand" "=w")
        (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
                       (match_operand:VDQ_I 2 "register_operand" "w")]
                      UNSPEC_ASHIFT_SIGNED))]
  "TARGET_SIMD"
  "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

(define_expand "ashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount >= 0 && shift_amount < bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
                                                     operands[1],
                                                     tmp));
          DONE;
        }
      else
        {
          operands[2] = force_reg (SImode, operands[2]);
        }
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_aarch64_simd_dup<mode> (tmp,
                                             convert_to_mode (<VEL>mode,
                                                              operands[2],
                                                              0)));
      emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                                  tmp));
      DONE;
    }
  else
    FAIL;
}
)

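;; For example, an in-range constant shift such as x << 3 on a V4SI value
;; takes the first path above and emits "shl v0.4s, v1.4s, 3"; a variable
;; amount is duplicated across a vector register and handled by SSHL
;; instead.
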
(define_expand "lshr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
                                                          operands[1],
                                                          tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "ashr<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:SI 2 "general_operand" "")]
  "TARGET_SIMD"
{
  int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
  int shift_amount;

  if (CONST_INT_P (operands[2]))
    {
      shift_amount = INTVAL (operands[2]);
      if (shift_amount > 0 && shift_amount <= bit_width)
        {
          rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
                                                       shift_amount);
          emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
                                                  operands[1],
                                                  tmp));
          DONE;
        }
      else
        operands[2] = force_reg (SImode, operands[2]);
    }
  else if (MEM_P (operands[2]))
    {
      operands[2] = force_reg (SImode, operands[2]);
    }

  if (REG_P (operands[2]))
    {
      rtx tmp = gen_reg_rtx (SImode);
      rtx tmp1 = gen_reg_rtx (<MODE>mode);
      emit_insn (gen_negsi2 (tmp, operands[2]));
      emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
                                             convert_to_mode (<VEL>mode,
                                                              tmp, 0)));
      emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
                                                        operands[1],
                                                        tmp1));
      DONE;
    }
  else
    FAIL;
}
)

(define_expand "vashl<mode>3"
  [(match_operand:VDQ_I 0 "register_operand" "")
   (match_operand:VDQ_I 1 "register_operand" "")
   (match_operand:VDQ_I 2 "register_operand" "")]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
                                              operands[2]));
  DONE;
})

;; Using mode VDQ_BHSI as there is no V2DImode neg!
;; Negating individual lanes most certainly offsets the
;; gain from vectorization.
(define_expand "vashr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
                                                    neg));
  DONE;
})

;; DI vector shift
(define_expand "aarch64_ashr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    /* An arithmetic shift right by 64 fills the result with copies of the sign
       bit, just like asr by 63 - however the standard pattern does not handle
       a shift by 64.  */
    if (INTVAL (operands[2]) == 64)
      operands[2] = GEN_INT (63);
    emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

(define_expand "vlshr<mode>3"
  [(match_operand:VDQ_BHSI 0 "register_operand" "")
   (match_operand:VDQ_BHSI 1 "register_operand" "")
   (match_operand:VDQ_BHSI 2 "register_operand" "")]
  "TARGET_SIMD"
{
  rtx neg = gen_reg_rtx (<MODE>mode);
  emit (gen_neg<mode>2 (neg, operands[2]));
  emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
                                                      neg));
  DONE;
})

(define_expand "aarch64_lshr_simddi"
  [(match_operand:DI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
  "TARGET_SIMD"
  {
    if (INTVAL (operands[2]) == 64)
      emit_move_insn (operands[0], const0_rtx);
    else
      emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
    DONE;
  }
)

;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
(define_insn "vec_shr_<mode>"
  [(set (match_operand:VD 0 "register_operand" "=w")
        (unspec:VD [(match_operand:VD 1 "register_operand" "w")
                    (match_operand:SI 2 "immediate_operand" "i")]
                   UNSPEC_VEC_SHR))]
  "TARGET_SIMD"
  {
    if (BYTES_BIG_ENDIAN)
      return "shl %d0, %d1, %2";
    else
      return "ushr %d0, %d1, %2";
  }
  [(set_attr "type" "neon_shift_imm")]
)

(define_expand "vec_set<mode>"
  [(match_operand:VALL_F16 0 "register_operand" "+w")
   (match_operand:<VEL> 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
  {
    HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
    emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
                                               GEN_INT (elem), operands[0]));
    DONE;
  }
)
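
;; For example, setting lane 2 of a V4SI vector passes the one-hot
;; vec_merge mask GEN_INT (1 << 2) == 4 to aarch64_simd_vec_set<mode>
;; above, which recovers the lane number with exact_log2.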


(define_insn "aarch64_mla<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (plus:VDQ_BHSI (mult:VDQ_BHSI
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")
                         (match_operand:VDQ_BHSI 3 "register_operand" "w"))
                       (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mla_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))
          (match_operand:VDQHS 4 "register_operand" "0")))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mla_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (plus:VDQHS
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 1 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 2 "register_operand" "w"))
          (match_operand:VDQHS 3 "register_operand" "0")))]
  "TARGET_SIMD"
  "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_mls<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
                        (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
                                       (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_mla_<Vetype><q>")]
)

(define_insn "*aarch64_mls_elt<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:VDQHS 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 4 "register_operand" "0")
          (mult:VDQHS
            (vec_duplicate:VDQHS
              (vec_select:<VEL>
                (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
                (parallel [(match_operand:SI 2 "immediate_operand")])))
            (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  {
    operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
    return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
  }
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

(define_insn "*aarch64_mls_elt_merge<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (minus:VDQHS
          (match_operand:VDQHS 1 "register_operand" "0")
          (mult:VDQHS (vec_duplicate:VDQHS
                        (match_operand:<VEL> 2 "register_operand" "<h_con>"))
                      (match_operand:VDQHS 3 "register_operand" "w"))))]
  "TARGET_SIMD"
  "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
)

;; Max/Min operations.
(define_insn "<su><maxmin><mode>3"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
                         (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

(define_expand "<su><maxmin>v2di3"
  [(set (match_operand:V2DI 0 "register_operand" "")
        (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
                     (match_operand:V2DI 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  enum rtx_code cmp_operator;
  rtx cmp_fmt;

  switch (<CODE>)
    {
    case UMIN:
      cmp_operator = LTU;
      break;
    case SMIN:
      cmp_operator = LT;
      break;
    case UMAX:
      cmp_operator = GTU;
      break;
    case SMAX:
      cmp_operator = GT;
      break;
    default:
      gcc_unreachable ();
    }

  cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
  emit_insn (gen_vcondv2div2di (operands[0], operands[1],
                                operands[2], cmp_fmt, operands[1], operands[2]));
  DONE;
})
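
;; For example, smax on V2DI becomes the equivalent of r = (a > b) ? a : b:
;; a signed GT comparison feeding vcondv2div2di, which selects operand 1
;; where the comparison holds and operand 2 elsewhere.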

;; Pairwise Integer Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         MAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
                       (match_operand:VHSDF 2 "register_operand" "w")]
                      FMAXMINV))]
  "TARGET_SIMD"
  "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_minmax<q>")]
)

;; vec_concat gives a new vector with the low elements from operand 1, and
;; the high elements from operand 2.  That is to say, given op1 = { a, b }
;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
;; What that means is that the RTL descriptions of the below patterns
;; need to change depending on endianness.
1423
1424 ;; Move to the low architectural bits of the register.
1425 ;; On little-endian this is { operand, zeroes }
1426 ;; On big-endian this is { zeroes, operand }
1427
1428 (define_insn "move_lo_quad_internal_<mode>"
1429 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1430 (vec_concat:VQ_NO2E
1431 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1432 (vec_duplicate:<VHALF> (const_int 0))))]
1433 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1434 "@
1435 dup\\t%d0, %1.d[0]
1436 fmov\\t%d0, %1
1437 dup\\t%d0, %1"
1438 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1439 (set_attr "length" "4")
1440 (set_attr "arch" "simd,fp,simd")]
1441 )
1442
1443 (define_insn "move_lo_quad_internal_<mode>"
1444 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1445 (vec_concat:VQ_2E
1446 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1447 (const_int 0)))]
1448 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1449 "@
1450 dup\\t%d0, %1.d[0]
1451 fmov\\t%d0, %1
1452 dup\\t%d0, %1"
1453 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1454 (set_attr "length" "4")
1455 (set_attr "arch" "simd,fp,simd")]
1456 )
1457
1458 (define_insn "move_lo_quad_internal_be_<mode>"
1459 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1460 (vec_concat:VQ_NO2E
1461 (vec_duplicate:<VHALF> (const_int 0))
1462 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1463 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1464 "@
1465 dup\\t%d0, %1.d[0]
1466 fmov\\t%d0, %1
1467 dup\\t%d0, %1"
1468 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1469 (set_attr "length" "4")
1470 (set_attr "arch" "simd,fp,simd")]
1471 )
1472
1473 (define_insn "move_lo_quad_internal_be_<mode>"
1474 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1475 (vec_concat:VQ_2E
1476 (const_int 0)
1477 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1478 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1479 "@
1480 dup\\t%d0, %1.d[0]
1481 fmov\\t%d0, %1
1482 dup\\t%d0, %1"
1483 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1484 (set_attr "length" "4")
1485 (set_attr "arch" "simd,fp,simd")]
1486 )
1487
1488 (define_expand "move_lo_quad_<mode>"
1489 [(match_operand:VQ 0 "register_operand")
1490 (match_operand:VQ 1 "register_operand")]
1491 "TARGET_SIMD"
1492 {
1493 if (BYTES_BIG_ENDIAN)
1494 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1495 else
1496 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1497 DONE;
1498 }
1499 )
1500
1501 ;; Move operand1 to the high architectural bits of the register, keeping
1502 ;; the low architectural bits of operand2.
1503 ;; For little-endian this is { operand2, operand1 }
1504 ;; For big-endian this is { operand1, operand2 }
1505
1506 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1507 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1508 (vec_concat:VQ
1509 (vec_select:<VHALF>
1510 (match_dup 0)
1511 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1512 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1513 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1514 "@
1515 ins\\t%0.d[1], %1.d[0]
1516 ins\\t%0.d[1], %1"
1517 [(set_attr "type" "neon_ins")]
1518 )
1519
1520 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1521 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1522 (vec_concat:VQ
1523 (match_operand:<VHALF> 1 "register_operand" "w,r")
1524 (vec_select:<VHALF>
1525 (match_dup 0)
1526 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1527 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1528 "@
1529 ins\\t%0.d[1], %1.d[0]
1530 ins\\t%0.d[1], %1"
1531 [(set_attr "type" "neon_ins")]
1532 )
1533
1534 (define_expand "move_hi_quad_<mode>"
1535 [(match_operand:VQ 0 "register_operand" "")
1536 (match_operand:<VHALF> 1 "register_operand" "")]
1537 "TARGET_SIMD"
1538 {
1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1540 if (BYTES_BIG_ENDIAN)
1541 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1542 operands[1], p));
1543 else
1544 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1545 operands[1], p));
1546 DONE;
1547 })
1548
1549 ;; Narrowing operations.
1550
1551 ;; For doubles.
1552 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1553 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1554 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1555 "TARGET_SIMD"
1556 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1557 [(set_attr "type" "neon_shift_imm_narrow_q")]
1558 )
1559
1560 (define_expand "vec_pack_trunc_<mode>"
1561 [(match_operand:<VNARROWD> 0 "register_operand" "")
1562 (match_operand:VDN 1 "register_operand" "")
1563 (match_operand:VDN 2 "register_operand" "")]
1564 "TARGET_SIMD"
1565 {
1566 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1567 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1568 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1569
1570 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1571 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1572 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1573 DONE;
1574 })
1575
1576 ;; For quads.
1577
1578 (define_insn "vec_pack_trunc_<mode>"
1579 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1580 (vec_concat:<VNARROWQ2>
1581 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1582 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1583 "TARGET_SIMD"
1584 {
1585 if (BYTES_BIG_ENDIAN)
1586 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1587 else
1588 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1589 }
1590 [(set_attr "type" "multiple")
1591 (set_attr "length" "8")]
1592 )
1593
1594 ;; Widening operations.
1595
1596 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1597 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1598 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1599 (match_operand:VQW 1 "register_operand" "w")
1600 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1601 )))]
1602 "TARGET_SIMD"
1603 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1604 [(set_attr "type" "neon_shift_imm_long")]
1605 )
1606
1607 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1609 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1610 (match_operand:VQW 1 "register_operand" "w")
1611 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1612 )))]
1613 "TARGET_SIMD"
1614 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1615 [(set_attr "type" "neon_shift_imm_long")]
1616 )
1617
1618 (define_expand "vec_unpack<su>_hi_<mode>"
1619 [(match_operand:<VWIDE> 0 "register_operand" "")
1620 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1621 "TARGET_SIMD"
1622 {
1623 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1624 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1625 operands[1], p));
1626 DONE;
1627 }
1628 )
1629
1630 (define_expand "vec_unpack<su>_lo_<mode>"
1631 [(match_operand:<VWIDE> 0 "register_operand" "")
1632 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1633 "TARGET_SIMD"
1634 {
1635 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1636 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1637 operands[1], p));
1638 DONE;
1639 }
1640 )
1641
1642 ;; Widening arithmetic.
1643
1644 (define_insn "*aarch64_<su>mlal_lo<mode>"
1645 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1646 (plus:<VWIDE>
1647 (mult:<VWIDE>
1648 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1649 (match_operand:VQW 2 "register_operand" "w")
1650 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1651 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1652 (match_operand:VQW 4 "register_operand" "w")
1653 (match_dup 3))))
1654 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1655 "TARGET_SIMD"
1656 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1657 [(set_attr "type" "neon_mla_<Vetype>_long")]
1658 )
1659
1660 (define_insn "*aarch64_<su>mlal_hi<mode>"
1661 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1662 (plus:<VWIDE>
1663 (mult:<VWIDE>
1664 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1665 (match_operand:VQW 2 "register_operand" "w")
1666 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1667 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1668 (match_operand:VQW 4 "register_operand" "w")
1669 (match_dup 3))))
1670 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1671 "TARGET_SIMD"
1672 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1673 [(set_attr "type" "neon_mla_<Vetype>_long")]
1674 )
1675
1676 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1677 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1678 (minus:<VWIDE>
1679 (match_operand:<VWIDE> 1 "register_operand" "0")
1680 (mult:<VWIDE>
1681 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1682 (match_operand:VQW 2 "register_operand" "w")
1683 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1684 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1685 (match_operand:VQW 4 "register_operand" "w")
1686 (match_dup 3))))))]
1687 "TARGET_SIMD"
1688 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1689 [(set_attr "type" "neon_mla_<Vetype>_long")]
1690 )
1691
1692 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1693 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1694 (minus:<VWIDE>
1695 (match_operand:<VWIDE> 1 "register_operand" "0")
1696 (mult:<VWIDE>
1697 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1698 (match_operand:VQW 2 "register_operand" "w")
1699 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1700 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1701 (match_operand:VQW 4 "register_operand" "w")
1702 (match_dup 3))))))]
1703 "TARGET_SIMD"
1704 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1705 [(set_attr "type" "neon_mla_<Vetype>_long")]
1706 )
1707
1708 (define_insn "*aarch64_<su>mlal<mode>"
1709 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1710 (plus:<VWIDE>
1711 (mult:<VWIDE>
1712 (ANY_EXTEND:<VWIDE>
1713 (match_operand:VD_BHSI 1 "register_operand" "w"))
1714 (ANY_EXTEND:<VWIDE>
1715 (match_operand:VD_BHSI 2 "register_operand" "w")))
1716 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1717 "TARGET_SIMD"
1718 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1719 [(set_attr "type" "neon_mla_<Vetype>_long")]
1720 )
1721
1722 (define_insn "*aarch64_<su>mlsl<mode>"
1723 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1724 (minus:<VWIDE>
1725 (match_operand:<VWIDE> 1 "register_operand" "0")
1726 (mult:<VWIDE>
1727 (ANY_EXTEND:<VWIDE>
1728 (match_operand:VD_BHSI 2 "register_operand" "w"))
1729 (ANY_EXTEND:<VWIDE>
1730 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1731 "TARGET_SIMD"
1732 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1733 [(set_attr "type" "neon_mla_<Vetype>_long")]
1734 )
1735
1736 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1737 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1738 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1739 (match_operand:VQW 1 "register_operand" "w")
1740 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1741 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1742 (match_operand:VQW 2 "register_operand" "w")
1743 (match_dup 3)))))]
1744 "TARGET_SIMD"
1745 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1746 [(set_attr "type" "neon_mul_<Vetype>_long")]
1747 )
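
;; An illustrative instance of the pattern above (assuming V16QI
;; inputs): the low halves of the two sources are multiplied into a
;; V8HI result with
;;   smull  v0.8h, v1.8b, v2.8b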
1748
1749 (define_expand "vec_widen_<su>mult_lo_<mode>"
1750 [(match_operand:<VWIDE> 0 "register_operand" "")
1751 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1752 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1753 "TARGET_SIMD"
1754 {
1755 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1756 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1757 operands[1],
1758 operands[2], p));
1759 DONE;
1760 }
1761 )
1762
1763 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1764 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1765 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1766 (match_operand:VQW 1 "register_operand" "w")
1767 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1768 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1769 (match_operand:VQW 2 "register_operand" "w")
1770 (match_dup 3)))))]
1771 "TARGET_SIMD"
1772 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1773 [(set_attr "type" "neon_mul_<Vetype>_long")]
1774 )
1775
1776 (define_expand "vec_widen_<su>mult_hi_<mode>"
1777 [(match_operand:<VWIDE> 0 "register_operand" "")
1778 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1779 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1780 "TARGET_SIMD"
1781 {
1782 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1783 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1784 operands[1],
1785 operands[2], p));
1786   DONE;
1788 }
1789 )
1790
1791 ;; FP vector operations.
1792 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1793 ;; double-precision (64-bit) floating-point data types and arithmetic as
1794 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1795 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1796 ;;
1797 ;; Floating-point operations can raise an exception. Vectorizing such
1798 ;; operations is safe for the reasons explained below.
1799 ;;
1800 ;; ARMv8 permits an extension to enable trapped floating-point
1801 ;; exception handling, however this is an optional feature. In the
1802 ;; event of a floating-point exception being raised by vectorised
1803 ;; code then:
1804 ;; 1. If trapped floating-point exceptions are available, then a trap
1805 ;; will be taken when any lane raises an enabled exception. A trap
1806 ;; handler may determine which lane raised the exception.
1807 ;; 2. Alternatively a sticky exception flag is set in the
1808 ;; floating-point status register (FPSR). Software may explicitly
1809 ;;    test the exception flags; tests inside a vectorisable region
1810 ;;    prevent vectorisation, allowing precise identification of the
1811 ;;    failing operation, while tests outside such regions do not
1812 ;;    care which specific operation or lane failed.
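;;
;; As an illustrative (non-normative) example, a plain loop such as
;;   void f (float *a, float *b, float *c, int n)
;;   {
;;     for (int i = 0; i < n; i++)
;;       a[i] = b[i] + c[i];
;;   }
;; can be vectorised to the fadd pattern below without any -ffast-math
;; options, because IEEE semantics, including the exception flags, are
;; preserved lane-wise.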
1813
1814 ;; FP arithmetic operations.
1815
1816 (define_insn "add<mode>3"
1817 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1818 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1819 (match_operand:VHSDF 2 "register_operand" "w")))]
1820 "TARGET_SIMD"
1821 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1822 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1823 )
1824
1825 (define_insn "sub<mode>3"
1826 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1827 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1828 (match_operand:VHSDF 2 "register_operand" "w")))]
1829 "TARGET_SIMD"
1830 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1831 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1832 )
1833
1834 (define_insn "mul<mode>3"
1835 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1836 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1837 (match_operand:VHSDF 2 "register_operand" "w")))]
1838 "TARGET_SIMD"
1839 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1840 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1841 )
1842
1843 (define_expand "div<mode>3"
1844 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1845 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1846 (match_operand:VHSDF 2 "register_operand" "w")))]
1847 "TARGET_SIMD"
1848 {
1849 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1850 DONE;
1851
1852 operands[1] = force_reg (<MODE>mode, operands[1]);
1853 })
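
;; Note (a sketch of the helper's behaviour, not a definitive
;; description): when approximate division is enabled,
;; aarch64_emit_approx_div emits a Newton-Raphson sequence built
;; around frecpe/frecps and returns true; otherwise it returns false
;; and the *div<mode>3 insn below emits the exact fdiv.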
1854
1855 (define_insn "*div<mode>3"
1856 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1857 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1858 (match_operand:VHSDF 2 "register_operand" "w")))]
1859 "TARGET_SIMD"
1860 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1861 [(set_attr "type" "neon_fp_div_<stype><q>")]
1862 )
1863
1864 (define_insn "neg<mode>2"
1865 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1866 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1867 "TARGET_SIMD"
1868 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1869 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1870 )
1871
1872 (define_insn "abs<mode>2"
1873 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1874 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1875 "TARGET_SIMD"
1876 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1877 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1878 )
1879
1880 (define_insn "fma<mode>4"
1881 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1882 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1883 (match_operand:VHSDF 2 "register_operand" "w")
1884 (match_operand:VHSDF 3 "register_operand" "0")))]
1885 "TARGET_SIMD"
1886 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1887 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1888 )
1889
1890 (define_insn "*aarch64_fma4_elt<mode>"
1891 [(set (match_operand:VDQF 0 "register_operand" "=w")
1892 (fma:VDQF
1893 (vec_duplicate:VDQF
1894 (vec_select:<VEL>
1895 (match_operand:VDQF 1 "register_operand" "<h_con>")
1896 (parallel [(match_operand:SI 2 "immediate_operand")])))
1897 (match_operand:VDQF 3 "register_operand" "w")
1898 (match_operand:VDQF 4 "register_operand" "0")))]
1899 "TARGET_SIMD"
1900 {
1901 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1902 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1903 }
1904 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1905 )
1906
1907 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1908 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1909 (fma:VDQSF
1910 (vec_duplicate:VDQSF
1911 (vec_select:<VEL>
1912 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1913 (parallel [(match_operand:SI 2 "immediate_operand")])))
1914 (match_operand:VDQSF 3 "register_operand" "w")
1915 (match_operand:VDQSF 4 "register_operand" "0")))]
1916 "TARGET_SIMD"
1917 {
1918 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1919 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1920 }
1921 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1922 )
1923
1924 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1925 [(set (match_operand:VMUL 0 "register_operand" "=w")
1926 (fma:VMUL
1927 (vec_duplicate:VMUL
1928 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1929 (match_operand:VMUL 2 "register_operand" "w")
1930 (match_operand:VMUL 3 "register_operand" "0")))]
1931 "TARGET_SIMD"
1932 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1933 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1934 )
1935
1936 (define_insn "*aarch64_fma4_elt_to_64v2df"
1937 [(set (match_operand:DF 0 "register_operand" "=w")
1938 (fma:DF
1939 (vec_select:DF
1940 (match_operand:V2DF 1 "register_operand" "w")
1941 (parallel [(match_operand:SI 2 "immediate_operand")]))
1942 (match_operand:DF 3 "register_operand" "w")
1943 (match_operand:DF 4 "register_operand" "0")))]
1944 "TARGET_SIMD"
1945 {
1946 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1947 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1948 }
1949 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1950 )
1951
1952 (define_insn "fnma<mode>4"
1953 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1954 (fma:VHSDF
1955 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1956 (match_operand:VHSDF 2 "register_operand" "w")
1957 (match_operand:VHSDF 3 "register_operand" "0")))]
1958 "TARGET_SIMD"
1959 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1960 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1961 )
1962
1963 (define_insn "*aarch64_fnma4_elt<mode>"
1964 [(set (match_operand:VDQF 0 "register_operand" "=w")
1965 (fma:VDQF
1966 (neg:VDQF
1967 (match_operand:VDQF 3 "register_operand" "w"))
1968 (vec_duplicate:VDQF
1969 (vec_select:<VEL>
1970 (match_operand:VDQF 1 "register_operand" "<h_con>")
1971 (parallel [(match_operand:SI 2 "immediate_operand")])))
1972 (match_operand:VDQF 4 "register_operand" "0")))]
1973 "TARGET_SIMD"
1974 {
1975 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1976 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1977 }
1978 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1979 )
1980
1981 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1982 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1983 (fma:VDQSF
1984 (neg:VDQSF
1985 (match_operand:VDQSF 3 "register_operand" "w"))
1986 (vec_duplicate:VDQSF
1987 (vec_select:<VEL>
1988 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1989 (parallel [(match_operand:SI 2 "immediate_operand")])))
1990 (match_operand:VDQSF 4 "register_operand" "0")))]
1991 "TARGET_SIMD"
1992 {
1993 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1994 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1995 }
1996 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1997 )
1998
1999 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
2000 [(set (match_operand:VMUL 0 "register_operand" "=w")
2001 (fma:VMUL
2002 (neg:VMUL
2003 (match_operand:VMUL 2 "register_operand" "w"))
2004 (vec_duplicate:VMUL
2005 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
2006 (match_operand:VMUL 3 "register_operand" "0")))]
2007 "TARGET_SIMD"
2008 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
2009 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
2010 )
2011
2012 (define_insn "*aarch64_fnma4_elt_to_64v2df"
2013 [(set (match_operand:DF 0 "register_operand" "=w")
2014 (fma:DF
2015 (vec_select:DF
2016 (match_operand:V2DF 1 "register_operand" "w")
2017 (parallel [(match_operand:SI 2 "immediate_operand")]))
2018 (neg:DF
2019 (match_operand:DF 3 "register_operand" "w"))
2020 (match_operand:DF 4 "register_operand" "0")))]
2021 "TARGET_SIMD"
2022 {
2023 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
2024 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
2025 }
2026 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
2027 )
2028
2029 ;; Vector versions of the floating-point frint patterns.
2030 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
2031 (define_insn "<frint_pattern><mode>2"
2032 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2033 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2034 FRINT))]
2035 "TARGET_SIMD"
2036 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
2037 [(set_attr "type" "neon_fp_round_<stype><q>")]
2038 )
2039
2040 ;; Vector versions of the fcvt standard patterns.
2041 ;; Expands to lbtrunc, lround, lceil, lfloor.
2042 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
2043 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2044 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2045 [(match_operand:VHSDF 1 "register_operand" "w")]
2046 FCVT)))]
2047 "TARGET_SIMD"
2048 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
2049 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
2050 )
2051
2052 ;; HF Scalar variants of related SIMD instructions.
2053 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
2054 [(set (match_operand:HI 0 "register_operand" "=w")
2055 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
2056 FCVT)))]
2057 "TARGET_SIMD_F16INST"
2058 "fcvt<frint_suffix><su>\t%h0, %h1"
2059 [(set_attr "type" "neon_fp_to_int_s")]
2060 )
2061
2062 (define_insn "<optab>_trunchfhi2"
2063 [(set (match_operand:HI 0 "register_operand" "=w")
2064 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
2065 "TARGET_SIMD_F16INST"
2066 "fcvtz<su>\t%h0, %h1"
2067 [(set_attr "type" "neon_fp_to_int_s")]
2068 )
2069
2070 (define_insn "<optab>hihf2"
2071 [(set (match_operand:HF 0 "register_operand" "=w")
2072 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
2073 "TARGET_SIMD_F16INST"
2074 "<su_optab>cvtf\t%h0, %h1"
2075 [(set_attr "type" "neon_int_to_fp_s")]
2076 )
2077
2078 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
2079 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
2080 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2081 [(mult:VDQF
2082 (match_operand:VDQF 1 "register_operand" "w")
2083 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
2084 UNSPEC_FRINTZ)))]
2085 "TARGET_SIMD
2086 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
2087 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
2088 {
2089 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
2090 char buf[64];
2091 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
2092 output_asm_insn (buf, operands);
2093 return "";
2094 }
2095 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
2096 )
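
;; Illustration (assuming a V4SF source and fbits = 4): the combined
;; pattern above turns a lane-wise (int) (x * 16.0f) into
;;   fcvtzs  v0.4s, v1.4s, #4
;; avoiding the separate fmul.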
2097
2098 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
2099 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2100 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2101 [(match_operand:VHSDF 1 "register_operand")]
2102 UNSPEC_FRINTZ)))]
2103 "TARGET_SIMD"
2104 {})
2105
2106 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
2107 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
2108 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
2109 [(match_operand:VHSDF 1 "register_operand")]
2110 UNSPEC_FRINTZ)))]
2111 "TARGET_SIMD"
2112 {})
2113
2114 (define_expand "ftrunc<VHSDF:mode>2"
2115 [(set (match_operand:VHSDF 0 "register_operand")
2116 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2117 UNSPEC_FRINTZ))]
2118 "TARGET_SIMD"
2119 {})
2120
2121 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
2122 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2123 (FLOATUORS:VHSDF
2124 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
2125 "TARGET_SIMD"
2126 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
2127 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
2128 )
2129
2130 ;; Conversions between vectors of floats and doubles.
2131 ;; Contains a mix of patterns to match standard pattern names
2132 ;; and those for intrinsics.
2133
2134 ;; Float widening operations.
2135
2136 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
2137 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2138 (float_extend:<VWIDE> (vec_select:<VHALF>
2139 (match_operand:VQ_HSF 1 "register_operand" "w")
2140 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
2141 )))]
2142 "TARGET_SIMD"
2143 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
2144 [(set_attr "type" "neon_fp_cvt_widen_s")]
2145 )
2146
2147 ;; Convert between fixed-point and floating-point (vector modes)
2148
2149 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
2150 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
2151 (unspec:<VHSDF:FCVT_TARGET>
2152 [(match_operand:VHSDF 1 "register_operand" "w")
2153 (match_operand:SI 2 "immediate_operand" "i")]
2154 FCVT_F2FIXED))]
2155 "TARGET_SIMD"
2156 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2157 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
2158 )
2159
2160 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
2161 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
2162 (unspec:<VDQ_HSDI:FCVT_TARGET>
2163 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
2164 (match_operand:SI 2 "immediate_operand" "i")]
2165 FCVT_FIXED2F))]
2166 "TARGET_SIMD"
2167 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
2168 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
2169 )
2170
2171 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
2172 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
2173 ;; the meaning of HI and LO changes depending on the target endianness.
2174 ;; While elsewhere we map the higher numbered elements of a vector to
2175 ;; the lower architectural lanes of the vector, for these patterns we want
2176 ;; to always treat "hi" as referring to the higher architectural lanes.
2177 ;; Consequently, while the patterns below look inconsistent with our
2178 ;; other big-endian patterns their behavior is as required.
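;;
;; Sketch (assuming V4SF widened to V2DF): vec_unpacks_hi_v4sf always
;; converts the architecturally high pair of lanes with fcvtl2, and
;; vec_unpacks_lo_v4sf the low pair with fcvtl, on both endiannesses.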
2179
2180 (define_expand "vec_unpacks_lo_<mode>"
2181 [(match_operand:<VWIDE> 0 "register_operand" "")
2182 (match_operand:VQ_HSF 1 "register_operand" "")]
2183 "TARGET_SIMD"
2184 {
2185 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2186 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2187 operands[1], p));
2188 DONE;
2189 }
2190 )
2191
2192 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
2193 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2194 (float_extend:<VWIDE> (vec_select:<VHALF>
2195 (match_operand:VQ_HSF 1 "register_operand" "w")
2196 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
2197 )))]
2198 "TARGET_SIMD"
2199 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
2200 [(set_attr "type" "neon_fp_cvt_widen_s")]
2201 )
2202
2203 (define_expand "vec_unpacks_hi_<mode>"
2204 [(match_operand:<VWIDE> 0 "register_operand" "")
2205 (match_operand:VQ_HSF 1 "register_operand" "")]
2206 "TARGET_SIMD"
2207 {
2208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2209 emit_insn (gen_aarch64_simd_vec_unpacks_hi_<mode> (operands[0],
2210 operands[1], p));
2211 DONE;
2212 }
2213 )
2214 (define_insn "aarch64_float_extend_lo_<Vwide>"
2215 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2216 (float_extend:<VWIDE>
2217 (match_operand:VDF 1 "register_operand" "w")))]
2218 "TARGET_SIMD"
2219 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2220 [(set_attr "type" "neon_fp_cvt_widen_s")]
2221 )
2222
2223 ;; Float narrowing operations.
2224
2225 (define_insn "aarch64_float_truncate_lo_<mode>"
2226 [(set (match_operand:VDF 0 "register_operand" "=w")
2227 (float_truncate:VDF
2228 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2229 "TARGET_SIMD"
2230 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2231 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2232 )
2233
2234 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2235 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2236 (vec_concat:<VDBL>
2237 (match_operand:VDF 1 "register_operand" "0")
2238 (float_truncate:VDF
2239 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2240 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2241 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2242 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2243 )
2244
2245 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2246 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2247 (vec_concat:<VDBL>
2248 (float_truncate:VDF
2249 (match_operand:<VWIDE> 2 "register_operand" "w"))
2250 (match_operand:VDF 1 "register_operand" "0")))]
2251 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2252 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2253 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2254 )
2255
2256 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2257 [(match_operand:<VDBL> 0 "register_operand" "=w")
2258 (match_operand:VDF 1 "register_operand" "0")
2259 (match_operand:<VWIDE> 2 "register_operand" "w")]
2260 "TARGET_SIMD"
2261 {
2262 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2263 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2264 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2265 emit_insn (gen (operands[0], operands[1], operands[2]));
2266 DONE;
2267 }
2268 )
2269
2270 (define_expand "vec_pack_trunc_v2df"
2271 [(set (match_operand:V4SF 0 "register_operand")
2272 (vec_concat:V4SF
2273 (float_truncate:V2SF
2274 (match_operand:V2DF 1 "register_operand"))
2275 (float_truncate:V2SF
2276 (match_operand:V2DF 2 "register_operand"))
2277 ))]
2278 "TARGET_SIMD"
2279 {
2280 rtx tmp = gen_reg_rtx (V2SFmode);
2281 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2282 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2283
2284 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2285 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2286 tmp, operands[hi]));
2287 DONE;
2288 }
2289 )
2290
2291 (define_expand "vec_pack_trunc_df"
2292 [(set (match_operand:V2SF 0 "register_operand")
2293 (vec_concat:V2SF
2294 (float_truncate:SF
2295 (match_operand:DF 1 "register_operand"))
2296 (float_truncate:SF
2297 (match_operand:DF 2 "register_operand"))
2298 ))]
2299 "TARGET_SIMD"
2300 {
2301 rtx tmp = gen_reg_rtx (V2SFmode);
2302 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2303 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2304
2305 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2306 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2307 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2308 DONE;
2309 }
2310 )
2311
2312 ;; FP Max/Min
2313 ;; Max/Min are introduced by idiom recognition in GCC's mid-end.  An
2314 ;; expression like:
2315 ;; a = (b < c) ? b : c;
2316 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2317 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2318 ;; -ffast-math.
2319 ;;
2320 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2321 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2322 ;; operand will be returned when both operands are zero (i.e. they may not
2323 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2324 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2325 ;; NaNs.
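;;
;; Illustration (not normative):
;;   float f (float b, float c) { return (b < c) ? b : c; }
;; is only turned into MIN_EXPR, and hence into the fminnm pattern
;; below, under -ffast-math (or -ffinite-math-only -fno-signed-zeros).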
2326
2327 (define_insn "<su><maxmin><mode>3"
2328 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2329 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2330 (match_operand:VHSDF 2 "register_operand" "w")))]
2331 "TARGET_SIMD"
2332 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2333 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2334 )
2335
2336 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2337 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2338 ;; which implement the IEEE fmax ()/fmin () functions.
2339 (define_insn "<maxmin_uns><mode>3"
2340 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2341 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2342 (match_operand:VHSDF 2 "register_operand" "w")]
2343 FMAXMIN_UNS))]
2344 "TARGET_SIMD"
2345 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2346 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2347 )
2348
2349 ;; 'across lanes' add.
2350
2351 (define_expand "reduc_plus_scal_<mode>"
2352 [(match_operand:<VEL> 0 "register_operand" "=w")
2353 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2354 UNSPEC_ADDV)]
2355 "TARGET_SIMD"
2356 {
2357 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358 rtx scratch = gen_reg_rtx (<MODE>mode);
2359 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2360 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2361 DONE;
2362 }
2363 )
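
;; For example (illustrative), reduc_plus_scal_v4si expands to
;;   addv  s0, v1.4s
;; followed by a lane-0 extraction via the get_lane pattern.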
2364
2365 (define_insn "aarch64_faddp<mode>"
2366 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2367 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2368 (match_operand:VHSDF 2 "register_operand" "w")]
2369 UNSPEC_FADDV))]
2370 "TARGET_SIMD"
2371 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2372 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2373 )
2374
2375 (define_insn "aarch64_reduc_plus_internal<mode>"
2376 [(set (match_operand:VDQV 0 "register_operand" "=w")
2377 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2378 UNSPEC_ADDV))]
2379 "TARGET_SIMD"
2380 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2381 [(set_attr "type" "neon_reduc_add<q>")]
2382 )
2383
2384 (define_insn "aarch64_reduc_plus_internalv2si"
2385 [(set (match_operand:V2SI 0 "register_operand" "=w")
2386 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2387 UNSPEC_ADDV))]
2388 "TARGET_SIMD"
2389 "addp\\t%0.2s, %1.2s, %1.2s"
2390 [(set_attr "type" "neon_reduc_add")]
2391 )
2392
2393 (define_insn "reduc_plus_scal_<mode>"
2394 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2395 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2396 UNSPEC_FADDV))]
2397 "TARGET_SIMD"
2398 "faddp\\t%<Vetype>0, %1.<Vtype>"
2399 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2400 )
2401
2402 (define_expand "reduc_plus_scal_v4sf"
2403 [(set (match_operand:SF 0 "register_operand")
2404 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2405 UNSPEC_FADDV))]
2406 "TARGET_SIMD"
2407 {
2408 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2409 rtx scratch = gen_reg_rtx (V4SFmode);
2410 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2411 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2412 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2413 DONE;
2414 })
2415
2416 (define_insn "clrsb<mode>2"
2417 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2418 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2419 "TARGET_SIMD"
2420 "cls\\t%0.<Vtype>, %1.<Vtype>"
2421 [(set_attr "type" "neon_cls<q>")]
2422 )
2423
2424 (define_insn "clz<mode>2"
2425 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2426 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2427 "TARGET_SIMD"
2428 "clz\\t%0.<Vtype>, %1.<Vtype>"
2429 [(set_attr "type" "neon_cls<q>")]
2430 )
2431
2432 (define_insn "popcount<mode>2"
2433 [(set (match_operand:VB 0 "register_operand" "=w")
2434 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2435 "TARGET_SIMD"
2436 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2437 [(set_attr "type" "neon_cnt<q>")]
2438 )
2439
2440 ;; 'across lanes' max and min ops.
2441
2442 ;; Template for outputting a scalar, so we can create __builtins which can be
2443 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2444 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2445 [(match_operand:<VEL> 0 "register_operand")
2446 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2447 FMAXMINV)]
2448 "TARGET_SIMD"
2449 {
2450 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2451 rtx scratch = gen_reg_rtx (<MODE>mode);
2452 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2453 operands[1]));
2454 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2455 DONE;
2456 }
2457 )
2458
2459 ;; Likewise for integer cases, signed and unsigned.
2460 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2461 [(match_operand:<VEL> 0 "register_operand")
2462 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2463 MAXMINV)]
2464 "TARGET_SIMD"
2465 {
2466 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2467 rtx scratch = gen_reg_rtx (<MODE>mode);
2468 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2469 operands[1]));
2470 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2471 DONE;
2472 }
2473 )
2474
2475 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2476 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2477 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2478 MAXMINV))]
2479 "TARGET_SIMD"
2480 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2481 [(set_attr "type" "neon_reduc_minmax<q>")]
2482 )
2483
2484 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2485 [(set (match_operand:V2SI 0 "register_operand" "=w")
2486 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2487 MAXMINV))]
2488 "TARGET_SIMD"
2489 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2490 [(set_attr "type" "neon_reduc_minmax")]
2491 )
2492
2493 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2494 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2495 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2496 FMAXMINV))]
2497 "TARGET_SIMD"
2498 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2499 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2500 )
2501
2502 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2503 ;; allocation.
2504 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2505 ;; to select.
2506 ;;
2507 ;; Thus our BSL is of the form:
2508 ;;   op0 = bsl (op1, op2, op3)
2509 ;; with op1 the mask.  We can use any of:
2510 ;;
2511 ;;   if (op0 = op1) (the mask)
2512 ;;     bsl op0, op2, op3
2513 ;;   if (op0 = op3) (so 1-bits in the mask choose bits from op2, else op0)
2514 ;;     bit op0, op2, op1
2515 ;;   if (op0 = op2) (so 0-bits in the mask choose bits from op3, else op0)
2516 ;;     bif op0, op3, op1
2517 ;;
2518 ;; This pattern is generated by the aarch64_simd_bsl<mode> expander.
2519 ;; Some forms of straight-line code may generate the equivalent form
2520 ;; in *aarch64_simd_bsl<mode>_alt.
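;;
;; The xor/and/xor form used below follows from the identity
;;   bsl (mask, op2, op3) = ((op2 ^ op3) & mask) ^ op3
;; which equals (mask & op2) | (~mask & op3): where a mask bit is 1 the
;; two XORs cancel, leaving the op2 bit; where it is 0 the AND drops
;; the XOR term, leaving the op3 bit.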
2521
2522 (define_insn "aarch64_simd_bsl<mode>_internal"
2523 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2524 (xor:VDQ_I
2525 (and:VDQ_I
2526 (xor:VDQ_I
2527 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2528 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2529 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2530 (match_dup:<V_INT_EQUIV> 3)
2531 ))]
2532 "TARGET_SIMD"
2533 "@
2534 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2535 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2536 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2537 [(set_attr "type" "neon_bsl<q>")]
2538 )
2539
2540 ;; We need this form in addition to the above pattern to match the case
2541 ;; when combine tries merging three insns such that the second operand of
2542 ;; the outer XOR matches the second operand of the inner XOR rather than
2543 ;; the first. The two are equivalent but since recog doesn't try all
2544 ;; permutations of commutative operations, we have to have a separate pattern.
2545
2546 (define_insn "*aarch64_simd_bsl<mode>_alt"
2547 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2548 (xor:VDQ_I
2549 (and:VDQ_I
2550 (xor:VDQ_I
2551 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2552 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2553 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2554 (match_dup:<V_INT_EQUIV> 2)))]
2555 "TARGET_SIMD"
2556 "@
2557 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2558 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2559 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2560 [(set_attr "type" "neon_bsl<q>")]
2561 )
2562
2563 ;; DImode is special: we want to avoid computing, in the vector
2564 ;; registers, operations that are more naturally computed in general
2565 ;; purpose registers, since that would mean moving all three operands
2566 ;; from general purpose registers to vector registers and back.  However, we
2567 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2568 ;; optimizations based on the component operations of a BSL.
2569 ;;
2570 ;; That means we need a splitter back to the individual operations, if they
2571 ;; would be better calculated on the integer side.
2572
2573 (define_insn_and_split "aarch64_simd_bsldi_internal"
2574 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2575 (xor:DI
2576 (and:DI
2577 (xor:DI
2578 (match_operand:DI 3 "register_operand" "w,0,w,r")
2579 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2580 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2581 (match_dup:DI 3)
2582 ))]
2583 "TARGET_SIMD"
2584 "@
2585 bsl\\t%0.8b, %2.8b, %3.8b
2586 bit\\t%0.8b, %2.8b, %1.8b
2587 bif\\t%0.8b, %3.8b, %1.8b
2588 #"
2589 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2590 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2591 {
2592 /* Split back to individual operations. If we're before reload, and
2593 able to create a temporary register, do so. If we're after reload,
2594 we've got an early-clobber destination register, so use that.
2595 Otherwise, we can't create pseudos and we can't yet guarantee that
2596 operands[0] is safe to write, so FAIL to split. */
2597
2598 rtx scratch;
2599 if (reload_completed)
2600 scratch = operands[0];
2601 else if (can_create_pseudo_p ())
2602 scratch = gen_reg_rtx (DImode);
2603 else
2604 FAIL;
2605
2606 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2607 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2608 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2609 DONE;
2610 }
2611 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2612 (set_attr "length" "4,4,4,12")]
2613 )
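
;; Sketch of the general-purpose split above: the three emitted insns
;; correspond to
;;   eor  x_tmp, x_op2, x_op3
;;   and  x_tmp, x_tmp, x_mask
;;   eor  x_op0, x_tmp, x_op3
;; (register names illustrative only).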
2614
2615 (define_insn_and_split "aarch64_simd_bsldi_alt"
2616 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2617 (xor:DI
2618 (and:DI
2619 (xor:DI
2620 (match_operand:DI 3 "register_operand" "w,w,0,r")
2621 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2622 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2623 (match_dup:DI 2)
2624 ))]
2625 "TARGET_SIMD"
2626 "@
2627 bsl\\t%0.8b, %3.8b, %2.8b
2628 bit\\t%0.8b, %3.8b, %1.8b
2629 bif\\t%0.8b, %2.8b, %1.8b
2630 #"
2631 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2632 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2633 {
2634 /* Split back to individual operations. If we're before reload, and
2635 able to create a temporary register, do so. If we're after reload,
2636 we've got an early-clobber destination register, so use that.
2637 Otherwise, we can't create pseudos and we can't yet guarantee that
2638 operands[0] is safe to write, so FAIL to split. */
2639
2640 rtx scratch;
2641 if (reload_completed)
2642 scratch = operands[0];
2643 else if (can_create_pseudo_p ())
2644 scratch = gen_reg_rtx (DImode);
2645 else
2646 FAIL;
2647
2648 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2649 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2650 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2651 DONE;
2652 }
2653 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2654 (set_attr "length" "4,4,4,12")]
2655 )
2656
2657 (define_expand "aarch64_simd_bsl<mode>"
2658 [(match_operand:VALLDIF 0 "register_operand")
2659 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2660 (match_operand:VALLDIF 2 "register_operand")
2661 (match_operand:VALLDIF 3 "register_operand")]
2662 "TARGET_SIMD"
2663 {
2664 /* We can't alias operands together if they have different modes. */
2665 rtx tmp = operands[0];
2666 if (FLOAT_MODE_P (<MODE>mode))
2667 {
2668 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2669 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2670 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2671 }
2672 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2673 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2674 operands[1],
2675 operands[2],
2676 operands[3]));
2677 if (tmp != operands[0])
2678 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2679
2680 DONE;
2681 })
2682
2683 (define_expand "vcond_mask_<mode><v_int_equiv>"
2684 [(match_operand:VALLDI 0 "register_operand")
2685 (match_operand:VALLDI 1 "nonmemory_operand")
2686 (match_operand:VALLDI 2 "nonmemory_operand")
2687 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2688 "TARGET_SIMD"
2689 {
2690   /* If we have (a = (P) ? -1 : 0),
2691      then we can simply move the generated mask (result must be int).  */
2692 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2693 && operands[2] == CONST0_RTX (<MODE>mode))
2694 emit_move_insn (operands[0], operands[3]);
2695 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2696 else if (operands[1] == CONST0_RTX (<MODE>mode)
2697 && operands[2] == CONSTM1_RTX (<MODE>mode))
2698 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2699 else
2700 {
2701 if (!REG_P (operands[1]))
2702 operands[1] = force_reg (<MODE>mode, operands[1]);
2703 if (!REG_P (operands[2]))
2704 operands[2] = force_reg (<MODE>mode, operands[2]);
2705 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2706 operands[1], operands[2]));
2707 }
2708
2709 DONE;
2710 })
2711
2712 ;; Patterns comparing two vectors to produce a mask.
2713
2714 (define_expand "vec_cmp<mode><mode>"
2715 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2716 (match_operator 1 "comparison_operator"
2717 [(match_operand:VSDQ_I_DI 2 "register_operand")
2718 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2719 "TARGET_SIMD"
2720 {
2721 rtx mask = operands[0];
2722 enum rtx_code code = GET_CODE (operands[1]);
2723
2724 switch (code)
2725 {
2726 case NE:
2727 case LE:
2728 case LT:
2729 case GE:
2730 case GT:
2731 case EQ:
2732 if (operands[3] == CONST0_RTX (<MODE>mode))
2733 break;
2734
2735 /* Fall through. */
2736 default:
2737 if (!REG_P (operands[3]))
2738 operands[3] = force_reg (<MODE>mode, operands[3]);
2739
2740 break;
2741 }
2742
2743 switch (code)
2744 {
2745 case LT:
2746 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2747 break;
2748
2749 case GE:
2750 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2751 break;
2752
2753 case LE:
2754 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2755 break;
2756
2757 case GT:
2758 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2759 break;
2760
2761 case LTU:
2762 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2763 break;
2764
2765 case GEU:
2766 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2767 break;
2768
2769 case LEU:
2770 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2771 break;
2772
2773 case GTU:
2774 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2775 break;
2776
2777 case NE:
2778 /* Handle NE as !EQ. */
2779 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2780 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2781 break;
2782
2783 case EQ:
2784 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2785 break;
2786
2787 default:
2788 gcc_unreachable ();
2789 }
2790
2791 DONE;
2792 })
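
;; Only CMHS/CMHI exist for the unsigned comparisons above, so the
;; expander swaps operands where needed; illustratively:
;;   a LEU b  ->  cmhs (b, a)        a LTU b  ->  cmhi (b, a)
;;   a GEU b  ->  cmhs (a, b)        a GTU b  ->  cmhi (a, b)
;; (cmgeu/cmgtu are the internal pattern names for cmhs/cmhi).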
2793
2794 (define_expand "vec_cmp<mode><v_int_equiv>"
2795 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2796 (match_operator 1 "comparison_operator"
2797 [(match_operand:VDQF 2 "register_operand")
2798 (match_operand:VDQF 3 "nonmemory_operand")]))]
2799 "TARGET_SIMD"
2800 {
2801 int use_zero_form = 0;
2802 enum rtx_code code = GET_CODE (operands[1]);
2803 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2804
2805 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2806
2807 switch (code)
2808 {
2809 case LE:
2810 case LT:
2811 case GE:
2812 case GT:
2813 case EQ:
2814 if (operands[3] == CONST0_RTX (<MODE>mode))
2815 {
2816 use_zero_form = 1;
2817 break;
2818 }
2819 /* Fall through. */
2820 default:
2821 if (!REG_P (operands[3]))
2822 operands[3] = force_reg (<MODE>mode, operands[3]);
2823
2824 break;
2825 }
2826
2827 switch (code)
2828 {
2829 case LT:
2830 if (use_zero_form)
2831 {
2832 comparison = gen_aarch64_cmlt<mode>;
2833 break;
2834 }
2835 /* Fall through. */
2836 case UNLT:
2837 std::swap (operands[2], operands[3]);
2838 /* Fall through. */
2839 case UNGT:
2840 case GT:
2841 comparison = gen_aarch64_cmgt<mode>;
2842 break;
2843 case LE:
2844 if (use_zero_form)
2845 {
2846 comparison = gen_aarch64_cmle<mode>;
2847 break;
2848 }
2849 /* Fall through. */
2850 case UNLE:
2851 std::swap (operands[2], operands[3]);
2852 /* Fall through. */
2853 case UNGE:
2854 case GE:
2855 comparison = gen_aarch64_cmge<mode>;
2856 break;
2857 case NE:
2858 case EQ:
2859 comparison = gen_aarch64_cmeq<mode>;
2860 break;
2861 case UNEQ:
2862 case ORDERED:
2863 case UNORDERED:
2864 case LTGT:
2865 break;
2866 default:
2867 gcc_unreachable ();
2868 }
2869
2870 switch (code)
2871 {
2872 case UNGE:
2873 case UNGT:
2874 case UNLE:
2875 case UNLT:
2876 {
2877 /* All of the above must not raise any FP exceptions. Thus we first
2878 check each operand for NaNs and force any elements containing NaN to
2879 zero before using them in the compare.
2880 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2881 (cm<cc> (isnan (a) ? 0.0 : a,
2882 isnan (b) ? 0.0 : b))
2883 We use the following transformations for doing the comparisons:
2884 a UNGE b -> a GE b
2885 a UNGT b -> a GT b
2886 a UNLE b -> b GE a
2887 a UNLT b -> b GT a. */
2888
2889 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2890 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2891 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2892 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2893 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2894 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2895 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2896 lowpart_subreg (<V_INT_EQUIV>mode,
2897 operands[2],
2898 <MODE>mode)));
2899 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2900 lowpart_subreg (<V_INT_EQUIV>mode,
2901 operands[3],
2902 <MODE>mode)));
2903 gcc_assert (comparison != NULL);
2904 emit_insn (comparison (operands[0],
2905 lowpart_subreg (<MODE>mode,
2906 tmp0, <V_INT_EQUIV>mode),
2907 lowpart_subreg (<MODE>mode,
2908 tmp1, <V_INT_EQUIV>mode)));
2909 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2910 }
2911 break;
2912
2913 case LT:
2914 case LE:
2915 case GT:
2916 case GE:
2917 case EQ:
2918 case NE:
2919 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2920 As a LT b <=> b GT a and a LE b <=> b GE a, our transformations are:
2921 a GE b -> a GE b
2922 a GT b -> a GT b
2923 a LE b -> b GE a
2924 a LT b -> b GT a
2925 a EQ b -> a EQ b
2926 a NE b -> ~(a EQ b) */
2927 gcc_assert (comparison != NULL);
2928 emit_insn (comparison (operands[0], operands[2], operands[3]));
2929 if (code == NE)
2930 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2931 break;
2932
2933 case LTGT:
2934     /* LTGT is not guaranteed not to generate an FP exception.  So let's
2935 go the faster way: ((a > b) || (b > a)).  */
2936 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2937 operands[2], operands[3]));
2938 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2939 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2940 break;
2941
2942 case ORDERED:
2943 case UNORDERED:
2944 case UNEQ:
2945 /* cmeq (a, a) & cmeq (b, b). */
2946 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2947 operands[2], operands[2]));
2948 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2949 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2950
2951 if (code == UNORDERED)
2952 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2953 else if (code == UNEQ)
2954 {
2955 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2956 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2957 }
2958 break;
2959
2960 default:
2961 gcc_unreachable ();
2962 }
2963
2964 DONE;
2965 })
2966
2967 (define_expand "vec_cmpu<mode><mode>"
2968 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2969 (match_operator 1 "comparison_operator"
2970 [(match_operand:VSDQ_I_DI 2 "register_operand")
2971 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2972 "TARGET_SIMD"
2973 {
2974 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2975 operands[2], operands[3]));
2976 DONE;
2977 })
2978
2979 (define_expand "vcond<mode><mode>"
2980 [(set (match_operand:VALLDI 0 "register_operand")
2981 (if_then_else:VALLDI
2982 (match_operator 3 "comparison_operator"
2983 [(match_operand:VALLDI 4 "register_operand")
2984 (match_operand:VALLDI 5 "nonmemory_operand")])
2985 (match_operand:VALLDI 1 "nonmemory_operand")
2986 (match_operand:VALLDI 2 "nonmemory_operand")))]
2987 "TARGET_SIMD"
2988 {
2989 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2990 enum rtx_code code = GET_CODE (operands[3]);
2991
2992   /* NE is handled as !EQ in vec_cmp patterns; rewrite it as EQ and
2993      swap operands 1/2 so that the additional
2994      NOT instruction is avoided.  */
2995 if (code == NE)
2996 {
2997 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2998 operands[4], operands[5]);
2999 std::swap (operands[1], operands[2]);
3000 }
3001 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3002 operands[4], operands[5]));
3003 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3004 operands[2], mask));
3005
3006 DONE;
3007 })
3008
3009 (define_expand "vcond<v_cmp_mixed><mode>"
3010 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
3011 (if_then_else:<V_cmp_mixed>
3012 (match_operator 3 "comparison_operator"
3013 [(match_operand:VDQF_COND 4 "register_operand")
3014 (match_operand:VDQF_COND 5 "nonmemory_operand")])
3015 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
3016 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
3017 "TARGET_SIMD"
3018 {
3019 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3020 enum rtx_code code = GET_CODE (operands[3]);
3021
3022   /* NE is handled as !EQ in vec_cmp patterns; rewrite it as EQ and
3023      swap operands 1/2 so that the additional
3024      NOT instruction is avoided.  */
3025 if (code == NE)
3026 {
3027 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3028 operands[4], operands[5]);
3029 std::swap (operands[1], operands[2]);
3030 }
3031 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
3032 operands[4], operands[5]));
3033 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
3034 operands[0], operands[1],
3035 operands[2], mask));
3036
3037 DONE;
3038 })
3039
3040 (define_expand "vcondu<mode><mode>"
3041 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
3042 (if_then_else:VSDQ_I_DI
3043 (match_operator 3 "comparison_operator"
3044 [(match_operand:VSDQ_I_DI 4 "register_operand")
3045 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
3046 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
3047 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
3048 "TARGET_SIMD"
3049 {
3050 rtx mask = gen_reg_rtx (<MODE>mode);
3051 enum rtx_code code = GET_CODE (operands[3]);
3052
3053   /* NE is handled as !EQ in vec_cmp patterns; rewrite it as EQ and
3054      swap operands 1/2 so that the additional
3055      NOT instruction is avoided.  */
3056 if (code == NE)
3057 {
3058 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3059 operands[4], operands[5]);
3060 std::swap (operands[1], operands[2]);
3061 }
3062 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
3063 operands[4], operands[5]));
3064 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3065 operands[2], mask));
3066 DONE;
3067 })
3068
3069 (define_expand "vcondu<mode><v_cmp_mixed>"
3070 [(set (match_operand:VDQF 0 "register_operand")
3071 (if_then_else:VDQF
3072 (match_operator 3 "comparison_operator"
3073 [(match_operand:<V_cmp_mixed> 4 "register_operand")
3074 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
3075 (match_operand:VDQF 1 "nonmemory_operand")
3076 (match_operand:VDQF 2 "nonmemory_operand")))]
3077 "TARGET_SIMD"
3078 {
3079 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
3080 enum rtx_code code = GET_CODE (operands[3]);
3081
3082   /* NE is handled as !EQ in vec_cmp patterns; rewrite it as EQ and
3083      swap operands 1/2 so that the additional
3084      NOT instruction is avoided.  */
3085 if (code == NE)
3086 {
3087 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
3088 operands[4], operands[5]);
3089 std::swap (operands[1], operands[2]);
3090 }
3091 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
3092 mask, operands[3],
3093 operands[4], operands[5]));
3094 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
3095 operands[2], mask));
3096 DONE;
3097 })
3098
3099 ;; Patterns for AArch64 SIMD Intrinsics.
3100
3101 ;; Lane extraction with sign extension to general purpose register.
3102 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
3103 [(set (match_operand:GPI 0 "register_operand" "=r")
3104 (sign_extend:GPI
3105 (vec_select:<VEL>
3106 (match_operand:VDQQH 1 "register_operand" "w")
3107 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3108 "TARGET_SIMD"
3109 {
3110 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, INTVAL (operands[2]));
3111 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
3112 }
3113 [(set_attr "type" "neon_to_gp<q>")]
3114 )
3115
3116 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
3117 [(set (match_operand:GPI 0 "register_operand" "=r")
3118 (zero_extend:GPI
3119 (vec_select:<VEL>
3120 (match_operand:VDQQH 1 "register_operand" "w")
3121 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3122 "TARGET_SIMD"
3123 {
3124 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3125 INTVAL (operands[2]));
3126 return "umov\\t%w0, %1.<Vetype>[%2]";
3127 }
3128 [(set_attr "type" "neon_to_gp<q>")]
3129 )
3130
3131 ;; Lane extraction of a value: neither sign nor zero extension
3132 ;; is guaranteed, so the upper bits should be considered undefined.
3133 ;; RTL uses GCC vector extension indices throughout, so flip only for assembly.
3134 (define_insn "aarch64_get_lane<mode>"
3135 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
3136 (vec_select:<VEL>
3137 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
3138 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
3139 "TARGET_SIMD"
3140 {
3141 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
3142 switch (which_alternative)
3143 {
3144 case 0:
3145 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3146 case 1:
3147 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3148 case 2:
3149 return "st1\\t{%1.<Vetype>}[%2], %0";
3150 default:
3151 gcc_unreachable ();
3152 }
3153 }
3154 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
3155 )
3156
3157 (define_insn "load_pair_lanes<mode>"
3158 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3159 (vec_concat:<VDBL>
3160 (match_operand:VDC 1 "memory_operand" "Utq")
3161 (match_operand:VDC 2 "memory_operand" "m")))]
3162 "TARGET_SIMD && !STRICT_ALIGNMENT
3163 && rtx_equal_p (XEXP (operands[2], 0),
3164 plus_constant (Pmode,
3165 XEXP (operands[1], 0),
3166 GET_MODE_SIZE (<MODE>mode)))"
3167 "ldr\\t%q0, %1"
3168 [(set_attr "type" "neon_load1_1reg_q")]
3169 )
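
;; Illustration (assuming V2SI operands): two 8-byte loads from [x0]
;; and [x0, #8] satisfy the address check above and combine into a
;; single
;;   ldr  q0, [x0]
;; which is why the pattern requires !STRICT_ALIGNMENT.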
3170
3171 (define_insn "store_pair_lanes<mode>"
3172 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3173 (vec_concat:<VDBL>
3174 (match_operand:VDC 1 "register_operand" "w, r")
3175 (match_operand:VDC 2 "register_operand" "w, r")))]
3176 "TARGET_SIMD"
3177 "@
3178 stp\\t%d1, %d2, %y0
3179 stp\\t%x1, %x2, %y0"
3180 [(set_attr "type" "neon_stp, store_16")]
3181 )
3182
3183 ;; In this insn, operand 1 should be the low part, and operand 2 the
3184 ;; high part, of the destination vector.
3185
3186 (define_insn "*aarch64_combinez<mode>"
3187 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3188 (vec_concat:<VDBL>
3189 (match_operand:VDC 1 "general_operand" "w,?r,m")
3190 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
3191 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
3192 "@
3193 mov\\t%0.8b, %1.8b
3194 fmov\t%d0, %1
3195 ldr\\t%d0, %1"
3196 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3197 (set_attr "arch" "simd,fp,simd")]
3198 )
3199
3200 (define_insn "*aarch64_combinez_be<mode>"
3201 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
3202 (vec_concat:<VDBL>
3203 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
3204 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
3205 "TARGET_SIMD && BYTES_BIG_ENDIAN"
3206 "@
3207 mov\\t%0.8b, %1.8b
3208 fmov\t%d0, %1
3209 ldr\\t%d0, %1"
3210 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
3211 (set_attr "arch" "simd,fp,simd")]
3212 )
3213
3214 (define_expand "aarch64_combine<mode>"
3215 [(match_operand:<VDBL> 0 "register_operand")
3216 (match_operand:VDC 1 "register_operand")
3217 (match_operand:VDC 2 "register_operand")]
3218 "TARGET_SIMD"
3219 {
3220 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
3221
3222 DONE;
3223 }
3224 )
3225
3226 (define_expand "@aarch64_simd_combine<mode>"
3227 [(match_operand:<VDBL> 0 "register_operand")
3228 (match_operand:VDC 1 "register_operand")
3229 (match_operand:VDC 2 "register_operand")]
3230 "TARGET_SIMD"
3231 {
3232 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
3233 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
3234 DONE;
3235 }
3236 [(set_attr "type" "multiple")]
3237 )
3238
3239 ;; <su><addsub>l<q>.

(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 1 "register_operand" "w")
                          (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 2 "register_operand" "w")
                          (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 1 "register_operand" "w")
                          (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
                          (match_operand:VQW 2 "register_operand" "w")
                          (match_dup 3)))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

(define_expand "aarch64_saddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_expand "aarch64_usubl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQW 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

(define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
                          (match_operand:VD_BHSI 1 "register_operand" "w"))
                        (ANY_EXTEND:<VWIDE>
                          (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<ADDSUB:optab>_long")]
)

;; <su><addsub>w<q>.
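;; widen_<su>sum<mode>3 on a 128-bit input has no single-instruction
;; form, so the expanders below accumulate the low half with <su>addw
;; and then the high half with <su>addw2.  Schematically, for V16QI:
;;   saddw   tmp.8h, acc.8h, in.8b     (low half)
;;   saddw2  dst.8h, tmp.8h, in.16b    (high half)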

(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
        (plus:<VDBLW> (sign_extend:<VDBLW>
                        (match_operand:VQW 1 "register_operand" ""))
                      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
                                               operands[1], p));
  emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
  DONE;
})

(define_expand "widen_ssum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
        (plus:<VWIDE> (sign_extend:<VWIDE>
                        (match_operand:VD_BHSI 1 "register_operand" ""))
                      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VDBLW> 0 "register_operand" "")
        (plus:<VDBLW> (zero_extend:<VDBLW>
                        (match_operand:VQW 1 "register_operand" ""))
                      (match_operand:<VDBLW> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
  rtx temp = gen_reg_rtx (GET_MODE (operands[0]));

  emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
                                               operands[1], p));
  emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
  DONE;
})

(define_expand "widen_usum<mode>3"
  [(set (match_operand:<VWIDE> 0 "register_operand" "")
        (plus:<VWIDE> (zero_extend:<VWIDE>
                        (match_operand:VD_BHSI 1 "register_operand" ""))
                      (match_operand:<VWIDE> 2 "register_operand" "")))]
  "TARGET_SIMD"
{
  emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
  DONE;
})

(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
                       (ANY_EXTEND:<VWIDE>
                         (match_operand:VD_BHSI 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
                       (ANY_EXTEND:<VWIDE>
                         (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
                       (ANY_EXTEND:<VWIDE>
                         (vec_select:<VHALF>
                           (match_operand:VQW 2 "register_operand" "w")
                           (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_sub_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
          (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (ANY_EXTEND:<VWIDE>
            (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
          (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
  [(set_attr "type" "neon_add_widen")]
)

(define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (plus:<VWIDE>
          (ANY_EXTEND:<VWIDE>
            (vec_select:<VHALF>
              (match_operand:VQW 2 "register_operand" "w")
              (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
          (match_operand:<VWIDE> 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
  [(set_attr "type" "neon_add_widen")]
)

(define_expand "aarch64_saddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
                                                operands[2], p));
  DONE;
})

(define_expand "aarch64_uaddw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
                                                operands[2], p));
  DONE;
})

(define_expand "aarch64_ssubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
                                                operands[2], p));
  DONE;
})

(define_expand "aarch64_usubw2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQW 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
                                                operands[2], p));
  DONE;
})

;; <su><r>h<addsub>.
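;; <u>avg<mode>3_floor computes the truncating average (a + b) >> 1
;; via [su]hadd, and <u>avg<mode>3_ceil the rounding average
;; (a + b + 1) >> 1 via [su]rhadd.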

(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
                          (match_operand:VDQ_BHSI 2 "register_operand")]
                         HADD))]
  "TARGET_SIMD"
)

(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:VDQ_BHSI 0 "register_operand")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
                          (match_operand:VDQ_BHSI 2 "register_operand")]
                         RHADD))]
  "TARGET_SIMD"
)

(define_insn "aarch64_<sur>h<addsub><mode>"
  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
        (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
                          (match_operand:VDQ_BHSI 2 "register_operand" "w")]
                         HADDSUB))]
  "TARGET_SIMD"
  "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve<q>")]
)

;; <r><addsub>hn<q>.

(define_insn "aarch64_<sur><addsub>hn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
                            (match_operand:VQN 2 "register_operand" "w")]
                           ADDSUBHN))]
  "TARGET_SIMD"
  "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

(define_insn "aarch64_<sur><addsub>hn2<mode>"
  [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
        (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
                             (match_operand:VQN 2 "register_operand" "w")
                             (match_operand:VQN 3 "register_operand" "w")]
                            ADDSUBHN2))]
  "TARGET_SIMD"
  "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
  [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
)

;; pmul.

(define_insn "aarch64_pmul<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "w")
                    (match_operand:VB 2 "register_operand" "w")]
                   UNSPEC_PMUL))]
  "TARGET_SIMD"
  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_mul_<Vetype><q>")]
)

;; fmulx.

(define_insn "aarch64_fmulx<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
          UNSPEC_FMULX))]
  "TARGET_SIMD"
  "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_mul_<stype>")]
)

;; vmulxq_lane_f32 and vmulx_laneq_f32

(define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand" "w")
           (vec_duplicate:VDQSF
             (vec_select:<VEL>
               (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
               (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
          UNSPEC_FMULX))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
  return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
)

;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32

(define_insn "*aarch64_mulx_elt<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF
          [(match_operand:VDQF 1 "register_operand" "w")
           (vec_duplicate:VDQF
             (vec_select:<VEL>
               (match_operand:VDQF 2 "register_operand" "w")
               (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
          UNSPEC_FMULX))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
)

;; vmulxq_lane

(define_insn "*aarch64_mulx_elt_from_dup<mode>"
  [(set (match_operand:VHSDF 0 "register_operand" "=w")
        (unspec:VHSDF
          [(match_operand:VHSDF 1 "register_operand" "w")
           (vec_duplicate:VHSDF
             (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
          UNSPEC_FMULX))]
  "TARGET_SIMD"
3628 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
  [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
)

;; vmulxs_lane_f32, vmulxs_laneq_f32
;; vmulxd_lane_f64 == vmulx_lane_f64
;; vmulxd_laneq_f64 == vmulx_laneq_f64

(define_insn "*aarch64_vgetfmulx<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
        (unspec:<VEL>
          [(match_operand:<VEL> 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:VDQF 2 "register_operand" "w")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
          UNSPEC_FMULX))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
}
  [(set_attr "type" "fmul<Vetype>")]
)

;; <su>q<addsub>

(define_insn "aarch64_<su_optab><optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
                        (match_operand:VSDQ_I 2 "register_operand" "w")))]
  "TARGET_SIMD"
  "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; suqadd and usqadd

(define_insn "aarch64_<sur>qadd<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
                        (match_operand:VSDQ_I 2 "register_operand" "w")]
                       USSUQADD))]
  "TARGET_SIMD"
  "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_qadd<q>")]
)

;; sqmovun

(define_insn "aarch64_sqmovun<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                           UNSPEC_SQXTUN))]
  "TARGET_SIMD"
  "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; sqmovn and uqmovn

(define_insn "aarch64_<sur>qmovn<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
                           SUQMOVN))]
  "TARGET_SIMD"
  "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; <su>q<absneg>

(define_insn "aarch64_s<optab><mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (UNQOPS:VSDQ_I
          (match_operand:VSDQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD"
  "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_<optab><q>")]
)

;; sq<r>dmulh.

(define_insn "aarch64_sq<r>dmulh<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
        (unspec:VSDQ_HSI
          [(match_operand:VSDQ_HSI 1 "register_operand" "w")
           (match_operand:VSDQ_HSI 2 "register_operand" "w")]
          VQDMULH))]
  "TARGET_SIMD"
  "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
)

;; sq<r>dmulh_lane

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
          VQDMULH))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
          VQDMULH))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
          VQDMULH))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

(define_insn "aarch64_sq<r>dmulh_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
          VQDMULH))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
)

;; sqrdml[as]h.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
  [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
        (unspec:VSDQ_HSI
          [(match_operand:VSDQ_HSI 1 "register_operand" "0")
           (match_operand:VSDQ_HSI 2 "register_operand" "w")
           (match_operand:VSDQ_HSI 3 "register_operand" "w")]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
  "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; sqrdml[as]h_lane.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
{
  operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
  return
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCOND> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
{
  operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
  return
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqrdml[as]h_laneq.

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:VDQHS 0 "register_operand" "=w")
        (unspec:VDQHS
          [(match_operand:VDQHS 1 "register_operand" "0")
           (match_operand:VDQHS 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
{
  operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
  return
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
  [(set (match_operand:SD_HSI 0 "register_operand" "=w")
        (unspec:SD_HSI
          [(match_operand:SD_HSI 1 "register_operand" "0")
           (match_operand:SD_HSI 2 "register_operand" "w")
           (vec_select:<VEL>
             (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
             (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
          SQRDMLH_AS))]
  "TARGET_SIMD_RDMA"
{
  operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
  return
   "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l

(define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (match_operand:VSD_HSI 3 "register_operand" "w")))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
)

;; vqdml[sa]l_lane

(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
                ))
            (const_int 1))))]
  "TARGET_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
  return
    "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
                ))
            (const_int 1))))]
  "TARGET_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
  return
    "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
            (const_int 1))))]
  "TARGET_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
  return
    "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:SD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
              )
            (const_int 1))))]
  "TARGET_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
  return
    "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; vqdml[sa]l_n

(define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (match_operand:VD_HSI 2 "register_operand" "w"))
              (sign_extend:<VWIDE>
                (vec_duplicate:VD_HSI
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

;; sqdml[as]l2

(define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 3 "register_operand" "w")
                  (match_dup 4))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
                                                  operands[2], operands[3], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:VQ_HSI 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
                                                  operands[2], operands[3], p));
  DONE;
})

;; vqdml[sa]l2_lane

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
                  (vec_select:<VEL>
                    (match_operand:<VCOND> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])
                  ))))
            (const_int 1))))]
  "TARGET_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
  return
   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
                  (vec_select:<VEL>
                    (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
                    (parallel [(match_operand:SI 4 "immediate_operand" "i")])
                  ))))
            (const_int 1))))]
  "TARGET_SIMD"
{
  operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
  return
   "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
}
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlal2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCOND> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       operands[4], p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
   (match_operand:SI 4 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        operands[4], p));
  DONE;
})

(define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (SBINQOPS:<VWIDE>
          (match_operand:<VWIDE> 1 "register_operand" "0")
          (ss_ashift:<VWIDE>
            (mult:<VWIDE>
              (sign_extend:<VWIDE>
                (vec_select:<VHALF>
                  (match_operand:VQ_HSI 2 "register_operand" "w")
                  (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
              (sign_extend:<VWIDE>
                (vec_duplicate:<VHALF>
                  (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
            (const_int 1))))]
  "TARGET_SIMD"
  "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmlal2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], operands[3],
                                                    p));
  DONE;
})

(define_expand "aarch64_sqdmlsl2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:<VWIDE> 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")
   (match_operand:<VEL> 3 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], operands[3],
                                                    p));
  DONE;
})

;; vqdmull

(define_insn "aarch64_sqdmull<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (match_operand:VSD_HSI 2 "register_operand" "w")))
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
)

;; vqdmull_lane

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_duplicate:VD_HSI
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
              ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_duplicate:VD_HSI
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
              ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_lane<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
              ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull_laneq<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:SD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_select:<VEL>
                (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
              ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull_n

(define_insn "aarch64_sqdmull_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (match_operand:VD_HSI 1 "register_operand" "w"))
            (sign_extend:<VWIDE>
              (vec_duplicate:VD_HSI
                (match_operand:<VEL> 2 "register_operand" "<vwx>")))
            )
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

;; vqdmull2

(define_insn "aarch64_sqdmull2<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 2 "register_operand" "w")
                (match_dup 3)))
            )
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:VQ_HSI 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
                                                  operands[2], p));
  DONE;
})

;; vqdmull2_lane

(define_insn "aarch64_sqdmull2_lane<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_duplicate:<VHALF>
                (vec_select:<VEL>
                  (match_operand:<VCOND> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
              ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
  return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_insn "aarch64_sqdmull2_laneq<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_duplicate:<VHALF>
                (vec_select:<VEL>
                  (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
                  (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
              ))
          (const_int 1)))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
  return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
}
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_lane<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCOND> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
                                                       operands[2], operands[3],
                                                       p));
  DONE;
})

(define_expand "aarch64_sqdmull2_laneq<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
                                                        operands[2], operands[3],
                                                        p));
  DONE;
})

;; vqdmull2_n

(define_insn "aarch64_sqdmull2_n<mode>_internal"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (ss_ashift:<VWIDE>
          (mult:<VWIDE>
            (sign_extend:<VWIDE>
              (vec_select:<VHALF>
                (match_operand:VQ_HSI 1 "register_operand" "w")
                (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
            (sign_extend:<VWIDE>
              (vec_duplicate:<VHALF>
                (match_operand:<VEL> 2 "register_operand" "<vwx>")))
            )
          (const_int 1)))]
  "TARGET_SIMD"
  "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
  [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
)

(define_expand "aarch64_sqdmull2_n<mode>"
  [(match_operand:<VWIDE> 0 "register_operand" "=w")
   (match_operand:VQ_HSI 1 "register_operand" "w")
   (match_operand:<VEL> 2 "register_operand" "w")]
  "TARGET_SIMD"
{
  rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
  emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
                                                    operands[2], p));
  DONE;
})

;; vshl

(define_insn "aarch64_<sur>shl<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI
          [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
           (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
          VSHL))]
  "TARGET_SIMD"
  "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_shift_reg<q>")]
)

;; vqshl

(define_insn "aarch64_<sur>q<r>shl<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I
          [(match_operand:VSDQ_I 1 "register_operand" "w")
           (match_operand:VSDQ_I 2 "register_operand" "w")]
          VQSHL))]
  "TARGET_SIMD"
  "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_sat_shift_reg<q>")]
)

;; vshll_n

(define_insn "aarch64_<sur>shll_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
                         (match_operand:SI 2
                           "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
                        VSHLL))]
  "TARGET_SIMD"
{
  if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
    return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
  else
    return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vshll_high_n

(define_insn "aarch64_<sur>shll2_n<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
        (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
                         (match_operand:SI 2 "immediate_operand" "i")]
                        VSHLL))]
  "TARGET_SIMD"
{
  if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
    return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
  else
    return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
}
  [(set_attr "type" "neon_shift_imm_long")]
)

;; vrshr_n

(define_insn "aarch64_<sur>shr_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
                           (match_operand:SI 2
                             "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                          VRSHR_N))]
  "TARGET_SIMD"
  "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; v(r)sra_n

(define_insn "aarch64_<sur>sra_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
                           (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                           (match_operand:SI 3
                             "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                          VSRA))]
  "TARGET_SIMD"
  "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_acc<q>")]
)

;; vs<lr>i_n

(define_insn "aarch64_<sur>s<lr>i_n<mode>"
  [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
        (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
                           (match_operand:VSDQ_I_DI 2 "register_operand" "w")
                           (match_operand:SI 3
                             "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
                          VSLRI))]
  "TARGET_SIMD"
  "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
  [(set_attr "type" "neon_shift_imm<q>")]
)

;; vqshl(u)

(define_insn "aarch64_<sur>qshl<u>_n<mode>"
  [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
        (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
                        (match_operand:SI 2
                          "aarch64_simd_shift_imm_<ve_mode>" "i")]
                       VQSHL_N))]
  "TARGET_SIMD"
  "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm<q>")]
)

;; vq(r)shr(u)n_n

(define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
  [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
        (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
                            (match_operand:SI 2
                              "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
                           VQSHRN_N))]
  "TARGET_SIMD"
  "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
  [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
)

;; cm(eq|ge|gt|lt|le)
;; Note that we have constraints for both Dz and Z, as different
;; expanders have different ideas of what should be passed to this
;; pattern.
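;; (Per the port's constraint definitions, "Dz" should match a vector
;; of immediate zeros and "Z" the integer constant zero, so the "ZDz"
;; alternative accepts a zero in either representation.)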

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
        (neg:<V_INT_EQUIV>
          (COMPARISONS:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand" "w,w")
            (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz"))))]
  "TARGET_SIMD"
  "@
  cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
  [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w,r")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,w,r")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
{
  /* If we are in the general purpose register file,
     we split to a sequence of comparison and store.  */
  if (GP_REGNUM_P (REGNO (operands[0]))
      && GP_REGNUM_P (REGNO (operands[1])))
    {
      machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
      rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
      rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
      emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
      DONE;
    }
  /* Otherwise, we expand to a similar pattern which does not
     clobber CC_REGNUM.  */
}
  [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,w")
        (neg:DI
          (COMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,w")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz"))))]
  "TARGET_SIMD && reload_completed"
  "@
  cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
  cm<optab>\t%d0, %d1, #0"
  [(set_attr "type" "neon_compare, neon_compare_zero")]
)

;; cm(hs|hi)

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (neg:<V_INT_EQUIV>
          (UCOMPARISONS:<V_INT_EQUIV>
            (match_operand:VDQ_I 1 "register_operand" "w")
            (match_operand:VDQ_I 2 "register_operand" "w"))))]
  "TARGET_SIMD"
  "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
  [(set_attr "type" "neon_compare<q>")]
)

(define_insn_and_split "aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w,r")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r"))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero"))))]
{
  /* If we are in the general purpose register file,
     we split to a sequence of comparison and store.  */
  if (GP_REGNUM_P (REGNO (operands[0]))
      && GP_REGNUM_P (REGNO (operands[1])))
    {
      machine_mode mode = CCmode;
      rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
      rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
      emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
      DONE;
    }
  /* Otherwise, we expand to a similar pattern which does not
     clobber CC_REGNUM.  */
}
  [(set_attr "type" "neon_compare,multiple")]
)

(define_insn "*aarch64_cm<optab>di"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (neg:DI
          (UCOMPARISONS:DI
            (match_operand:DI 1 "register_operand" "w")
            (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w"))))]
  "TARGET_SIMD && reload_completed"
  "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
  [(set_attr "type" "neon_compare")]
)

;; cmtst

;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
;; we don't have any insns using ne, and aarch64_vcond outputs
;; not (neg (eq (and x y) 0))
;; which is rewritten by simplify_rtx as
;; plus (eq (and x y) 0) -1.
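;; The two forms are equivalent: with comparisons producing 1 or 0 per
;; lane, ne = 1 - eq, so neg (ne (and x y) 0) = (eq (and x y) 0) - 1.
;; A lane with (and x y) != 0 gives 0 - 1 = -1 (all ones), and a zero
;; lane gives 1 - 1 = 0, exactly the cmtst result.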

(define_insn "aarch64_cmtst<mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
        (plus:<V_INT_EQUIV>
          (eq:<V_INT_EQUIV>
            (and:VDQ_I
              (match_operand:VDQ_I 1 "register_operand" "w")
              (match_operand:VDQ_I 2 "register_operand" "w"))
            (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
          (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))]
  "TARGET_SIMD"
  "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_tst<q>")]
)

(define_insn_and_split "aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w,r")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand" "w,r")
              (match_operand:DI 2 "register_operand" "w,r"))
            (const_int 0))))
   (clobber (reg:CC CC_REGNUM))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(set (match_operand:DI 0 "register_operand")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand")
              (match_operand:DI 2 "register_operand"))
            (const_int 0))))]
{
  /* If we are in the general purpose register file,
     we split to a sequence of comparison and store.  */
  if (GP_REGNUM_P (REGNO (operands[0]))
      && GP_REGNUM_P (REGNO (operands[1])))
    {
      rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
      machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
      rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
      rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
      emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
      DONE;
    }
  /* Otherwise, we expand to a similar pattern which does not
     clobber CC_REGNUM.  */
}
  [(set_attr "type" "neon_tst,multiple")]
)

(define_insn "*aarch64_cmtstdi"
  [(set (match_operand:DI 0 "register_operand" "=w")
        (neg:DI
          (ne:DI
            (and:DI
              (match_operand:DI 1 "register_operand" "w")
              (match_operand:DI 2 "register_operand" "w"))
            (const_int 0))))]
  "TARGET_SIMD"
  "cmtst\t%d0, %d1, %d2"
  [(set_attr "type" "neon_tst")]
)

;; fcm(eq|ge|gt|le|lt)

(define_insn "aarch64_cm<optab><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
        (neg:<V_INT_EQUIV>
          (COMPARISONS:<V_INT_EQUIV>
            (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
            (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz"))))]
  "TARGET_SIMD"
  "@
  fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
  fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
  [(set_attr "type" "neon_fp_compare_<stype><q>")]
)

;; fac(ge|gt)
;; Note we can also handle what would be fac(le|lt) by
;; generating fac(ge|gt).
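;; For example, what would be "facle v0, v1, v2" (|v1| <= |v2|) is
;; emitted as "facge v0, v2, v1"; the <cmp_1>/<cmp_2> attributes in
;; the output template below perform the operand swap.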
4827
4828 (define_insn "aarch64_fac<optab><mode>"
4829 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4830 (neg:<V_INT_EQUIV>
4831 (FAC_COMPARISONS:<V_INT_EQUIV>
4832 (abs:VHSDF_HSDF
4833 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4834 (abs:VHSDF_HSDF
4835 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4836 )))]
4837 "TARGET_SIMD"
4838 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4839 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4840 )
4841
4842 ;; addp
4843
4844 (define_insn "aarch64_addp<mode>"
4845 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4846 (unspec:VD_BHSI
4847 [(match_operand:VD_BHSI 1 "register_operand" "w")
4848 (match_operand:VD_BHSI 2 "register_operand" "w")]
4849 UNSPEC_ADDP))]
4850 "TARGET_SIMD"
4851 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4852 [(set_attr "type" "neon_reduc_add<q>")]
4853 )
4854
4855 (define_insn "aarch64_addpdi"
4856 [(set (match_operand:DI 0 "register_operand" "=w")
4857 (unspec:DI
4858 [(match_operand:V2DI 1 "register_operand" "w")]
4859 UNSPEC_ADDP))]
4860 "TARGET_SIMD"
4861 "addp\t%d0, %1.2d"
4862 [(set_attr "type" "neon_reduc_add")]
4863 )
4864
4865 ;; sqrt
4866
4867 (define_expand "sqrt<mode>2"
4868 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4869 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4870 "TARGET_SIMD"
4871 {
4872 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4873 DONE;
4874 })
4875
4876 (define_insn "*sqrt<mode>2"
4877 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4878 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4879 "TARGET_SIMD"
4880 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4881 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4882 )
4883
4884 ;; Patterns for vector struct loads and stores.
4885
(define_insn "aarch64_simd_ld2<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

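;; ld2r loads a single two-element structure and replicates it across
;; every lane of the two destination registers.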
(define_insn "aarch64_simd_ld2r<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2_DUP))]
  "TARGET_SIMD"
  "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_all_lanes<q>")]
)

(define_insn "aarch64_vec_load_lanesoi_lane<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (match_operand:OI 2 "register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load2_one_lane")]
)

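;; On big-endian targets the architectural lane numbering of ld2 does
;; not match GCC's vector indices, so the expander loads into a
;; temporary and reverses each register of the list with a tbl permute
;; (aarch64_rev_reglist below) driven by the mask from
;; aarch64_reverse_mask.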
(define_expand "vec_load_lanesoi<mode>"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st2<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesoi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
                     (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_ST2_LANE))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
}
  [(set_attr "type" "neon_store2_one_lane<q>")]
)

(define_expand "vec_store_lanesoi<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:OI [(match_operand:OI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST2))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (OImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_ld3<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_simd_ld3r<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3_DUP))]
  "TARGET_SIMD"
  "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_all_lanes<q>")]
)

(define_insn "aarch64_vec_load_lanesci_lane<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (match_operand:CI 2 "register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load3_one_lane")]
)

(define_expand "vec_load_lanesci<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st3<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesci_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
                     (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_ST3_LANE))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
}
  [(set_attr "type" "neon_store3_one_lane<q>")]
)

(define_expand "vec_store_lanesci<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:CI [(match_operand:CI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST3))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (CImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_ld4<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_simd_ld4r<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4_DUP))]
  "TARGET_SIMD"
  "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_all_lanes<q>")]
)

(define_insn "aarch64_vec_load_lanesxi_lane<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (match_operand:XI 2 "register_operand" "0")
                    (match_operand:SI 3 "immediate_operand" "i")
                    (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4_LANE))]
  "TARGET_SIMD"
{
  operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
  return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
}
  [(set_attr "type" "neon_load4_one_lane")]
)

(define_expand "vec_load_lanesxi<mode>"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
      emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
    }
  else
    emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
  DONE;
})

(define_insn "aarch64_simd_st4<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg<q>")]
)

;; RTL uses GCC vector extension indices, so flip only for assembly.
(define_insn "aarch64_vec_store_lanesxi_lane<mode>"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                     (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
                     (match_operand:SI 2 "immediate_operand" "i")]
                    UNSPEC_ST4_LANE))]
  "TARGET_SIMD"
{
  operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
  return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
}
  [(set_attr "type" "neon_store4_one_lane<q>")]
)

(define_expand "vec_store_lanesxi<mode>"
  [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:XI [(match_operand:XI 1 "register_operand" "w")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_ST4))]
  "TARGET_SIMD"
{
  if (BYTES_BIG_ENDIAN)
    {
      rtx tmp = gen_reg_rtx (XImode);
      rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
      emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
      emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
    }
  else
    emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
  DONE;
})

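;; Reverse the elements of each 128-bit register in a register list,
;; splitting after reload into one tbl per constituent register;
;; operand 2 holds the byte permute mask built by aarch64_reverse_mask.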
(define_insn_and_split "aarch64_rev_reglist<mode>"
  [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
        (unspec:VSTRUCT
          [(match_operand:VSTRUCT 1 "register_operand" "w")
           (match_operand:V16QI 2 "register_operand" "w")]
          UNSPEC_REV_REGLIST))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  int i;
  int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
  for (i = 0; i < nregs; i++)
    {
      rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
      rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
      emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
    }
  DONE;
}
  [(set_attr "type" "neon_tbl1_q")
   (set_attr "length" "<insn_count>")]
)

;; Reload patterns for AdvSIMD register list operands.

(define_expand "mov<mode>"
  [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
        (match_operand:VSTRUCT 1 "general_operand" ""))]
  "TARGET_SIMD"
{
  if (can_create_pseudo_p ())
    {
      if (GET_CODE (operands[0]) != REG)
        operands[1] = force_reg (<MODE>mode, operands[1]);
    }
})


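;; ld1 with a multi-register list loads consecutive elements with no
;; interleaving; these expanders just wrap the address in a MEM of the
;; opaque list mode and defer to the single insns below.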
(define_expand "aarch64_ld1x3<VALLDIF:mode>"
  [(match_operand:CI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[1]);
  emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
  DONE;
})

(define_insn "aarch64_ld1_x3_<mode>"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI
          [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_expand "aarch64_st1x2<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:OI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (OImode, operands[0]);
  emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x2_<mode>"
  [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:OI
          [(match_operand:OI 1 "register_operand" "w")
           (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_2reg<q>")]
)

(define_expand "aarch64_st1x3<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "")
   (match_operand:CI 1 "register_operand" "")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (CImode, operands[0]);
  emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
  DONE;
})

(define_insn "aarch64_st1_x3_<mode>"
  [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:CI
          [(match_operand:CI 1 "register_operand" "w")
           (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store1_3reg<q>")]
)

(define_insn "*aarch64_mov<mode>"
  [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
        (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
  "TARGET_SIMD && !BYTES_BIG_ENDIAN
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "@
   #
   st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
   ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
  [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
                     neon_load<nregs>_<nregs>reg_q")
   (set_attr "length" "<insn_count>,4,4")]
)

(define_insn "aarch64_be_ld1<mode>"
  [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
        (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
                             "aarch64_simd_struct_operand" "Utv")]
                           UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%0<Vmtype>}, %1"
  [(set_attr "type" "neon_load1_1reg<q>")]
)

(define_insn "aarch64_be_st1<mode>"
  [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
                           UNSPEC_ST1))]
  "TARGET_SIMD"
  "st1\\t{%1<Vmtype>}, %0"
  [(set_attr "type" "neon_store1_1reg<q>")]
)

(define_insn "*aarch64_be_movoi"
  [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
        (match_operand:OI 1 "general_operand" " w,w,m"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], OImode)
       || register_operand (operands[1], OImode))"
  "@
   #
   stp\\t%q1, %R1, %0
   ldp\\t%q0, %R0, %1"
  [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
   (set_attr "length" "8,4,4")]
)

(define_insn "*aarch64_be_movci"
  [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
        (match_operand:CI 1 "general_operand" " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], CImode)
       || register_operand (operands[1], CImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "12,4,4")]
)

(define_insn "*aarch64_be_movxi"
  [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
        (match_operand:XI 1 "general_operand" " w,w,o"))]
  "TARGET_SIMD && BYTES_BIG_ENDIAN
   && (register_operand (operands[0], XImode)
       || register_operand (operands[1], XImode))"
  "#"
  [(set_attr "type" "multiple")
   (set_attr "length" "16,4,4")]
)

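;; After reload, moves of the opaque register-list modes are
;; decomposed: register-to-register moves become one TImode move per
;; constituent vector register, and the big-endian memory cases are
;; split into smaller OImode and TImode submoves.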
(define_split
  [(set (match_operand:OI 0 "register_operand")
        (match_operand:OI 1 "register_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
  DONE;
})

(define_split
  [(set (match_operand:CI 0 "nonimmediate_operand")
        (match_operand:CI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], CImode)
      && register_operand (operands[1], CImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
                      simplify_gen_subreg (OImode, operands[1], CImode, 0));
      emit_move_insn (gen_lowpart (V16QImode,
                                   simplify_gen_subreg (TImode, operands[0],
                                                        CImode, 32)),
                      gen_lowpart (V16QImode,
                                   simplify_gen_subreg (TImode, operands[1],
                                                        CImode, 32)));
      DONE;
    }
  else
    FAIL;
})

(define_split
  [(set (match_operand:XI 0 "nonimmediate_operand")
        (match_operand:XI 1 "general_operand"))]
  "TARGET_SIMD && reload_completed"
  [(const_int 0)]
{
  if (register_operand (operands[0], XImode)
      && register_operand (operands[1], XImode))
    {
      aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
      DONE;
    }
  else if (BYTES_BIG_ENDIAN)
    {
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
                      simplify_gen_subreg (OImode, operands[1], XImode, 0));
      emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
                      simplify_gen_subreg (OImode, operands[1], XImode, 32));
      DONE;
    }
  else
    FAIL;
})

(define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
                     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
                                                                mem));
  DONE;
})

(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load2_2reg<q>")]
)

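;; For single-element (DI and DF) vectors there is no ld2/ld3/ld4, so
;; the DX variants below fall back to a multi-register ld1 in .1d form;
;; de-interleaving is a no-op when each vector has only one lane.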
(define_insn "aarch64_ld2<mode>_dreg"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD2_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %T0.1d}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
  [(set_attr "type" "neon_load3_3reg<q>")]
)

(define_insn "aarch64_ld3<mode>_dreg"
  [(set (match_operand:CI 0 "register_operand" "=w")
        (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD3_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %U0.1d}, %1"
  [(set_attr "type" "neon_load1_3reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
  [(set_attr "type" "neon_load4_4reg<q>")]
)

(define_insn "aarch64_ld4<mode>_dreg"
  [(set (match_operand:XI 0 "register_operand" "=w")
        (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD4_DREG))]
  "TARGET_SIMD"
  "ld1\\t{%S0.1d - %V0.1d}, %1"
  [(set_attr "type" "neon_load1_4reg<q>")]
)

(define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1<VALL_F16:mode>"
  [(match_operand:VALL_F16 0 "register_operand")
   (match_operand:DI 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
  else
    emit_move_insn (operands[0], mem);
  DONE;
})

(define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1x2<VQ:mode>"
  [(match_operand:OI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld1x2<VDC:mode>"
  [(match_operand:OI 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "r")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = OImode;
  rtx mem = gen_rtx_MEM (mode, operands[1]);

  emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
  DONE;
})

(define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "=w")
   (match_operand:DI 1 "register_operand" "w")
   (match_operand:VSTRUCT 2 "register_operand" "0")
   (match_operand:SI 3 "immediate_operand" "i")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
                     * <VSTRUCT:nregs>);

  aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
  emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
        operands[0], mem, operands[2], operands[3]));
  DONE;
})

;; Expanders for builtins to extract vector registers from large
;; opaque integer modes.

;; D-register list.

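;; The opaque modes are laid out as consecutive 128-bit registers, so
;; constituent register PART lives at byte offset PART * 16; for a
;; D-register value we extract the containing 128-bit chunk and take
;; its low half.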
(define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
  [(match_operand:VDC 0 "register_operand" "=w")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
  int offset = part * 16;

  emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
  emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
  DONE;
})

;; Q-register list.

(define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VQ 0 "register_operand" "=w")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (match_operand:SI 2 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[2]);
  int offset = part * 16;

  emit_move_insn (operands[0],
                  gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
  DONE;
})

;; Permuted-store expanders for neon intrinsics.

;; Permute instructions

;; vec_perm support

(define_expand "vec_perm<mode>"
  [(match_operand:VB 0 "register_operand")
   (match_operand:VB 1 "register_operand")
   (match_operand:VB 2 "register_operand")
   (match_operand:VB 3 "register_operand")]
  "TARGET_SIMD"
{
  aarch64_expand_vec_perm (operands[0], operands[1],
                           operands[2], operands[3], <nunits>);
  DONE;
})

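;; tbl treats each byte of its index operand as an index into the
;; concatenated table registers and writes zero for out-of-range
;; indices; tbx instead leaves the corresponding result byte unchanged,
;; which is why the tbx patterns below tie operand 1 to the output.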
(define_insn "aarch64_tbl1<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
                    (match_operand:VB 2 "register_operand" "w")]
                   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
  [(set_attr "type" "neon_tbl1<q>")]
)

;; Two source registers.

(define_insn "aarch64_tbl2v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
                       (match_operand:V16QI 2 "register_operand" "w")]
                      UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
  [(set_attr "type" "neon_tbl2_q")]
)

(define_insn "aarch64_tbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:OI 1 "register_operand" "w")
                    (match_operand:VB 2 "register_operand" "w")]
                   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_tbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "0")
                    (match_operand:OI 2 "register_operand" "w")
                    (match_operand:VB 3 "register_operand" "w")]
                   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

;; Three source registers.

(define_insn "aarch64_qtbl3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:CI 1 "register_operand" "w")
                    (match_operand:VB 2 "register_operand" "w")]
                   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

(define_insn "aarch64_qtbx3<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "0")
                    (match_operand:CI 2 "register_operand" "w")
                    (match_operand:VB 3 "register_operand" "w")]
                   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl3")]
)

;; Four source registers.

(define_insn "aarch64_qtbl4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:XI 1 "register_operand" "w")
                    (match_operand:VB 2 "register_operand" "w")]
                   UNSPEC_TBL))]
  "TARGET_SIMD"
  "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

(define_insn "aarch64_qtbx4<mode>"
  [(set (match_operand:VB 0 "register_operand" "=w")
        (unspec:VB [(match_operand:VB 1 "register_operand" "0")
                    (match_operand:XI 2 "register_operand" "w")
                    (match_operand:VB 3 "register_operand" "w")]
                   UNSPEC_TBX))]
  "TARGET_SIMD"
  "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
  [(set_attr "type" "neon_tbl4")]
)

(define_insn_and_split "aarch64_combinev16qi"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
                    (match_operand:V16QI 2 "register_operand" "w")]
                   UNSPEC_CONCAT))]
  "TARGET_SIMD"
  "#"
  "&& reload_completed"
  [(const_int 0)]
{
  aarch64_split_combinev16qi (operands);
  DONE;
}
  [(set_attr "type" "multiple")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
                          (match_operand:VALL_F16 2 "register_operand" "w")]
                         PERMUTE))]
  "TARGET_SIMD"
  "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
  [(set_attr "type" "neon_permute<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.  Note that the immediate (third)
;; operand is a lane index, not a byte index.
(define_insn "aarch64_ext<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
                          (match_operand:VALL_F16 2 "register_operand" "w")
                          (match_operand:SI 3 "immediate_operand" "i")]
                         UNSPEC_EXT))]
  "TARGET_SIMD"
{
  operands[3] = GEN_INT (INTVAL (operands[3])
                         * GET_MODE_UNIT_SIZE (<MODE>mode));
  return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
}
  [(set_attr "type" "neon_ext<q>")]
)

;; This instruction's pattern is generated directly by
;; aarch64_expand_vec_perm_const, so any changes to the pattern would
;; need corresponding changes there.
(define_insn "aarch64_rev<REVERSE:rev_op><mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
                         REVERSE))]
  "TARGET_SIMD"
  "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_rev<q>")]
)

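;; As with the loads above, the DX store variants use st1 in .1d form,
;; since st2/st3/st4 provide no single-element arrangement.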
(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
  [(set_attr "type" "neon_store2_2reg")]
)

(define_insn "aarch64_st2<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_ST2))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %T1.1d}, %0"
  [(set_attr "type" "neon_store1_2reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
  [(set_attr "type" "neon_store3_3reg")]
)

(define_insn "aarch64_st3<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_ST3))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %U1.1d}, %0"
  [(set_attr "type" "neon_store1_3reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                     (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
  [(set_attr "type" "neon_store4_4reg")]
)

(define_insn "aarch64_st4<mode>_dreg"
  [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
        (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
                     (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                    UNSPEC_ST4))]
  "TARGET_SIMD"
  "st1\\t{%S1.1d - %V1.1d}, %0"
  [(set_attr "type" "neon_store1_4reg")]
)

(define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, <VSTRUCT:nregs> * 8);

  emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
  DONE;
})

(define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
  "TARGET_SIMD"
{
  machine_mode mode = <VSTRUCT:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
  DONE;
})

(define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
  [(match_operand:DI 0 "register_operand" "r")
   (match_operand:VSTRUCT 1 "register_operand" "w")
   (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
   (match_operand:SI 2 "immediate_operand")]
  "TARGET_SIMD"
{
  rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
  set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
                     * <VSTRUCT:nregs>);

  emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
        mem, operands[1], operands[2]));
  DONE;
})

(define_expand "aarch64_st1<VALL_F16:mode>"
  [(match_operand:DI 0 "register_operand")
   (match_operand:VALL_F16 1 "register_operand")]
  "TARGET_SIMD"
{
  machine_mode mode = <VALL_F16:MODE>mode;
  rtx mem = gen_rtx_MEM (mode, operands[0]);

  if (BYTES_BIG_ENDIAN)
    emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
  else
    emit_move_insn (mem, operands[1]);
  DONE;
})

;; Expander for builtins to insert vector registers into large
;; opaque integer modes.

;; Q-register list.  We don't need a D-reg inserter as we zero
;; extend them in arm_neon.h and insert the resulting Q-regs.

(define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
  [(match_operand:VSTRUCT 0 "register_operand" "+w")
   (match_operand:VSTRUCT 1 "register_operand" "0")
   (match_operand:VQ 2 "register_operand" "w")
   (match_operand:SI 3 "immediate_operand" "i")]
  "TARGET_SIMD"
{
  int part = INTVAL (operands[3]);
  int offset = part * 16;

  emit_move_insn (operands[0], operands[1]);
  emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
                  operands[2]);
  DONE;
})

;; Standard pattern name vec_init<mode><Vel>.

(define_expand "vec_init<mode><Vel>"
  [(match_operand:VALL_F16 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})

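;; ld1r loads one element and replicates it to every lane, so a
;; vec_duplicate of a memory operand maps onto it directly.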
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL_F16 0 "register_operand" "=w")
        (vec_duplicate:VALL_F16
          (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

(define_insn "aarch64_simd_ld1<mode>_x2"
  [(set (match_operand:OI 0 "register_operand" "=w")
        (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
                    (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
                   UNSPEC_LD1))]
  "TARGET_SIMD"
  "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_2reg<q>")]
)

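;; frecpe returns an initial reciprocal estimate and frecps computes
;; the Newton-Raphson correction factor 2.0 - a * b used to refine it;
;; frecpx returns a reciprocal-exponent value used for scaling.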
(define_insn "@aarch64_frecpe<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
          UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
  [(set_attr "type" "neon_fp_recpe_<stype><q>")]
)

(define_insn "aarch64_frecpx<mode>"
  [(set (match_operand:GPF_F16 0 "register_operand" "=w")
        (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
                        UNSPEC_FRECPX))]
  "TARGET_SIMD"
  "frecpx\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
)

(define_insn "@aarch64_frecps<mode>"
  [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
        (unspec:VHSDF_HSDF
          [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
           (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
          UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<stype><q>")]
)

(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])

;; Standard pattern name vec_extract<mode><Vel>.

(define_expand "vec_extract<mode><Vel>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL_F16 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})

;; aes

(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
                       (match_operand:V16QI 2 "register_operand" "w")]
                      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

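;; aese and aesd start with an AddRoundKey, i.e. an exclusive OR of
;; their two inputs, so aes<op>(a ^ b, 0) computes aes<op>(a, b).  The
;; patterns below let combine fold an explicit XOR with a zero operand
;; into the instruction itself.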
(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(xor:V16QI
                         (match_operand:V16QI 1 "register_operand" "%0")
                         (match_operand:V16QI 2 "register_operand" "w"))
                       (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
                      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

(define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
                       (xor:V16QI
                         (match_operand:V16QI 1 "register_operand" "%0")
                         (match_operand:V16QI 2 "register_operand" "w"))]
                      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)

;; When AES/AESMC fusion is enabled we want the register allocation to
;; look like:
;;    AESE Vn, _
;;    AESMC Vn, Vn
;; So prefer to tie operand 1 to operand 0 when fusing.

(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w,w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
                      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_AES"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")
   (set_attr_alternative "enabled"
     [(if_then_else (match_test
                      "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
        (const_string "yes")
        (const_string "no"))
      (const_string "yes")])]
)

;; When AESE/AESMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aese_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
        (unspec:V16QI
          [(unspec:V16QI
             [(match_operand:V16QI 1 "register_operand" "0")
              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)]
          UNSPEC_AESMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; When AESD/AESIMC fusion is enabled we really want to keep the two together
;; and enforce the register dependency without scheduling or register
;; allocation messing up the order or introducing moves in between.
;; Mash the two together during combine.

(define_insn "*aarch64_crypto_aesd_fused"
  [(set (match_operand:V16QI 0 "register_operand" "=&w")
        (unspec:V16QI
          [(unspec:V16QI
             [(match_operand:V16QI 1 "register_operand" "0")
              (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)]
          UNSPEC_AESIMC))]
  "TARGET_SIMD && TARGET_AES
   && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

;; sha1

(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1 "register_operand" "w")]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

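;; These variants take their input from element 0 of a V4SI value; in
;; RTL terms that is lane 0 on little-endian and lane 3 on big-endian,
;; hence the two separate patterns.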
(define_insn "aarch64_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI
                      (match_operand:V4SI 1 "register_operand" "w")
                      (parallel [(const_int 0)]))]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_be_crypto_sha1hv4si"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(vec_select:SI
                      (match_operand:V4SI 1 "register_operand" "w")
                      (parallel [(const_int 3)]))]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)

(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)

(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)

;; sha256

(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)

(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_SHA2"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)

;; sha512

(define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
                     CRYPTO_SHA512))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su0qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")]
                     UNSPEC_SHA512SU0))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su0\\t%0.2d, %2.2d"
  [(set_attr "type" "crypto_sha512")]
)

(define_insn "aarch64_crypto_sha512su1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
                      (match_operand:V2DI 2 "register_operand" "w")
                      (match_operand:V2DI 3 "register_operand" "w")]
                     UNSPEC_SHA512SU1))]
  "TARGET_SIMD && TARGET_SHA3"
  "sha512su1\\t%0.2d, %2.2d, %3.2d"
  [(set_attr "type" "crypto_sha512")]
)

;; sha3

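;; eor3 is a three-way exclusive OR; rax1 XORs operand 1 with operand 2
;; rotated left by one; xar rotates the XOR of its two inputs right by
;; an immediate; bcax computes op1 ^ (op2 & ~op3).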
(define_insn "eor3q<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
          (xor:VQ_I
            (match_operand:VQ_I 2 "register_operand" "w")
            (match_operand:VQ_I 3 "register_operand" "w"))
          (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_rax1qv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (xor:V2DI
          (rotate:V2DI
            (match_operand:V2DI 2 "register_operand" "w")
            (const_int 1))
          (match_operand:V2DI 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "rax1\\t%0.2d, %1.2d, %2.2d"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "aarch64_xarqv2di"
  [(set (match_operand:V2DI 0 "register_operand" "=w")
        (rotatert:V2DI
          (xor:V2DI
            (match_operand:V2DI 1 "register_operand" "%w")
            (match_operand:V2DI 2 "register_operand" "w"))
          (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
  "TARGET_SIMD && TARGET_SHA3"
  "xar\\t%0.2d, %1.2d, %2.2d, %3"
  [(set_attr "type" "crypto_sha3")]
)

(define_insn "bcaxq<mode>4"
  [(set (match_operand:VQ_I 0 "register_operand" "=w")
        (xor:VQ_I
          (and:VQ_I
            (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
            (match_operand:VQ_I 2 "register_operand" "w"))
          (match_operand:VQ_I 1 "register_operand" "w")))]
  "TARGET_SIMD && TARGET_SHA3"
  "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
  [(set_attr "type" "crypto_sha3")]
)

;; SM3

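;; The SM3 hash and SM4 block-cipher instructions are modelled as plain
;; unspecs; all of them are gated on TARGET_SM4.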
(define_insn "aarch64_sm3ss1qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SM3SS1))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")
                      (match_operand:SI 4 "aarch64_imm2" "Ui2")]
                     CRYPTO_SM3TT))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
  [(set_attr "type" "crypto_sm3")]
)

(define_insn "aarch64_sm3partw<sm3part_op>qv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SM3PART))]
  "TARGET_SIMD && TARGET_SM4"
  "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sm3")]
)

;; SM4

(define_insn "aarch64_sm4eqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SM4E))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4e\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sm4ekeyqv4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SM4EKEY))]
  "TARGET_SIMD && TARGET_SM4"
  "sm4ekey\\t%0.4s, %1.4s, %2.4s"
  [(set_attr "type" "crypto_sm4")]
)

;; fp16fml

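;; fmlal/fmlsl multiply half-precision elements, widen the products to
;; single precision and accumulate into (or subtract from) a
;; single-precision vector.  The _low forms consume the low half of the
;; HF sources and the _high forms (fmlal2/fmlsl2) the high half, chosen
;; by the parallels built with aarch64_simd_vect_par_cnst_half.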
(define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand" "0")
           (match_operand:<VFMLA_W> 2 "register_operand" "w")
           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
          VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
                                            <nunits> * 2, false);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
                                                                operands[1],
                                                                operands[2],
                                                                operands[3],
                                                                p1, p2));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (unspec:VDQSF
          [(match_operand:VDQSF 1 "register_operand" "0")
           (match_operand:<VFMLA_W> 2 "register_operand" "w")
           (match_operand:<VFMLA_W> 3 "register_operand" "w")]
          VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
  rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);

  emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
                                                                 operands[1],
                                                                 operands[2],
                                                                 operands[3],
                                                                 p1, p2));
  DONE;
})

(define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 2 "register_operand" "w")
              (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (neg:<VFMLA_SEL_W>
              (vec_select:<VFMLA_SEL_W>
                (match_operand:<VFMLA_W> 2 "register_operand" "w")
                (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 2 "register_operand" "w")
              (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
  [(set (match_operand:VDQSF 0 "register_operand" "=w")
        (fma:VDQSF
          (float_extend:VDQSF
            (neg:<VFMLA_SEL_W>
              (vec_select:<VFMLA_SEL_W>
                (match_operand:<VFMLA_W> 2 "register_operand" "w")
                (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:VDQSF
            (vec_select:<VFMLA_SEL_W>
              (match_operand:<VFMLA_W> 3 "register_operand" "w")
              (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
          (match_operand:VDQSF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
  [(set_attr "type" "neon_fp_mul_s")]
)

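;; The _lane forms multiply by a single half-precision element that is
;; broadcast with vec_duplicate; operand 4 is the lane number, adjusted
;; for endianness by aarch64_endian_lane_rtx.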
6460 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6461 [(set (match_operand:V2SF 0 "register_operand" "")
6462 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6463 (match_operand:V4HF 2 "register_operand" "")
6464 (match_operand:V4HF 3 "register_operand" "")
6465 (match_operand:SI 4 "aarch64_imm2" "")]
6466 VFMLA16_LOW))]
6467 "TARGET_F16FML"
6468 {
6469 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6470 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6471
6472 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6473 operands[1],
6474 operands[2],
6475 operands[3],
6476 p1, lane));
6477 DONE;
6478 }
6479 )
6480
(define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

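;; Note the "x" constraint on the element operand: the half-precision
;; by-element encoding only provides four bits for that register, so it
;; is restricted to V0-V15.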
(define_insn "aarch64_simd_fmlal_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_lane_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

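;; Fully 128-bit forms: a V4SF accumulator, V8HF data, and the element
;; taken from a V8HF vector, so the lane index may range over 0-7
;; (aarch64_lane_imm3/Ui7).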
(define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
                                                             operands[1],
                                                             operands[2],
                                                             operands[3],
                                                             p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

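;; Mixed forms: 64-bit V2SF/V4HF data with the element taken from a
;; 128-bit V8HF vector.  The half selection is still made in V4HFmode,
;; but the lane number is canonicalized in V8HFmode.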
(define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "")
        (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
                      (match_operand:V4HF 2 "register_operand" "")
                      (match_operand:V8HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_lane_imm3" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
  rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlal_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (vec_select:V2HF
              (match_operand:V4HF 2 "register_operand" "w")
              (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
  [(set (match_operand:V2SF 0 "register_operand" "=w")
        (fma:V2SF
          (float_extend:V2SF
            (neg:V2HF
              (vec_select:V2HF
                (match_operand:V4HF 2 "register_operand" "w")
                (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V2SF
            (vec_duplicate:V2HF
              (vec_select:HF
                (match_operand:V8HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
          (match_operand:V2SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

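;; The converse mixture: 128-bit V4SF/V8HF data with the element taken
;; from a 64-bit V4HF vector, so the lane index is limited to 0-3
;; (aarch64_imm2/Ui2).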
(define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_LOW))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
                                                           operands[1],
                                                           operands[2],
                                                           operands[3],
                                                           p1, lane));
  DONE;
})

(define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "")
        (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
                      (match_operand:V8HF 2 "register_operand" "")
                      (match_operand:V4HF 3 "register_operand" "")
                      (match_operand:SI 4 "aarch64_imm2" "")]
                     VFMLA16_HIGH))]
  "TARGET_F16FML"
{
  rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
  rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));

  emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
                                                            operands[1],
                                                            operands[2],
                                                            operands[3],
                                                            p1, lane));
  DONE;
})

(define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlalq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (vec_select:V4HF
              (match_operand:V8HF 2 "register_operand" "w")
              (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

(define_insn "aarch64_simd_fmlslq_lane_highv4sf"
  [(set (match_operand:V4SF 0 "register_operand" "=w")
        (fma:V4SF
          (float_extend:V4SF
            (neg:V4HF
              (vec_select:V4HF
                (match_operand:V8HF 2 "register_operand" "w")
                (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
          (float_extend:V4SF
            (vec_duplicate:V4HF
              (vec_select:HF
                (match_operand:V4HF 3 "register_operand" "x")
                (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
          (match_operand:V4SF 1 "register_operand" "0")))]
  "TARGET_F16FML"
  "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
  [(set_attr "type" "neon_fp_mul_s")]
)

;; pmull

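;; Both patterns produce the full 128-bit (TI mode) product of two
;; 64-bit polynomials and therefore require the AES crypto extension
;; in addition to SIMD.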
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_AES"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "crypto_pmull")]
)

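;; The PMULL2 form multiplies the upper 64-bit elements of its V2DI
;; operands.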
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_AES"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "crypto_pmull")]
)