[AArch64] Rename cmp_result iterator
[gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL_F16 0 "nonimmediate_operand" "")
23 (match_operand:VALL_F16 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 if (GET_CODE (operands[0]) == MEM
27 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
28 && aarch64_legitimate_address_p (<MODE>mode, operands[0],
29 PARALLEL, 1)))
30 operands[1] = force_reg (<MODE>mode, operands[1]);
31 "
32 )
33
34 (define_expand "movmisalign<mode>"
35 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
36 (match_operand:VALL 1 "general_operand" ""))]
37 "TARGET_SIMD"
38 {
39 /* This pattern is not permitted to fail during expansion: if both arguments
40 are non-registers (e.g. memory := constant, which can be created by the
41 auto-vectorizer), force operand 1 into a register. */
42 if (!register_operand (operands[0], <MODE>mode)
43 && !register_operand (operands[1], <MODE>mode))
44 operands[1] = force_reg (<MODE>mode, operands[1]);
45 })
46
47 (define_insn "aarch64_simd_dup<mode>"
48 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
49 (vec_duplicate:VDQ_I
50 (match_operand:<VEL> 1 "register_operand" "w,?r")))]
51 "TARGET_SIMD"
52 "@
53 dup\\t%0.<Vtype>, %1.<Vetype>[0]
54 dup\\t%0.<Vtype>, %<vw>1"
55 [(set_attr "type" "neon_dup<q>, neon_from_gp<q>")]
56 )
57
58 (define_insn "aarch64_simd_dup<mode>"
59 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
60 (vec_duplicate:VDQF_F16
61 (match_operand:<VEL> 1 "register_operand" "w")))]
62 "TARGET_SIMD"
63 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
64 [(set_attr "type" "neon_dup<q>")]
65 )
66
67 (define_insn "aarch64_dup_lane<mode>"
68 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
69 (vec_duplicate:VALL_F16
70 (vec_select:<VEL>
71 (match_operand:VALL_F16 1 "register_operand" "w")
72 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
73 )))]
74 "TARGET_SIMD"
75 {
76 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
77 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
78 }
79 [(set_attr "type" "neon_dup<q>")]
80 )
81
82 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
83 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
84 (vec_duplicate:VALL_F16_NO_V2Q
85 (vec_select:<VEL>
86 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
87 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
88 )))]
89 "TARGET_SIMD"
90 {
91 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
92 INTVAL (operands[2])));
93 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
94 }
95 [(set_attr "type" "neon_dup<q>")]
96 )
97
98 (define_insn "*aarch64_simd_mov<mode>"
99 [(set (match_operand:VD 0 "nonimmediate_operand"
100 "=w, m, m, w, ?r, ?w, ?r, w")
101 (match_operand:VD 1 "general_operand"
102 "m, Dz, w, w, w, r, r, Dn"))]
103 "TARGET_SIMD
104 && (register_operand (operands[0], <MODE>mode)
105 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
106 {
107 switch (which_alternative)
108 {
109 case 0: return "ldr\t%d0, %1";
110 case 1: return "str\txzr, %0";
111 case 2: return "str\t%d1, %0";
112 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
113 case 4: return "umov\t%0, %1.d[0]";
114 case 5: return "fmov\t%d0, %1";
115 case 6: return "mov\t%0, %1";
116 case 7:
117 return aarch64_output_simd_mov_immediate (operands[1],
118 <MODE>mode, 64);
119 default: gcc_unreachable ();
120 }
121 }
122 [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\
123 neon_logic<q>, neon_to_gp<q>, f_mcr,\
124 mov_reg, neon_move<q>")]
125 )
126
127 (define_insn "*aarch64_simd_mov<mode>"
128 [(set (match_operand:VQ 0 "nonimmediate_operand"
129 "=w, Ump, m, w, ?r, ?w, ?r, w")
130 (match_operand:VQ 1 "general_operand"
131 "m, Dz, w, w, w, r, r, Dn"))]
132 "TARGET_SIMD
133 && (register_operand (operands[0], <MODE>mode)
134 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
135 {
136 switch (which_alternative)
137 {
138 case 0:
139 return "ldr\t%q0, %1";
140 case 1:
141 return "stp\txzr, xzr, %0";
142 case 2:
143 return "str\t%q1, %0";
144 case 3:
145 return "mov\t%0.<Vbtype>, %1.<Vbtype>";
146 case 4:
147 case 5:
148 case 6:
149 return "#";
150 case 7:
151 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
152 default:
153 gcc_unreachable ();
154 }
155 }
156 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
157 neon_stp, neon_logic<q>, multiple, multiple,\
158 multiple, neon_move<q>")
159 (set_attr "length" "4,4,4,4,8,8,8,4")]
160 )
161
162 ;; When storing lane zero we can use the normal STR and its more permissive
163 ;; addressing modes.
164
165 (define_insn "aarch64_store_lane0<mode>"
166 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
167 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
168 (parallel [(match_operand 2 "const_int_operand" "n")])))]
169 "TARGET_SIMD
170 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0"
171 "str\\t%<Vetype>1, %0"
172 [(set_attr "type" "neon_store1_1reg<q>")]
173 )
174
175 (define_insn "load_pair<mode>"
176 [(set (match_operand:VD 0 "register_operand" "=w")
177 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump"))
178 (set (match_operand:VD 2 "register_operand" "=w")
179 (match_operand:VD 3 "memory_operand" "m"))]
180 "TARGET_SIMD
181 && rtx_equal_p (XEXP (operands[3], 0),
182 plus_constant (Pmode,
183 XEXP (operands[1], 0),
184 GET_MODE_SIZE (<MODE>mode)))"
185 "ldp\\t%d0, %d2, %1"
186 [(set_attr "type" "neon_ldp")]
187 )
188
189 (define_insn "store_pair<mode>"
190 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump")
191 (match_operand:VD 1 "register_operand" "w"))
192 (set (match_operand:VD 2 "memory_operand" "=m")
193 (match_operand:VD 3 "register_operand" "w"))]
194 "TARGET_SIMD
195 && rtx_equal_p (XEXP (operands[2], 0),
196 plus_constant (Pmode,
197 XEXP (operands[0], 0),
198 GET_MODE_SIZE (<MODE>mode)))"
199 "stp\\t%d1, %d3, %0"
200 [(set_attr "type" "neon_stp")]
201 )
202
203 (define_split
204 [(set (match_operand:VQ 0 "register_operand" "")
205 (match_operand:VQ 1 "register_operand" ""))]
206 "TARGET_SIMD && reload_completed
207 && GP_REGNUM_P (REGNO (operands[0]))
208 && GP_REGNUM_P (REGNO (operands[1]))"
209 [(const_int 0)]
210 {
211 aarch64_simd_emit_reg_reg_move (operands, DImode, 2);
212 DONE;
213 })
214
215 (define_split
216 [(set (match_operand:VQ 0 "register_operand" "")
217 (match_operand:VQ 1 "register_operand" ""))]
218 "TARGET_SIMD && reload_completed
219 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
220 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
221 [(const_int 0)]
222 {
223 aarch64_split_simd_move (operands[0], operands[1]);
224 DONE;
225 })
226
227 (define_expand "aarch64_split_simd_mov<mode>"
228 [(set (match_operand:VQ 0)
229 (match_operand:VQ 1))]
230 "TARGET_SIMD"
231 {
232 rtx dst = operands[0];
233 rtx src = operands[1];
234
235 if (GP_REGNUM_P (REGNO (src)))
236 {
237 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
238 rtx src_high_part = gen_highpart (<VHALF>mode, src);
239
240 emit_insn
241 (gen_move_lo_quad_<mode> (dst, src_low_part));
242 emit_insn
243 (gen_move_hi_quad_<mode> (dst, src_high_part));
244 }
245
246 else
247 {
248 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
249 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
250 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
251 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
252
253 emit_insn
254 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
255 emit_insn
256 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
257 }
258 DONE;
259 }
260 )
261
262 (define_insn "aarch64_simd_mov_from_<mode>low"
263 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
264 (vec_select:<VHALF>
265 (match_operand:VQ 1 "register_operand" "w")
266 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
267 "TARGET_SIMD && reload_completed"
268 "umov\t%0, %1.d[0]"
269 [(set_attr "type" "neon_to_gp<q>")
270 (set_attr "length" "4")
271 ])
272
273 (define_insn "aarch64_simd_mov_from_<mode>high"
274 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
275 (vec_select:<VHALF>
276 (match_operand:VQ 1 "register_operand" "w")
277 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
278 "TARGET_SIMD && reload_completed"
279 "umov\t%0, %1.d[1]"
280 [(set_attr "type" "neon_to_gp<q>")
281 (set_attr "length" "4")
282 ])
283
284 (define_insn "orn<mode>3"
285 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
286 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
287 (match_operand:VDQ_I 2 "register_operand" "w")))]
288 "TARGET_SIMD"
289 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
290 [(set_attr "type" "neon_logic<q>")]
291 )
292
293 (define_insn "bic<mode>3"
294 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
295 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
296 (match_operand:VDQ_I 2 "register_operand" "w")))]
297 "TARGET_SIMD"
298 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
299 [(set_attr "type" "neon_logic<q>")]
300 )
301
302 (define_insn "add<mode>3"
303 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
304 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
305 (match_operand:VDQ_I 2 "register_operand" "w")))]
306 "TARGET_SIMD"
307 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
308 [(set_attr "type" "neon_add<q>")]
309 )
310
311 (define_insn "sub<mode>3"
312 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
313 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
314 (match_operand:VDQ_I 2 "register_operand" "w")))]
315 "TARGET_SIMD"
316 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
317 [(set_attr "type" "neon_sub<q>")]
318 )
319
320 (define_insn "mul<mode>3"
321 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
322 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
323 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
324 "TARGET_SIMD"
325 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
326 [(set_attr "type" "neon_mul_<Vetype><q>")]
327 )
328
329 (define_insn "bswap<mode>2"
330 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
331 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
332 "TARGET_SIMD"
333 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
334 [(set_attr "type" "neon_rev<q>")]
335 )
336
337 (define_insn "aarch64_rbit<mode>"
338 [(set (match_operand:VB 0 "register_operand" "=w")
339 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
340 UNSPEC_RBIT))]
341 "TARGET_SIMD"
342 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
343 [(set_attr "type" "neon_rbit")]
344 )
345
346 (define_expand "ctz<mode>2"
347 [(set (match_operand:VS 0 "register_operand")
348 (ctz:VS (match_operand:VS 1 "register_operand")))]
349 "TARGET_SIMD"
350 {
351 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
352 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
353 <MODE>mode, 0);
354 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
355 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
356 DONE;
357 }
358 )
359
360 (define_expand "xorsign<mode>3"
361 [(match_operand:VHSDF 0 "register_operand")
362 (match_operand:VHSDF 1 "register_operand")
363 (match_operand:VHSDF 2 "register_operand")]
364 "TARGET_SIMD"
365 {
366
367 machine_mode imode = <V_INT_EQUIV>mode;
368 rtx v_bitmask = gen_reg_rtx (imode);
369 rtx op1x = gen_reg_rtx (imode);
370 rtx op2x = gen_reg_rtx (imode);
371
372 rtx arg1 = lowpart_subreg (imode, operands[1], <MODE>mode);
373 rtx arg2 = lowpart_subreg (imode, operands[2], <MODE>mode);
374
375 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
376
377 emit_move_insn (v_bitmask,
378 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
379 HOST_WIDE_INT_M1U << bits));
380
381 emit_insn (gen_and<v_int_equiv>3 (op2x, v_bitmask, arg2));
382 emit_insn (gen_xor<v_int_equiv>3 (op1x, arg1, op2x));
383 emit_move_insn (operands[0],
384 lowpart_subreg (<MODE>mode, op1x, imode));
385 DONE;
386 }
387 )
388
389 (define_expand "copysign<mode>3"
390 [(match_operand:VHSDF 0 "register_operand")
391 (match_operand:VHSDF 1 "register_operand")
392 (match_operand:VHSDF 2 "register_operand")]
393 "TARGET_FLOAT && TARGET_SIMD"
394 {
395 rtx v_bitmask = gen_reg_rtx (<V_INT_EQUIV>mode);
396 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
397
398 emit_move_insn (v_bitmask,
399 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
400 HOST_WIDE_INT_M1U << bits));
401 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], v_bitmask,
402 operands[2], operands[1]));
403 DONE;
404 }
405 )
406
407 (define_insn "*aarch64_mul3_elt<mode>"
408 [(set (match_operand:VMUL 0 "register_operand" "=w")
409 (mult:VMUL
410 (vec_duplicate:VMUL
411 (vec_select:<VEL>
412 (match_operand:VMUL 1 "register_operand" "<h_con>")
413 (parallel [(match_operand:SI 2 "immediate_operand")])))
414 (match_operand:VMUL 3 "register_operand" "w")))]
415 "TARGET_SIMD"
416 {
417 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
418 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
419 }
420 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
421 )
422
423 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
424 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
425 (mult:VMUL_CHANGE_NLANES
426 (vec_duplicate:VMUL_CHANGE_NLANES
427 (vec_select:<VEL>
428 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
429 (parallel [(match_operand:SI 2 "immediate_operand")])))
430 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
431 "TARGET_SIMD"
432 {
433 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
434 INTVAL (operands[2])));
435 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
436 }
437 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
438 )
439
440 (define_insn "*aarch64_mul3_elt_from_dup<mode>"
441 [(set (match_operand:VMUL 0 "register_operand" "=w")
442 (mult:VMUL
443 (vec_duplicate:VMUL
444 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
445 (match_operand:VMUL 2 "register_operand" "w")))]
446 "TARGET_SIMD"
447 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
448 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
449 )
450
451 (define_insn "aarch64_rsqrte<mode>"
452 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
453 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
454 UNSPEC_RSQRTE))]
455 "TARGET_SIMD"
456 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
457 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
458
459 (define_insn "aarch64_rsqrts<mode>"
460 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
461 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
462 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
463 UNSPEC_RSQRTS))]
464 "TARGET_SIMD"
465 "frsqrts\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
466 [(set_attr "type" "neon_fp_rsqrts_<stype><q>")])
467
468 (define_expand "rsqrt<mode>2"
469 [(set (match_operand:VALLF 0 "register_operand" "=w")
470 (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")]
471 UNSPEC_RSQRT))]
472 "TARGET_SIMD"
473 {
474 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
475 DONE;
476 })
477
478 (define_insn "*aarch64_mul3_elt_to_64v2df"
479 [(set (match_operand:DF 0 "register_operand" "=w")
480 (mult:DF
481 (vec_select:DF
482 (match_operand:V2DF 1 "register_operand" "w")
483 (parallel [(match_operand:SI 2 "immediate_operand")]))
484 (match_operand:DF 3 "register_operand" "w")))]
485 "TARGET_SIMD"
486 {
487 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
488 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
489 }
490 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
491 )
492
493 (define_insn "neg<mode>2"
494 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
495 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
496 "TARGET_SIMD"
497 "neg\t%0.<Vtype>, %1.<Vtype>"
498 [(set_attr "type" "neon_neg<q>")]
499 )
500
501 (define_insn "abs<mode>2"
502 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
503 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
504 "TARGET_SIMD"
505 "abs\t%0.<Vtype>, %1.<Vtype>"
506 [(set_attr "type" "neon_abs<q>")]
507 )
508
509 ;; The intrinsic version of integer ABS must not be allowed to
510 ;; combine with any operation with an integerated ABS step, such
511 ;; as SABD.
512 (define_insn "aarch64_abs<mode>"
513 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
514 (unspec:VSDQ_I_DI
515 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")]
516 UNSPEC_ABS))]
517 "TARGET_SIMD"
518 "abs\t%<v>0<Vmtype>, %<v>1<Vmtype>"
519 [(set_attr "type" "neon_abs<q>")]
520 )
521
522 (define_insn "abd<mode>_3"
523 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
524 (abs:VDQ_BHSI (minus:VDQ_BHSI
525 (match_operand:VDQ_BHSI 1 "register_operand" "w")
526 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
527 "TARGET_SIMD"
528 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
529 [(set_attr "type" "neon_abd<q>")]
530 )
531
532 (define_insn "aba<mode>_3"
533 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
534 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
535 (match_operand:VDQ_BHSI 1 "register_operand" "w")
536 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
537 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
538 "TARGET_SIMD"
539 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
540 [(set_attr "type" "neon_arith_acc<q>")]
541 )
542
543 (define_insn "fabd<mode>3"
544 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
545 (abs:VHSDF_HSDF
546 (minus:VHSDF_HSDF
547 (match_operand:VHSDF_HSDF 1 "register_operand" "w")
548 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))))]
549 "TARGET_SIMD"
550 "fabd\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
551 [(set_attr "type" "neon_fp_abd_<stype><q>")]
552 )
553
554 (define_insn "and<mode>3"
555 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
556 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
557 (match_operand:VDQ_I 2 "register_operand" "w")))]
558 "TARGET_SIMD"
559 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
560 [(set_attr "type" "neon_logic<q>")]
561 )
562
563 (define_insn "ior<mode>3"
564 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
565 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
566 (match_operand:VDQ_I 2 "register_operand" "w")))]
567 "TARGET_SIMD"
568 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
569 [(set_attr "type" "neon_logic<q>")]
570 )
571
572 (define_insn "xor<mode>3"
573 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
574 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
575 (match_operand:VDQ_I 2 "register_operand" "w")))]
576 "TARGET_SIMD"
577 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
578 [(set_attr "type" "neon_logic<q>")]
579 )
580
581 (define_insn "one_cmpl<mode>2"
582 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
583 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
584 "TARGET_SIMD"
585 "not\t%0.<Vbtype>, %1.<Vbtype>"
586 [(set_attr "type" "neon_logic<q>")]
587 )
588
589 (define_insn "aarch64_simd_vec_set<mode>"
590 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
591 (vec_merge:VDQ_BHSI
592 (vec_duplicate:VDQ_BHSI
593 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
594 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
595 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
596 "TARGET_SIMD"
597 {
598 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
599 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
600 switch (which_alternative)
601 {
602 case 0:
603 return "ins\\t%0.<Vetype>[%p2], %w1";
604 case 1:
605 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
606 case 2:
607 return "ld1\\t{%0.<Vetype>}[%p2], %1";
608 default:
609 gcc_unreachable ();
610 }
611 }
612 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")]
613 )
614
615 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
616 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
617 (vec_merge:VALL_F16
618 (vec_duplicate:VALL_F16
619 (vec_select:<VEL>
620 (match_operand:VALL_F16 3 "register_operand" "w")
621 (parallel
622 [(match_operand:SI 4 "immediate_operand" "i")])))
623 (match_operand:VALL_F16 1 "register_operand" "0")
624 (match_operand:SI 2 "immediate_operand" "i")))]
625 "TARGET_SIMD"
626 {
627 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
628 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
629 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4])));
630
631 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
632 }
633 [(set_attr "type" "neon_ins<q>")]
634 )
635
636 (define_insn "*aarch64_simd_vec_copy_lane_<vswap_width_name><mode>"
637 [(set (match_operand:VALL_F16_NO_V2Q 0 "register_operand" "=w")
638 (vec_merge:VALL_F16_NO_V2Q
639 (vec_duplicate:VALL_F16_NO_V2Q
640 (vec_select:<VEL>
641 (match_operand:<VSWAP_WIDTH> 3 "register_operand" "w")
642 (parallel
643 [(match_operand:SI 4 "immediate_operand" "i")])))
644 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
645 (match_operand:SI 2 "immediate_operand" "i")))]
646 "TARGET_SIMD"
647 {
648 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
649 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
650 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
651 INTVAL (operands[4])));
652
653 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
654 }
655 [(set_attr "type" "neon_ins<q>")]
656 )
657
658 (define_insn "aarch64_simd_lshr<mode>"
659 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
660 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
661 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
662 "TARGET_SIMD"
663 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
664 [(set_attr "type" "neon_shift_imm<q>")]
665 )
666
667 (define_insn "aarch64_simd_ashr<mode>"
668 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
669 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
670 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
671 "TARGET_SIMD"
672 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
673 [(set_attr "type" "neon_shift_imm<q>")]
674 )
675
676 (define_insn "aarch64_simd_imm_shl<mode>"
677 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
678 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
679 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
680 "TARGET_SIMD"
681 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
682 [(set_attr "type" "neon_shift_imm<q>")]
683 )
684
685 (define_insn "aarch64_simd_reg_sshl<mode>"
686 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
687 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
688 (match_operand:VDQ_I 2 "register_operand" "w")))]
689 "TARGET_SIMD"
690 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
691 [(set_attr "type" "neon_shift_reg<q>")]
692 )
693
694 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
695 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
696 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
697 (match_operand:VDQ_I 2 "register_operand" "w")]
698 UNSPEC_ASHIFT_UNSIGNED))]
699 "TARGET_SIMD"
700 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
701 [(set_attr "type" "neon_shift_reg<q>")]
702 )
703
704 (define_insn "aarch64_simd_reg_shl<mode>_signed"
705 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
706 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
707 (match_operand:VDQ_I 2 "register_operand" "w")]
708 UNSPEC_ASHIFT_SIGNED))]
709 "TARGET_SIMD"
710 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
711 [(set_attr "type" "neon_shift_reg<q>")]
712 )
713
714 (define_expand "ashl<mode>3"
715 [(match_operand:VDQ_I 0 "register_operand" "")
716 (match_operand:VDQ_I 1 "register_operand" "")
717 (match_operand:SI 2 "general_operand" "")]
718 "TARGET_SIMD"
719 {
720 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
721 int shift_amount;
722
723 if (CONST_INT_P (operands[2]))
724 {
725 shift_amount = INTVAL (operands[2]);
726 if (shift_amount >= 0 && shift_amount < bit_width)
727 {
728 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
729 shift_amount);
730 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
731 operands[1],
732 tmp));
733 DONE;
734 }
735 else
736 {
737 operands[2] = force_reg (SImode, operands[2]);
738 }
739 }
740 else if (MEM_P (operands[2]))
741 {
742 operands[2] = force_reg (SImode, operands[2]);
743 }
744
745 if (REG_P (operands[2]))
746 {
747 rtx tmp = gen_reg_rtx (<MODE>mode);
748 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
749 convert_to_mode (<VEL>mode,
750 operands[2],
751 0)));
752 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
753 tmp));
754 DONE;
755 }
756 else
757 FAIL;
758 }
759 )
760
761 (define_expand "lshr<mode>3"
762 [(match_operand:VDQ_I 0 "register_operand" "")
763 (match_operand:VDQ_I 1 "register_operand" "")
764 (match_operand:SI 2 "general_operand" "")]
765 "TARGET_SIMD"
766 {
767 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
768 int shift_amount;
769
770 if (CONST_INT_P (operands[2]))
771 {
772 shift_amount = INTVAL (operands[2]);
773 if (shift_amount > 0 && shift_amount <= bit_width)
774 {
775 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
776 shift_amount);
777 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
778 operands[1],
779 tmp));
780 DONE;
781 }
782 else
783 operands[2] = force_reg (SImode, operands[2]);
784 }
785 else if (MEM_P (operands[2]))
786 {
787 operands[2] = force_reg (SImode, operands[2]);
788 }
789
790 if (REG_P (operands[2]))
791 {
792 rtx tmp = gen_reg_rtx (SImode);
793 rtx tmp1 = gen_reg_rtx (<MODE>mode);
794 emit_insn (gen_negsi2 (tmp, operands[2]));
795 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
796 convert_to_mode (<VEL>mode,
797 tmp, 0)));
798 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
799 operands[1],
800 tmp1));
801 DONE;
802 }
803 else
804 FAIL;
805 }
806 )
807
808 (define_expand "ashr<mode>3"
809 [(match_operand:VDQ_I 0 "register_operand" "")
810 (match_operand:VDQ_I 1 "register_operand" "")
811 (match_operand:SI 2 "general_operand" "")]
812 "TARGET_SIMD"
813 {
814 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
815 int shift_amount;
816
817 if (CONST_INT_P (operands[2]))
818 {
819 shift_amount = INTVAL (operands[2]);
820 if (shift_amount > 0 && shift_amount <= bit_width)
821 {
822 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
823 shift_amount);
824 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
825 operands[1],
826 tmp));
827 DONE;
828 }
829 else
830 operands[2] = force_reg (SImode, operands[2]);
831 }
832 else if (MEM_P (operands[2]))
833 {
834 operands[2] = force_reg (SImode, operands[2]);
835 }
836
837 if (REG_P (operands[2]))
838 {
839 rtx tmp = gen_reg_rtx (SImode);
840 rtx tmp1 = gen_reg_rtx (<MODE>mode);
841 emit_insn (gen_negsi2 (tmp, operands[2]));
842 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
843 convert_to_mode (<VEL>mode,
844 tmp, 0)));
845 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
846 operands[1],
847 tmp1));
848 DONE;
849 }
850 else
851 FAIL;
852 }
853 )
854
855 (define_expand "vashl<mode>3"
856 [(match_operand:VDQ_I 0 "register_operand" "")
857 (match_operand:VDQ_I 1 "register_operand" "")
858 (match_operand:VDQ_I 2 "register_operand" "")]
859 "TARGET_SIMD"
860 {
861 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
862 operands[2]));
863 DONE;
864 })
865
866 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
867 ;; Negating individual lanes most certainly offsets the
868 ;; gain from vectorization.
869 (define_expand "vashr<mode>3"
870 [(match_operand:VDQ_BHSI 0 "register_operand" "")
871 (match_operand:VDQ_BHSI 1 "register_operand" "")
872 (match_operand:VDQ_BHSI 2 "register_operand" "")]
873 "TARGET_SIMD"
874 {
875 rtx neg = gen_reg_rtx (<MODE>mode);
876 emit (gen_neg<mode>2 (neg, operands[2]));
877 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
878 neg));
879 DONE;
880 })
881
882 ;; DI vector shift
883 (define_expand "aarch64_ashr_simddi"
884 [(match_operand:DI 0 "register_operand" "=w")
885 (match_operand:DI 1 "register_operand" "w")
886 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
887 "TARGET_SIMD"
888 {
889 /* An arithmetic shift right by 64 fills the result with copies of the sign
890 bit, just like asr by 63 - however the standard pattern does not handle
891 a shift by 64. */
892 if (INTVAL (operands[2]) == 64)
893 operands[2] = GEN_INT (63);
894 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
895 DONE;
896 }
897 )
898
899 (define_expand "vlshr<mode>3"
900 [(match_operand:VDQ_BHSI 0 "register_operand" "")
901 (match_operand:VDQ_BHSI 1 "register_operand" "")
902 (match_operand:VDQ_BHSI 2 "register_operand" "")]
903 "TARGET_SIMD"
904 {
905 rtx neg = gen_reg_rtx (<MODE>mode);
906 emit (gen_neg<mode>2 (neg, operands[2]));
907 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
908 neg));
909 DONE;
910 })
911
912 (define_expand "aarch64_lshr_simddi"
913 [(match_operand:DI 0 "register_operand" "=w")
914 (match_operand:DI 1 "register_operand" "w")
915 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
916 "TARGET_SIMD"
917 {
918 if (INTVAL (operands[2]) == 64)
919 emit_move_insn (operands[0], const0_rtx);
920 else
921 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
922 DONE;
923 }
924 )
925
926 (define_expand "vec_set<mode>"
927 [(match_operand:VDQ_BHSI 0 "register_operand")
928 (match_operand:<VEL> 1 "register_operand")
929 (match_operand:SI 2 "immediate_operand")]
930 "TARGET_SIMD"
931 {
932 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
933 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
934 GEN_INT (elem), operands[0]));
935 DONE;
936 }
937 )
938
939 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
940 (define_insn "vec_shr_<mode>"
941 [(set (match_operand:VD 0 "register_operand" "=w")
942 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
943 (match_operand:SI 2 "immediate_operand" "i")]
944 UNSPEC_VEC_SHR))]
945 "TARGET_SIMD"
946 {
947 if (BYTES_BIG_ENDIAN)
948 return "shl %d0, %d1, %2";
949 else
950 return "ushr %d0, %d1, %2";
951 }
952 [(set_attr "type" "neon_shift_imm")]
953 )
954
955 (define_insn "aarch64_simd_vec_setv2di"
956 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
957 (vec_merge:V2DI
958 (vec_duplicate:V2DI
959 (match_operand:DI 1 "register_operand" "r,w"))
960 (match_operand:V2DI 3 "register_operand" "0,0")
961 (match_operand:SI 2 "immediate_operand" "i,i")))]
962 "TARGET_SIMD"
963 {
964 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
965 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
966 switch (which_alternative)
967 {
968 case 0:
969 return "ins\\t%0.d[%p2], %1";
970 case 1:
971 return "ins\\t%0.d[%p2], %1.d[0]";
972 default:
973 gcc_unreachable ();
974 }
975 }
976 [(set_attr "type" "neon_from_gp, neon_ins_q")]
977 )
978
979 (define_expand "vec_setv2di"
980 [(match_operand:V2DI 0 "register_operand")
981 (match_operand:DI 1 "register_operand")
982 (match_operand:SI 2 "immediate_operand")]
983 "TARGET_SIMD"
984 {
985 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
986 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
987 GEN_INT (elem), operands[0]));
988 DONE;
989 }
990 )
991
992 (define_insn "aarch64_simd_vec_set<mode>"
993 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
994 (vec_merge:VDQF_F16
995 (vec_duplicate:VDQF_F16
996 (match_operand:<VEL> 1 "register_operand" "w"))
997 (match_operand:VDQF_F16 3 "register_operand" "0")
998 (match_operand:SI 2 "immediate_operand" "i")))]
999 "TARGET_SIMD"
1000 {
1001 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
1002
1003 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1004 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1005 }
1006 [(set_attr "type" "neon_ins<q>")]
1007 )
1008
1009 (define_expand "vec_set<mode>"
1010 [(match_operand:VDQF_F16 0 "register_operand" "+w")
1011 (match_operand:<VEL> 1 "register_operand" "w")
1012 (match_operand:SI 2 "immediate_operand" "")]
1013 "TARGET_SIMD"
1014 {
1015 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1016 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1017 GEN_INT (elem), operands[0]));
1018 DONE;
1019 }
1020 )
1021
1022
1023 (define_insn "aarch64_mla<mode>"
1024 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1025 (plus:VDQ_BHSI (mult:VDQ_BHSI
1026 (match_operand:VDQ_BHSI 2 "register_operand" "w")
1027 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
1028 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
1029 "TARGET_SIMD"
1030 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1031 [(set_attr "type" "neon_mla_<Vetype><q>")]
1032 )
1033
1034 (define_insn "*aarch64_mla_elt<mode>"
1035 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1036 (plus:VDQHS
1037 (mult:VDQHS
1038 (vec_duplicate:VDQHS
1039 (vec_select:<VEL>
1040 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1041 (parallel [(match_operand:SI 2 "immediate_operand")])))
1042 (match_operand:VDQHS 3 "register_operand" "w"))
1043 (match_operand:VDQHS 4 "register_operand" "0")))]
1044 "TARGET_SIMD"
1045 {
1046 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1047 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1048 }
1049 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1050 )
1051
1052 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
1053 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1054 (plus:VDQHS
1055 (mult:VDQHS
1056 (vec_duplicate:VDQHS
1057 (vec_select:<VEL>
1058 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1059 (parallel [(match_operand:SI 2 "immediate_operand")])))
1060 (match_operand:VDQHS 3 "register_operand" "w"))
1061 (match_operand:VDQHS 4 "register_operand" "0")))]
1062 "TARGET_SIMD"
1063 {
1064 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1065 INTVAL (operands[2])));
1066 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1067 }
1068 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1069 )
1070
1071 (define_insn "*aarch64_mla_elt_merge<mode>"
1072 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1073 (plus:VDQHS
1074 (mult:VDQHS (vec_duplicate:VDQHS
1075 (match_operand:<VEL> 1 "register_operand" "w"))
1076 (match_operand:VDQHS 2 "register_operand" "w"))
1077 (match_operand:VDQHS 3 "register_operand" "0")))]
1078 "TARGET_SIMD"
1079 "mla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1080 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1081 )
1082
1083 (define_insn "aarch64_mls<mode>"
1084 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1085 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
1086 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
1087 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
1088 "TARGET_SIMD"
1089 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1090 [(set_attr "type" "neon_mla_<Vetype><q>")]
1091 )
1092
1093 (define_insn "*aarch64_mls_elt<mode>"
1094 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1095 (minus:VDQHS
1096 (match_operand:VDQHS 4 "register_operand" "0")
1097 (mult:VDQHS
1098 (vec_duplicate:VDQHS
1099 (vec_select:<VEL>
1100 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1101 (parallel [(match_operand:SI 2 "immediate_operand")])))
1102 (match_operand:VDQHS 3 "register_operand" "w"))))]
1103 "TARGET_SIMD"
1104 {
1105 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1106 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1107 }
1108 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1109 )
1110
1111 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
1112 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1113 (minus:VDQHS
1114 (match_operand:VDQHS 4 "register_operand" "0")
1115 (mult:VDQHS
1116 (vec_duplicate:VDQHS
1117 (vec_select:<VEL>
1118 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1119 (parallel [(match_operand:SI 2 "immediate_operand")])))
1120 (match_operand:VDQHS 3 "register_operand" "w"))))]
1121 "TARGET_SIMD"
1122 {
1123 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1124 INTVAL (operands[2])));
1125 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1126 }
1127 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1128 )
1129
1130 (define_insn "*aarch64_mls_elt_merge<mode>"
1131 [(set (match_operand:VDQHS 0 "register_operand" "=w")
1132 (minus:VDQHS
1133 (match_operand:VDQHS 1 "register_operand" "0")
1134 (mult:VDQHS (vec_duplicate:VDQHS
1135 (match_operand:<VEL> 2 "register_operand" "w"))
1136 (match_operand:VDQHS 3 "register_operand" "w"))))]
1137 "TARGET_SIMD"
1138 "mls\t%0.<Vtype>, %3.<Vtype>, %2.<Vetype>[0]"
1139 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1140 )
1141
1142 ;; Max/Min operations.
1143 (define_insn "<su><maxmin><mode>3"
1144 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1145 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
1146 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
1147 "TARGET_SIMD"
1148 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1149 [(set_attr "type" "neon_minmax<q>")]
1150 )
1151
1152 (define_expand "<su><maxmin>v2di3"
1153 [(set (match_operand:V2DI 0 "register_operand" "")
1154 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
1155 (match_operand:V2DI 2 "register_operand" "")))]
1156 "TARGET_SIMD"
1157 {
1158 enum rtx_code cmp_operator;
1159 rtx cmp_fmt;
1160
1161 switch (<CODE>)
1162 {
1163 case UMIN:
1164 cmp_operator = LTU;
1165 break;
1166 case SMIN:
1167 cmp_operator = LT;
1168 break;
1169 case UMAX:
1170 cmp_operator = GTU;
1171 break;
1172 case SMAX:
1173 cmp_operator = GT;
1174 break;
1175 default:
1176 gcc_unreachable ();
1177 }
1178
1179 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1180 emit_insn (gen_vcondv2div2di (operands[0], operands[1],
1181 operands[2], cmp_fmt, operands[1], operands[2]));
1182 DONE;
1183 })
1184
1185 ;; Pairwise Integer Max/Min operations.
1186 (define_insn "aarch64_<maxmin_uns>p<mode>"
1187 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1188 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
1189 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
1190 MAXMINV))]
1191 "TARGET_SIMD"
1192 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1193 [(set_attr "type" "neon_minmax<q>")]
1194 )
1195
1196 ;; Pairwise FP Max/Min operations.
1197 (define_insn "aarch64_<maxmin_uns>p<mode>"
1198 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1199 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
1200 (match_operand:VHSDF 2 "register_operand" "w")]
1201 FMAXMINV))]
1202 "TARGET_SIMD"
1203 "<maxmin_uns_op>p\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1204 [(set_attr "type" "neon_minmax<q>")]
1205 )
1206
1207 ;; vec_concat gives a new vector with the low elements from operand 1, and
1208 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1209 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1210 ;; What that means, is that the RTL descriptions of the below patterns
1211 ;; need to change depending on endianness.
1212
1213 ;; Move to the low architectural bits of the register.
1214 ;; On little-endian this is { operand, zeroes }
1215 ;; On big-endian this is { zeroes, operand }
1216
1217 (define_insn "move_lo_quad_internal_<mode>"
1218 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1219 (vec_concat:VQ_NO2E
1220 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1221 (vec_duplicate:<VHALF> (const_int 0))))]
1222 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1223 "@
1224 dup\\t%d0, %1.d[0]
1225 fmov\\t%d0, %1
1226 dup\\t%d0, %1"
1227 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1228 (set_attr "simd" "yes,*,yes")
1229 (set_attr "fp" "*,yes,*")
1230 (set_attr "length" "4")]
1231 )
1232
1233 (define_insn "move_lo_quad_internal_<mode>"
1234 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1235 (vec_concat:VQ_2E
1236 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1237 (const_int 0)))]
1238 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1239 "@
1240 dup\\t%d0, %1.d[0]
1241 fmov\\t%d0, %1
1242 dup\\t%d0, %1"
1243 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1244 (set_attr "simd" "yes,*,yes")
1245 (set_attr "fp" "*,yes,*")
1246 (set_attr "length" "4")]
1247 )
1248
1249 (define_insn "move_lo_quad_internal_be_<mode>"
1250 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1251 (vec_concat:VQ_NO2E
1252 (vec_duplicate:<VHALF> (const_int 0))
1253 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1254 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1255 "@
1256 dup\\t%d0, %1.d[0]
1257 fmov\\t%d0, %1
1258 dup\\t%d0, %1"
1259 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1260 (set_attr "simd" "yes,*,yes")
1261 (set_attr "fp" "*,yes,*")
1262 (set_attr "length" "4")]
1263 )
1264
1265 (define_insn "move_lo_quad_internal_be_<mode>"
1266 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1267 (vec_concat:VQ_2E
1268 (const_int 0)
1269 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1270 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1271 "@
1272 dup\\t%d0, %1.d[0]
1273 fmov\\t%d0, %1
1274 dup\\t%d0, %1"
1275 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1276 (set_attr "simd" "yes,*,yes")
1277 (set_attr "fp" "*,yes,*")
1278 (set_attr "length" "4")]
1279 )
1280
1281 (define_expand "move_lo_quad_<mode>"
1282 [(match_operand:VQ 0 "register_operand")
1283 (match_operand:VQ 1 "register_operand")]
1284 "TARGET_SIMD"
1285 {
1286 if (BYTES_BIG_ENDIAN)
1287 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1288 else
1289 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1290 DONE;
1291 }
1292 )
1293
1294 ;; Move operand1 to the high architectural bits of the register, keeping
1295 ;; the low architectural bits of operand2.
1296 ;; For little-endian this is { operand2, operand1 }
1297 ;; For big-endian this is { operand1, operand2 }
1298
1299 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1300 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1301 (vec_concat:VQ
1302 (vec_select:<VHALF>
1303 (match_dup 0)
1304 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1305 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1306 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1307 "@
1308 ins\\t%0.d[1], %1.d[0]
1309 ins\\t%0.d[1], %1"
1310 [(set_attr "type" "neon_ins")]
1311 )
1312
1313 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1314 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1315 (vec_concat:VQ
1316 (match_operand:<VHALF> 1 "register_operand" "w,r")
1317 (vec_select:<VHALF>
1318 (match_dup 0)
1319 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1320 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1321 "@
1322 ins\\t%0.d[1], %1.d[0]
1323 ins\\t%0.d[1], %1"
1324 [(set_attr "type" "neon_ins")]
1325 )
1326
1327 (define_expand "move_hi_quad_<mode>"
1328 [(match_operand:VQ 0 "register_operand" "")
1329 (match_operand:<VHALF> 1 "register_operand" "")]
1330 "TARGET_SIMD"
1331 {
1332 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1333 if (BYTES_BIG_ENDIAN)
1334 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1335 operands[1], p));
1336 else
1337 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1338 operands[1], p));
1339 DONE;
1340 })
1341
1342 ;; Narrowing operations.
1343
1344 ;; For doubles.
1345 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1346 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1347 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1348 "TARGET_SIMD"
1349 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1350 [(set_attr "type" "neon_shift_imm_narrow_q")]
1351 )
1352
1353 (define_expand "vec_pack_trunc_<mode>"
1354 [(match_operand:<VNARROWD> 0 "register_operand" "")
1355 (match_operand:VDN 1 "register_operand" "")
1356 (match_operand:VDN 2 "register_operand" "")]
1357 "TARGET_SIMD"
1358 {
1359 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1360 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1361 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1362
1363 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1364 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1365 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1366 DONE;
1367 })
1368
1369 ;; For quads.
1370
1371 (define_insn "vec_pack_trunc_<mode>"
1372 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1373 (vec_concat:<VNARROWQ2>
1374 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1375 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1376 "TARGET_SIMD"
1377 {
1378 if (BYTES_BIG_ENDIAN)
1379 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1380 else
1381 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1382 }
1383 [(set_attr "type" "multiple")
1384 (set_attr "length" "8")]
1385 )
1386
1387 ;; Widening operations.
1388
1389 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1390 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1391 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1392 (match_operand:VQW 1 "register_operand" "w")
1393 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1394 )))]
1395 "TARGET_SIMD"
1396 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1397 [(set_attr "type" "neon_shift_imm_long")]
1398 )
1399
1400 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1401 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1402 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1403 (match_operand:VQW 1 "register_operand" "w")
1404 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1405 )))]
1406 "TARGET_SIMD"
1407 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1408 [(set_attr "type" "neon_shift_imm_long")]
1409 )
1410
1411 (define_expand "vec_unpack<su>_hi_<mode>"
1412 [(match_operand:<VWIDE> 0 "register_operand" "")
1413 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1414 "TARGET_SIMD"
1415 {
1416 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1417 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1418 operands[1], p));
1419 DONE;
1420 }
1421 )
1422
1423 (define_expand "vec_unpack<su>_lo_<mode>"
1424 [(match_operand:<VWIDE> 0 "register_operand" "")
1425 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1426 "TARGET_SIMD"
1427 {
1428 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1429 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1430 operands[1], p));
1431 DONE;
1432 }
1433 )
1434
1435 ;; Widening arithmetic.
1436
1437 (define_insn "*aarch64_<su>mlal_lo<mode>"
1438 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1439 (plus:<VWIDE>
1440 (mult:<VWIDE>
1441 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1442 (match_operand:VQW 2 "register_operand" "w")
1443 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1444 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1445 (match_operand:VQW 4 "register_operand" "w")
1446 (match_dup 3))))
1447 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1448 "TARGET_SIMD"
1449 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1450 [(set_attr "type" "neon_mla_<Vetype>_long")]
1451 )
1452
1453 (define_insn "*aarch64_<su>mlal_hi<mode>"
1454 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1455 (plus:<VWIDE>
1456 (mult:<VWIDE>
1457 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1458 (match_operand:VQW 2 "register_operand" "w")
1459 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1460 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1461 (match_operand:VQW 4 "register_operand" "w")
1462 (match_dup 3))))
1463 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1464 "TARGET_SIMD"
1465 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1466 [(set_attr "type" "neon_mla_<Vetype>_long")]
1467 )
1468
1469 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1470 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1471 (minus:<VWIDE>
1472 (match_operand:<VWIDE> 1 "register_operand" "0")
1473 (mult:<VWIDE>
1474 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1475 (match_operand:VQW 2 "register_operand" "w")
1476 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1477 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1478 (match_operand:VQW 4 "register_operand" "w")
1479 (match_dup 3))))))]
1480 "TARGET_SIMD"
1481 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1482 [(set_attr "type" "neon_mla_<Vetype>_long")]
1483 )
1484
1485 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1486 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1487 (minus:<VWIDE>
1488 (match_operand:<VWIDE> 1 "register_operand" "0")
1489 (mult:<VWIDE>
1490 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1491 (match_operand:VQW 2 "register_operand" "w")
1492 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1493 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1494 (match_operand:VQW 4 "register_operand" "w")
1495 (match_dup 3))))))]
1496 "TARGET_SIMD"
1497 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1498 [(set_attr "type" "neon_mla_<Vetype>_long")]
1499 )
1500
1501 (define_insn "*aarch64_<su>mlal<mode>"
1502 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1503 (plus:<VWIDE>
1504 (mult:<VWIDE>
1505 (ANY_EXTEND:<VWIDE>
1506 (match_operand:VD_BHSI 1 "register_operand" "w"))
1507 (ANY_EXTEND:<VWIDE>
1508 (match_operand:VD_BHSI 2 "register_operand" "w")))
1509 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1510 "TARGET_SIMD"
1511 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1512 [(set_attr "type" "neon_mla_<Vetype>_long")]
1513 )
1514
1515 (define_insn "*aarch64_<su>mlsl<mode>"
1516 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1517 (minus:<VWIDE>
1518 (match_operand:<VWIDE> 1 "register_operand" "0")
1519 (mult:<VWIDE>
1520 (ANY_EXTEND:<VWIDE>
1521 (match_operand:VD_BHSI 2 "register_operand" "w"))
1522 (ANY_EXTEND:<VWIDE>
1523 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1524 "TARGET_SIMD"
1525 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1526 [(set_attr "type" "neon_mla_<Vetype>_long")]
1527 )
1528
1529 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1530 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1531 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1532 (match_operand:VQW 1 "register_operand" "w")
1533 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1534 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1535 (match_operand:VQW 2 "register_operand" "w")
1536 (match_dup 3)))))]
1537 "TARGET_SIMD"
1538 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1539 [(set_attr "type" "neon_mul_<Vetype>_long")]
1540 )
1541
1542 (define_expand "vec_widen_<su>mult_lo_<mode>"
1543 [(match_operand:<VWIDE> 0 "register_operand" "")
1544 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1545 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1546 "TARGET_SIMD"
1547 {
1548 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1549 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1550 operands[1],
1551 operands[2], p));
1552 DONE;
1553 }
1554 )
1555
1556 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1557 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1558 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1559 (match_operand:VQW 1 "register_operand" "w")
1560 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1561 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1562 (match_operand:VQW 2 "register_operand" "w")
1563 (match_dup 3)))))]
1564 "TARGET_SIMD"
1565 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1566 [(set_attr "type" "neon_mul_<Vetype>_long")]
1567 )
1568
1569 (define_expand "vec_widen_<su>mult_hi_<mode>"
1570 [(match_operand:<VWIDE> 0 "register_operand" "")
1571 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1572 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1573 "TARGET_SIMD"
1574 {
1575 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1576 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1577 operands[1],
1578 operands[2], p));
1579 DONE;
1580
1581 }
1582 )
1583
1584 ;; FP vector operations.
1585 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1586 ;; double-precision (64-bit) floating-point data types and arithmetic as
1587 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1588 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1589 ;;
1590 ;; Floating-point operations can raise an exception. Vectorizing such
1591 ;; operations are safe because of reasons explained below.
1592 ;;
1593 ;; ARMv8 permits an extension to enable trapped floating-point
1594 ;; exception handling, however this is an optional feature. In the
1595 ;; event of a floating-point exception being raised by vectorised
1596 ;; code then:
1597 ;; 1. If trapped floating-point exceptions are available, then a trap
1598 ;; will be taken when any lane raises an enabled exception. A trap
1599 ;; handler may determine which lane raised the exception.
1600 ;; 2. Alternatively a sticky exception flag is set in the
1601 ;; floating-point status register (FPSR). Software may explicitly
1602 ;; test the exception flags, in which case the tests will either
1603 ;; prevent vectorisation, allowing precise identification of the
1604 ;; failing operation, or if tested outside of vectorisable regions
1605 ;; then the specific operation and lane are not of interest.
1606
1607 ;; FP arithmetic operations.
1608
1609 (define_insn "add<mode>3"
1610 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1611 (plus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1612 (match_operand:VHSDF 2 "register_operand" "w")))]
1613 "TARGET_SIMD"
1614 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1615 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1616 )
1617
1618 (define_insn "sub<mode>3"
1619 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1620 (minus:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1621 (match_operand:VHSDF 2 "register_operand" "w")))]
1622 "TARGET_SIMD"
1623 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1624 [(set_attr "type" "neon_fp_addsub_<stype><q>")]
1625 )
1626
1627 (define_insn "mul<mode>3"
1628 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1629 (mult:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1630 (match_operand:VHSDF 2 "register_operand" "w")))]
1631 "TARGET_SIMD"
1632 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1633 [(set_attr "type" "neon_fp_mul_<stype><q>")]
1634 )
1635
1636 (define_expand "div<mode>3"
1637 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1638 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1639 (match_operand:VHSDF 2 "register_operand" "w")))]
1640 "TARGET_SIMD"
1641 {
1642 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
1643 DONE;
1644
1645 operands[1] = force_reg (<MODE>mode, operands[1]);
1646 })
1647
1648 (define_insn "*div<mode>3"
1649 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1650 (div:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1651 (match_operand:VHSDF 2 "register_operand" "w")))]
1652 "TARGET_SIMD"
1653 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1654 [(set_attr "type" "neon_fp_div_<stype><q>")]
1655 )
1656
1657 (define_insn "neg<mode>2"
1658 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1659 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1660 "TARGET_SIMD"
1661 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1662 [(set_attr "type" "neon_fp_neg_<stype><q>")]
1663 )
1664
1665 (define_insn "abs<mode>2"
1666 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1667 (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
1668 "TARGET_SIMD"
1669 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1670 [(set_attr "type" "neon_fp_abs_<stype><q>")]
1671 )
1672
1673 (define_insn "fma<mode>4"
1674 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1675 (fma:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
1676 (match_operand:VHSDF 2 "register_operand" "w")
1677 (match_operand:VHSDF 3 "register_operand" "0")))]
1678 "TARGET_SIMD"
1679 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1680 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1681 )
1682
1683 (define_insn "*aarch64_fma4_elt<mode>"
1684 [(set (match_operand:VDQF 0 "register_operand" "=w")
1685 (fma:VDQF
1686 (vec_duplicate:VDQF
1687 (vec_select:<VEL>
1688 (match_operand:VDQF 1 "register_operand" "<h_con>")
1689 (parallel [(match_operand:SI 2 "immediate_operand")])))
1690 (match_operand:VDQF 3 "register_operand" "w")
1691 (match_operand:VDQF 4 "register_operand" "0")))]
1692 "TARGET_SIMD"
1693 {
1694 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1695 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1696 }
1697 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1698 )
1699
1700 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1701 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1702 (fma:VDQSF
1703 (vec_duplicate:VDQSF
1704 (vec_select:<VEL>
1705 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1706 (parallel [(match_operand:SI 2 "immediate_operand")])))
1707 (match_operand:VDQSF 3 "register_operand" "w")
1708 (match_operand:VDQSF 4 "register_operand" "0")))]
1709 "TARGET_SIMD"
1710 {
1711 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1712 INTVAL (operands[2])));
1713 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1714 }
1715 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1716 )
1717
1718 (define_insn "*aarch64_fma4_elt_from_dup<mode>"
1719 [(set (match_operand:VMUL 0 "register_operand" "=w")
1720 (fma:VMUL
1721 (vec_duplicate:VMUL
1722 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1723 (match_operand:VMUL 2 "register_operand" "w")
1724 (match_operand:VMUL 3 "register_operand" "0")))]
1725 "TARGET_SIMD"
1726 "fmla\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1727 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1728 )
1729
1730 (define_insn "*aarch64_fma4_elt_to_64v2df"
1731 [(set (match_operand:DF 0 "register_operand" "=w")
1732 (fma:DF
1733 (vec_select:DF
1734 (match_operand:V2DF 1 "register_operand" "w")
1735 (parallel [(match_operand:SI 2 "immediate_operand")]))
1736 (match_operand:DF 3 "register_operand" "w")
1737 (match_operand:DF 4 "register_operand" "0")))]
1738 "TARGET_SIMD"
1739 {
1740 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1741 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1742 }
1743 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1744 )
1745
1746 (define_insn "fnma<mode>4"
1747 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1748 (fma:VHSDF
1749 (match_operand:VHSDF 1 "register_operand" "w")
1750 (neg:VHSDF
1751 (match_operand:VHSDF 2 "register_operand" "w"))
1752 (match_operand:VHSDF 3 "register_operand" "0")))]
1753 "TARGET_SIMD"
1754 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1755 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1756 )
1757
1758 (define_insn "*aarch64_fnma4_elt<mode>"
1759 [(set (match_operand:VDQF 0 "register_operand" "=w")
1760 (fma:VDQF
1761 (neg:VDQF
1762 (match_operand:VDQF 3 "register_operand" "w"))
1763 (vec_duplicate:VDQF
1764 (vec_select:<VEL>
1765 (match_operand:VDQF 1 "register_operand" "<h_con>")
1766 (parallel [(match_operand:SI 2 "immediate_operand")])))
1767 (match_operand:VDQF 4 "register_operand" "0")))]
1768 "TARGET_SIMD"
1769 {
1770 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1771 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1772 }
1773 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1774 )
1775
1776 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1777 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1778 (fma:VDQSF
1779 (neg:VDQSF
1780 (match_operand:VDQSF 3 "register_operand" "w"))
1781 (vec_duplicate:VDQSF
1782 (vec_select:<VEL>
1783 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1784 (parallel [(match_operand:SI 2 "immediate_operand")])))
1785 (match_operand:VDQSF 4 "register_operand" "0")))]
1786 "TARGET_SIMD"
1787 {
1788 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1789 INTVAL (operands[2])));
1790 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1791 }
1792 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1793 )
1794
1795 (define_insn "*aarch64_fnma4_elt_from_dup<mode>"
1796 [(set (match_operand:VMUL 0 "register_operand" "=w")
1797 (fma:VMUL
1798 (neg:VMUL
1799 (match_operand:VMUL 2 "register_operand" "w"))
1800 (vec_duplicate:VMUL
1801 (match_operand:<VEL> 1 "register_operand" "<h_con>"))
1802 (match_operand:VMUL 3 "register_operand" "0")))]
1803 "TARGET_SIMD"
1804 "fmls\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"
1805 [(set_attr "type" "neon<fp>_mla_<stype>_scalar<q>")]
1806 )
1807
1808 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1809 [(set (match_operand:DF 0 "register_operand" "=w")
1810 (fma:DF
1811 (vec_select:DF
1812 (match_operand:V2DF 1 "register_operand" "w")
1813 (parallel [(match_operand:SI 2 "immediate_operand")]))
1814 (neg:DF
1815 (match_operand:DF 3 "register_operand" "w"))
1816 (match_operand:DF 4 "register_operand" "0")))]
1817 "TARGET_SIMD"
1818 {
1819 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1820 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1821 }
1822 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1823 )
1824
1825 ;; Vector versions of the floating-point frint patterns.
1826 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1827 (define_insn "<frint_pattern><mode>2"
1828 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1829 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
1830 FRINT))]
1831 "TARGET_SIMD"
1832 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1833 [(set_attr "type" "neon_fp_round_<stype><q>")]
1834 )
1835
1836 ;; Vector versions of the fcvt standard patterns.
1837 ;; Expands to lbtrunc, lround, lceil, lfloor
1838 (define_insn "l<fcvt_pattern><su_optab><VHSDF:mode><fcvt_target>2"
1839 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1840 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1841 [(match_operand:VHSDF 1 "register_operand" "w")]
1842 FCVT)))]
1843 "TARGET_SIMD"
1844 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1845 [(set_attr "type" "neon_fp_to_int_<stype><q>")]
1846 )
1847
1848 ;; HF Scalar variants of related SIMD instructions.
1849 (define_insn "l<fcvt_pattern><su_optab>hfhi2"
1850 [(set (match_operand:HI 0 "register_operand" "=w")
1851 (FIXUORS:HI (unspec:HF [(match_operand:HF 1 "register_operand" "w")]
1852 FCVT)))]
1853 "TARGET_SIMD_F16INST"
1854 "fcvt<frint_suffix><su>\t%h0, %h1"
1855 [(set_attr "type" "neon_fp_to_int_s")]
1856 )
1857
1858 (define_insn "<optab>_trunchfhi2"
1859 [(set (match_operand:HI 0 "register_operand" "=w")
1860 (FIXUORS:HI (match_operand:HF 1 "register_operand" "w")))]
1861 "TARGET_SIMD_F16INST"
1862 "fcvtz<su>\t%h0, %h1"
1863 [(set_attr "type" "neon_fp_to_int_s")]
1864 )
1865
1866 (define_insn "<optab>hihf2"
1867 [(set (match_operand:HF 0 "register_operand" "=w")
1868 (FLOATUORS:HF (match_operand:HI 1 "register_operand" "w")))]
1869 "TARGET_SIMD_F16INST"
1870 "<su_optab>cvtf\t%h0, %h1"
1871 [(set_attr "type" "neon_int_to_fp_s")]
1872 )
1873
1874 (define_insn "*aarch64_fcvt<su_optab><VDQF:mode><fcvt_target>2_mult"
1875 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1876 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1877 [(mult:VDQF
1878 (match_operand:VDQF 1 "register_operand" "w")
1879 (match_operand:VDQF 2 "aarch64_fp_vec_pow2" ""))]
1880 UNSPEC_FRINTZ)))]
1881 "TARGET_SIMD
1882 && IN_RANGE (aarch64_vec_fpconst_pow_of_2 (operands[2]), 1,
1883 GET_MODE_BITSIZE (GET_MODE_INNER (<VDQF:MODE>mode)))"
1884 {
1885 int fbits = aarch64_vec_fpconst_pow_of_2 (operands[2]);
1886 char buf[64];
1887 snprintf (buf, 64, "fcvtz<su>\\t%%0.<Vtype>, %%1.<Vtype>, #%d", fbits);
1888 output_asm_insn (buf, operands);
1889 return "";
1890 }
1891 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1892 )
1893
1894 (define_expand "<optab><VHSDF:mode><fcvt_target>2"
1895 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1896 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1897 [(match_operand:VHSDF 1 "register_operand")]
1898 UNSPEC_FRINTZ)))]
1899 "TARGET_SIMD"
1900 {})
1901
1902 (define_expand "<fix_trunc_optab><VHSDF:mode><fcvt_target>2"
1903 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1904 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1905 [(match_operand:VHSDF 1 "register_operand")]
1906 UNSPEC_FRINTZ)))]
1907 "TARGET_SIMD"
1908 {})
1909
1910 (define_expand "ftrunc<VHSDF:mode>2"
1911 [(set (match_operand:VHSDF 0 "register_operand")
1912 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
1913 UNSPEC_FRINTZ))]
1914 "TARGET_SIMD"
1915 {})
1916
1917 (define_insn "<optab><fcvt_target><VHSDF:mode>2"
1918 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1919 (FLOATUORS:VHSDF
1920 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1921 "TARGET_SIMD"
1922 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1923 [(set_attr "type" "neon_int_to_fp_<stype><q>")]
1924 )
1925
1926 ;; Conversions between vectors of floats and doubles.
1927 ;; Contains a mix of patterns to match standard pattern names
1928 ;; and those for intrinsics.
1929
1930 ;; Float widening operations.
1931
1932 (define_insn "aarch64_simd_vec_unpacks_lo_<mode>"
1933 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1934 (float_extend:<VWIDE> (vec_select:<VHALF>
1935 (match_operand:VQ_HSF 1 "register_operand" "w")
1936 (match_operand:VQ_HSF 2 "vect_par_cnst_lo_half" "")
1937 )))]
1938 "TARGET_SIMD"
1939 "fcvtl\\t%0.<Vwtype>, %1.<Vhalftype>"
1940 [(set_attr "type" "neon_fp_cvt_widen_s")]
1941 )
1942
1943 ;; Convert between fixed-point and floating-point (vector modes)
1944
1945 (define_insn "<FCVT_F2FIXED:fcvt_fixed_insn><VHSDF:mode>3"
1946 [(set (match_operand:<VHSDF:FCVT_TARGET> 0 "register_operand" "=w")
1947 (unspec:<VHSDF:FCVT_TARGET>
1948 [(match_operand:VHSDF 1 "register_operand" "w")
1949 (match_operand:SI 2 "immediate_operand" "i")]
1950 FCVT_F2FIXED))]
1951 "TARGET_SIMD"
1952 "<FCVT_F2FIXED:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1953 [(set_attr "type" "neon_fp_to_int_<VHSDF:stype><q>")]
1954 )
1955
1956 (define_insn "<FCVT_FIXED2F:fcvt_fixed_insn><VDQ_HSDI:mode>3"
1957 [(set (match_operand:<VDQ_HSDI:FCVT_TARGET> 0 "register_operand" "=w")
1958 (unspec:<VDQ_HSDI:FCVT_TARGET>
1959 [(match_operand:VDQ_HSDI 1 "register_operand" "w")
1960 (match_operand:SI 2 "immediate_operand" "i")]
1961 FCVT_FIXED2F))]
1962 "TARGET_SIMD"
1963 "<FCVT_FIXED2F:fcvt_fixed_insn>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #%2"
1964 [(set_attr "type" "neon_int_to_fp_<VDQ_HSDI:stype><q>")]
1965 )
1966
1967 ;; ??? Note that the vectorizer usage of the vec_unpacks_[lo/hi] patterns
1968 ;; is inconsistent with vector ordering elsewhere in the compiler, in that
1969 ;; the meaning of HI and LO changes depending on the target endianness.
1970 ;; While elsewhere we map the higher numbered elements of a vector to
1971 ;; the lower architectural lanes of the vector, for these patterns we want
1972 ;; to always treat "hi" as referring to the higher architectural lanes.
1973 ;; Consequently, while the patterns below look inconsistent with our
1974 ;; other big-endian patterns their behavior is as required.
1975
1976 (define_expand "vec_unpacks_lo_<mode>"
1977 [(match_operand:<VWIDE> 0 "register_operand" "")
1978 (match_operand:VQ_HSF 1 "register_operand" "")]
1979 "TARGET_SIMD"
1980 {
1981 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1982 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
1983 operands[1], p));
1984 DONE;
1985 }
1986 )
1987
1988 (define_insn "aarch64_simd_vec_unpacks_hi_<mode>"
1989 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1990 (float_extend:<VWIDE> (vec_select:<VHALF>
1991 (match_operand:VQ_HSF 1 "register_operand" "w")
1992 (match_operand:VQ_HSF 2 "vect_par_cnst_hi_half" "")
1993 )))]
1994 "TARGET_SIMD"
1995 "fcvtl2\\t%0.<Vwtype>, %1.<Vtype>"
1996 [(set_attr "type" "neon_fp_cvt_widen_s")]
1997 )
1998
1999 (define_expand "vec_unpacks_hi_<mode>"
2000 [(match_operand:<VWIDE> 0 "register_operand" "")
2001 (match_operand:VQ_HSF 1 "register_operand" "")]
2002 "TARGET_SIMD"
2003 {
2004 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2005 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2006 operands[1], p));
2007 DONE;
2008 }
2009 )
2010 (define_insn "aarch64_float_extend_lo_<Vwide>"
2011 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2012 (float_extend:<VWIDE>
2013 (match_operand:VDF 1 "register_operand" "w")))]
2014 "TARGET_SIMD"
2015 "fcvtl\\t%0<Vmwtype>, %1<Vmtype>"
2016 [(set_attr "type" "neon_fp_cvt_widen_s")]
2017 )
2018
2019 ;; Float narrowing operations.
2020
2021 (define_insn "aarch64_float_truncate_lo_<mode>"
2022 [(set (match_operand:VDF 0 "register_operand" "=w")
2023 (float_truncate:VDF
2024 (match_operand:<VWIDE> 1 "register_operand" "w")))]
2025 "TARGET_SIMD"
2026 "fcvtn\\t%0.<Vtype>, %1<Vmwtype>"
2027 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2028 )
2029
2030 (define_insn "aarch64_float_truncate_hi_<Vdbl>_le"
2031 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2032 (vec_concat:<VDBL>
2033 (match_operand:VDF 1 "register_operand" "0")
2034 (float_truncate:VDF
2035 (match_operand:<VWIDE> 2 "register_operand" "w"))))]
2036 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2037 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2038 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2039 )
2040
2041 (define_insn "aarch64_float_truncate_hi_<Vdbl>_be"
2042 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
2043 (vec_concat:<VDBL>
2044 (float_truncate:VDF
2045 (match_operand:<VWIDE> 2 "register_operand" "w"))
2046 (match_operand:VDF 1 "register_operand" "0")))]
2047 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2048 "fcvtn2\\t%0.<Vdtype>, %2<Vmwtype>"
2049 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
2050 )
2051
2052 (define_expand "aarch64_float_truncate_hi_<Vdbl>"
2053 [(match_operand:<VDBL> 0 "register_operand" "=w")
2054 (match_operand:VDF 1 "register_operand" "0")
2055 (match_operand:<VWIDE> 2 "register_operand" "w")]
2056 "TARGET_SIMD"
2057 {
2058 rtx (*gen) (rtx, rtx, rtx) = BYTES_BIG_ENDIAN
2059 ? gen_aarch64_float_truncate_hi_<Vdbl>_be
2060 : gen_aarch64_float_truncate_hi_<Vdbl>_le;
2061 emit_insn (gen (operands[0], operands[1], operands[2]));
2062 DONE;
2063 }
2064 )
2065
2066 (define_expand "vec_pack_trunc_v2df"
2067 [(set (match_operand:V4SF 0 "register_operand")
2068 (vec_concat:V4SF
2069 (float_truncate:V2SF
2070 (match_operand:V2DF 1 "register_operand"))
2071 (float_truncate:V2SF
2072 (match_operand:V2DF 2 "register_operand"))
2073 ))]
2074 "TARGET_SIMD"
2075 {
2076 rtx tmp = gen_reg_rtx (V2SFmode);
2077 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2078 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2079
2080 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
2081 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
2082 tmp, operands[hi]));
2083 DONE;
2084 }
2085 )
2086
2087 (define_expand "vec_pack_trunc_df"
2088 [(set (match_operand:V2SF 0 "register_operand")
2089 (vec_concat:V2SF
2090 (float_truncate:SF
2091 (match_operand:DF 1 "register_operand"))
2092 (float_truncate:SF
2093 (match_operand:DF 2 "register_operand"))
2094 ))]
2095 "TARGET_SIMD"
2096 {
2097 rtx tmp = gen_reg_rtx (V2SFmode);
2098 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
2099 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
2100
2101 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
2102 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
2103 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
2104 DONE;
2105 }
2106 )
2107
2108 ;; FP Max/Min
2109 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2110 ;; expression like:
2111 ;; a = (b < c) ? b : c;
2112 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
2113 ;; either explicitly or indirectly via -ffast-math.
2114 ;;
2115 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2116 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2117 ;; operand will be returned when both operands are zero (i.e. they may not
2118 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2119 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
2120 ;; NaNs.
2121
2122 (define_insn "<su><maxmin><mode>3"
2123 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2124 (FMAXMIN:VHSDF (match_operand:VHSDF 1 "register_operand" "w")
2125 (match_operand:VHSDF 2 "register_operand" "w")))]
2126 "TARGET_SIMD"
2127 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2128 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2129 )
2130
2131 ;; Vector forms for fmax, fmin, fmaxnm, fminnm.
2132 ;; fmaxnm and fminnm are used for the fmax<mode>3 standard pattern names,
2133 ;; which implement the IEEE fmax ()/fmin () functions.
2134 (define_insn "<maxmin_uns><mode>3"
2135 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2136 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2137 (match_operand:VHSDF 2 "register_operand" "w")]
2138 FMAXMIN_UNS))]
2139 "TARGET_SIMD"
2140 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2141 [(set_attr "type" "neon_fp_minmax_<stype><q>")]
2142 )
2143
2144 ;; 'across lanes' add.
2145
2146 (define_expand "reduc_plus_scal_<mode>"
2147 [(match_operand:<VEL> 0 "register_operand" "=w")
2148 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2149 UNSPEC_ADDV)]
2150 "TARGET_SIMD"
2151 {
2152 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2153 rtx scratch = gen_reg_rtx (<MODE>mode);
2154 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2155 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2156 DONE;
2157 }
2158 )
2159
2160 (define_insn "aarch64_faddp<mode>"
2161 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2162 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
2163 (match_operand:VHSDF 2 "register_operand" "w")]
2164 UNSPEC_FADDV))]
2165 "TARGET_SIMD"
2166 "faddp\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2167 [(set_attr "type" "neon_fp_reduc_add_<stype><q>")]
2168 )
2169
2170 (define_insn "aarch64_reduc_plus_internal<mode>"
2171 [(set (match_operand:VDQV 0 "register_operand" "=w")
2172 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
2173 UNSPEC_ADDV))]
2174 "TARGET_SIMD"
2175 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
2176 [(set_attr "type" "neon_reduc_add<q>")]
2177 )
2178
2179 (define_insn "aarch64_reduc_plus_internalv2si"
2180 [(set (match_operand:V2SI 0 "register_operand" "=w")
2181 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2182 UNSPEC_ADDV))]
2183 "TARGET_SIMD"
2184 "addp\\t%0.2s, %1.2s, %1.2s"
2185 [(set_attr "type" "neon_reduc_add")]
2186 )
2187
2188 (define_insn "reduc_plus_scal_<mode>"
2189 [(set (match_operand:<VEL> 0 "register_operand" "=w")
2190 (unspec:<VEL> [(match_operand:V2F 1 "register_operand" "w")]
2191 UNSPEC_FADDV))]
2192 "TARGET_SIMD"
2193 "faddp\\t%<Vetype>0, %1.<Vtype>"
2194 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
2195 )
2196
2197 (define_expand "reduc_plus_scal_v4sf"
2198 [(set (match_operand:SF 0 "register_operand")
2199 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2200 UNSPEC_FADDV))]
2201 "TARGET_SIMD"
2202 {
2203 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
2204 rtx scratch = gen_reg_rtx (V4SFmode);
2205 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2206 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2207 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2208 DONE;
2209 })
2210
2211 (define_insn "clrsb<mode>2"
2212 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2213 (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2214 "TARGET_SIMD"
2215 "cls\\t%0.<Vtype>, %1.<Vtype>"
2216 [(set_attr "type" "neon_cls<q>")]
2217 )
2218
2219 (define_insn "clz<mode>2"
2220 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2221 (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
2222 "TARGET_SIMD"
2223 "clz\\t%0.<Vtype>, %1.<Vtype>"
2224 [(set_attr "type" "neon_cls<q>")]
2225 )
2226
2227 (define_insn "popcount<mode>2"
2228 [(set (match_operand:VB 0 "register_operand" "=w")
2229 (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
2230 "TARGET_SIMD"
2231 "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
2232 [(set_attr "type" "neon_cnt<q>")]
2233 )
2234
2235 ;; 'across lanes' max and min ops.
2236
2237 ;; Template for outputting a scalar, so we can create __builtins which can be
2238 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
2239 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2240 [(match_operand:<VEL> 0 "register_operand")
2241 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2242 FMAXMINV)]
2243 "TARGET_SIMD"
2244 {
2245 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2246 rtx scratch = gen_reg_rtx (<MODE>mode);
2247 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2248 operands[1]));
2249 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2250 DONE;
2251 }
2252 )
2253
2254 ;; Likewise for integer cases, signed and unsigned.
2255 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2256 [(match_operand:<VEL> 0 "register_operand")
2257 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2258 MAXMINV)]
2259 "TARGET_SIMD"
2260 {
2261 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
2262 rtx scratch = gen_reg_rtx (<MODE>mode);
2263 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2264 operands[1]));
2265 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2266 DONE;
2267 }
2268 )
2269
2270 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2271 [(set (match_operand:VDQV_S 0 "register_operand" "=w")
2272 (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
2273 MAXMINV))]
2274 "TARGET_SIMD"
2275 "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
2276 [(set_attr "type" "neon_reduc_minmax<q>")]
2277 )
2278
2279 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
2280 [(set (match_operand:V2SI 0 "register_operand" "=w")
2281 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
2282 MAXMINV))]
2283 "TARGET_SIMD"
2284 "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
2285 [(set_attr "type" "neon_reduc_minmax")]
2286 )
2287
2288 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
2289 [(set (match_operand:VHSDF 0 "register_operand" "=w")
2290 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
2291 FMAXMINV))]
2292 "TARGET_SIMD"
2293 "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
2294 [(set_attr "type" "neon_fp_reduc_minmax_<stype><q>")]
2295 )
2296
2297 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
2298 ;; allocation.
2299 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
2300 ;; to select.
2301 ;;
2302 ;; Thus our BSL is of the form:
2303 ;; op0 = bsl (mask, op2, op3)
2304 ;; We can use any of:
2305 ;;
2306 ;; if (op0 = mask)
2307 ;; bsl mask, op1, op2
2308 ;; if (op0 = op1) (so 1-bits in mask choose bits from op2, else op0)
2309 ;; bit op0, op2, mask
2310 ;; if (op0 = op2) (so 0-bits in mask choose bits from op1, else op0)
2311 ;; bif op0, op1, mask
2312 ;;
2313 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2314 ;; Some forms of straight-line code may generate the equivalent form
2315 ;; in *aarch64_simd_bsl<mode>_alt.
2316
2317 (define_insn "aarch64_simd_bsl<mode>_internal"
2318 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2319 (xor:VSDQ_I_DI
2320 (and:VSDQ_I_DI
2321 (xor:VSDQ_I_DI
2322 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2323 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0"))
2324 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2325 (match_dup:<V_INT_EQUIV> 3)
2326 ))]
2327 "TARGET_SIMD"
2328 "@
2329 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2330 bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2331 bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2332 [(set_attr "type" "neon_bsl<q>")]
2333 )
2334
2335 ;; We need this form in addition to the above pattern to match the case
2336 ;; when combine tries merging three insns such that the second operand of
2337 ;; the outer XOR matches the second operand of the inner XOR rather than
2338 ;; the first. The two are equivalent but since recog doesn't try all
2339 ;; permutations of commutative operations, we have to have a separate pattern.
2340
2341 (define_insn "*aarch64_simd_bsl<mode>_alt"
2342 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2343 (xor:VSDQ_I_DI
2344 (and:VSDQ_I_DI
2345 (xor:VSDQ_I_DI
2346 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0")
2347 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w"))
2348 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w"))
2349 (match_dup:VSDQ_I_DI 2)))]
2350 "TARGET_SIMD"
2351 "@
2352 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2353 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2354 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2355 [(set_attr "type" "neon_bsl<q>")]
2356 )
2357
2358 (define_expand "aarch64_simd_bsl<mode>"
2359 [(match_operand:VALLDIF 0 "register_operand")
2360 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2361 (match_operand:VALLDIF 2 "register_operand")
2362 (match_operand:VALLDIF 3 "register_operand")]
2363 "TARGET_SIMD"
2364 {
2365 /* We can't alias operands together if they have different modes. */
2366 rtx tmp = operands[0];
2367 if (FLOAT_MODE_P (<MODE>mode))
2368 {
2369 operands[2] = gen_lowpart (<V_INT_EQUIV>mode, operands[2]);
2370 operands[3] = gen_lowpart (<V_INT_EQUIV>mode, operands[3]);
2371 tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2372 }
2373 operands[1] = gen_lowpart (<V_INT_EQUIV>mode, operands[1]);
2374 emit_insn (gen_aarch64_simd_bsl<v_int_equiv>_internal (tmp,
2375 operands[1],
2376 operands[2],
2377 operands[3]));
2378 if (tmp != operands[0])
2379 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2380
2381 DONE;
2382 })
2383
2384 (define_expand "vcond_mask_<mode><v_int_equiv>"
2385 [(match_operand:VALLDI 0 "register_operand")
2386 (match_operand:VALLDI 1 "nonmemory_operand")
2387 (match_operand:VALLDI 2 "nonmemory_operand")
2388 (match_operand:<V_INT_EQUIV> 3 "register_operand")]
2389 "TARGET_SIMD"
2390 {
2391 /* If we have (a = (P) ? -1 : 0);
2392 Then we can simply move the generated mask (result must be int). */
2393 if (operands[1] == CONSTM1_RTX (<MODE>mode)
2394 && operands[2] == CONST0_RTX (<MODE>mode))
2395 emit_move_insn (operands[0], operands[3]);
2396 /* Similarly, (a = (P) ? 0 : -1) is just inverting the generated mask. */
2397 else if (operands[1] == CONST0_RTX (<MODE>mode)
2398 && operands[2] == CONSTM1_RTX (<MODE>mode))
2399 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[3]));
2400 else
2401 {
2402 if (!REG_P (operands[1]))
2403 operands[1] = force_reg (<MODE>mode, operands[1]);
2404 if (!REG_P (operands[2]))
2405 operands[2] = force_reg (<MODE>mode, operands[2]);
2406 emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], operands[3],
2407 operands[1], operands[2]));
2408 }
2409
2410 DONE;
2411 })
2412
2413 ;; Patterns comparing two vectors to produce a mask.
2414
2415 (define_expand "vec_cmp<mode><mode>"
2416 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2417 (match_operator 1 "comparison_operator"
2418 [(match_operand:VSDQ_I_DI 2 "register_operand")
2419 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2420 "TARGET_SIMD"
2421 {
2422 rtx mask = operands[0];
2423 enum rtx_code code = GET_CODE (operands[1]);
2424
2425 switch (code)
2426 {
2427 case NE:
2428 case LE:
2429 case LT:
2430 case GE:
2431 case GT:
2432 case EQ:
2433 if (operands[3] == CONST0_RTX (<MODE>mode))
2434 break;
2435
2436 /* Fall through. */
2437 default:
2438 if (!REG_P (operands[3]))
2439 operands[3] = force_reg (<MODE>mode, operands[3]);
2440
2441 break;
2442 }
2443
2444 switch (code)
2445 {
2446 case LT:
2447 emit_insn (gen_aarch64_cmlt<mode> (mask, operands[2], operands[3]));
2448 break;
2449
2450 case GE:
2451 emit_insn (gen_aarch64_cmge<mode> (mask, operands[2], operands[3]));
2452 break;
2453
2454 case LE:
2455 emit_insn (gen_aarch64_cmle<mode> (mask, operands[2], operands[3]));
2456 break;
2457
2458 case GT:
2459 emit_insn (gen_aarch64_cmgt<mode> (mask, operands[2], operands[3]));
2460 break;
2461
2462 case LTU:
2463 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[3], operands[2]));
2464 break;
2465
2466 case GEU:
2467 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[2], operands[3]));
2468 break;
2469
2470 case LEU:
2471 emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[3], operands[2]));
2472 break;
2473
2474 case GTU:
2475 emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[2], operands[3]));
2476 break;
2477
2478 case NE:
2479 /* Handle NE as !EQ. */
2480 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2481 emit_insn (gen_one_cmpl<v_int_equiv>2 (mask, mask));
2482 break;
2483
2484 case EQ:
2485 emit_insn (gen_aarch64_cmeq<mode> (mask, operands[2], operands[3]));
2486 break;
2487
2488 default:
2489 gcc_unreachable ();
2490 }
2491
2492 DONE;
2493 })
2494
2495 (define_expand "vec_cmp<mode><v_int_equiv>"
2496 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
2497 (match_operator 1 "comparison_operator"
2498 [(match_operand:VDQF 2 "register_operand")
2499 (match_operand:VDQF 3 "nonmemory_operand")]))]
2500 "TARGET_SIMD"
2501 {
2502 int use_zero_form = 0;
2503 enum rtx_code code = GET_CODE (operands[1]);
2504 rtx tmp = gen_reg_rtx (<V_INT_EQUIV>mode);
2505
2506 rtx (*comparison) (rtx, rtx, rtx) = NULL;
2507
2508 switch (code)
2509 {
2510 case LE:
2511 case LT:
2512 case GE:
2513 case GT:
2514 case EQ:
2515 if (operands[3] == CONST0_RTX (<MODE>mode))
2516 {
2517 use_zero_form = 1;
2518 break;
2519 }
2520 /* Fall through. */
2521 default:
2522 if (!REG_P (operands[3]))
2523 operands[3] = force_reg (<MODE>mode, operands[3]);
2524
2525 break;
2526 }
2527
2528 switch (code)
2529 {
2530 case LT:
2531 if (use_zero_form)
2532 {
2533 comparison = gen_aarch64_cmlt<mode>;
2534 break;
2535 }
2536 /* Fall through. */
2537 case UNGE:
2538 std::swap (operands[2], operands[3]);
2539 /* Fall through. */
2540 case UNLE:
2541 case GT:
2542 comparison = gen_aarch64_cmgt<mode>;
2543 break;
2544 case LE:
2545 if (use_zero_form)
2546 {
2547 comparison = gen_aarch64_cmle<mode>;
2548 break;
2549 }
2550 /* Fall through. */
2551 case UNGT:
2552 std::swap (operands[2], operands[3]);
2553 /* Fall through. */
2554 case UNLT:
2555 case GE:
2556 comparison = gen_aarch64_cmge<mode>;
2557 break;
2558 case NE:
2559 case EQ:
2560 comparison = gen_aarch64_cmeq<mode>;
2561 break;
2562 case UNEQ:
2563 case ORDERED:
2564 case UNORDERED:
2565 break;
2566 default:
2567 gcc_unreachable ();
2568 }
2569
2570 switch (code)
2571 {
2572 case UNGE:
2573 case UNGT:
2574 case UNLE:
2575 case UNLT:
2576 case NE:
2577 /* FCM returns false for lanes which are unordered, so if we use
2578 the inverse of the comparison we actually want to emit, then
2579 invert the result, we will end up with the correct result.
2580 Note that a NE NaN and NaN NE b are true for all a, b.
2581
2582 Our transformations are:
2583 a UNGE b -> !(b GT a)
2584 a UNGT b -> !(b GE a)
2585 a UNLE b -> !(a GT b)
2586 a UNLT b -> !(a GE b)
2587 a NE b -> !(a EQ b) */
2588 gcc_assert (comparison != NULL);
2589 emit_insn (comparison (operands[0], operands[2], operands[3]));
2590 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2591 break;
2592
2593 case LT:
2594 case LE:
2595 case GT:
2596 case GE:
2597 case EQ:
2598 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2599 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2600 a GE b -> a GE b
2601 a GT b -> a GT b
2602 a LE b -> b GE a
2603 a LT b -> b GT a
2604 a EQ b -> a EQ b */
2605 gcc_assert (comparison != NULL);
2606 emit_insn (comparison (operands[0], operands[2], operands[3]));
2607 break;
2608
2609 case UNEQ:
2610 /* We first check (a > b || b > a) which is !UNEQ, inverting
2611 this result will then give us (a == b || a UNORDERED b). */
2612 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2613 operands[2], operands[3]));
2614 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2615 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2616 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2617 break;
2618
2619 case UNORDERED:
2620 /* Operands are ORDERED iff (a > b || b >= a), so we can compute
2621 UNORDERED as !ORDERED. */
2622 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2623 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2624 operands[3], operands[2]));
2625 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2626 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2627 break;
2628
2629 case ORDERED:
2630 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3]));
2631 emit_insn (gen_aarch64_cmge<mode> (operands[0],
2632 operands[3], operands[2]));
2633 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2634 break;
2635
2636 default:
2637 gcc_unreachable ();
2638 }
2639
2640 DONE;
2641 })
2642
2643 (define_expand "vec_cmpu<mode><mode>"
2644 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2645 (match_operator 1 "comparison_operator"
2646 [(match_operand:VSDQ_I_DI 2 "register_operand")
2647 (match_operand:VSDQ_I_DI 3 "nonmemory_operand")]))]
2648 "TARGET_SIMD"
2649 {
2650 emit_insn (gen_vec_cmp<mode><mode> (operands[0], operands[1],
2651 operands[2], operands[3]));
2652 DONE;
2653 })
2654
2655 (define_expand "vcond<mode><mode>"
2656 [(set (match_operand:VALLDI 0 "register_operand")
2657 (if_then_else:VALLDI
2658 (match_operator 3 "comparison_operator"
2659 [(match_operand:VALLDI 4 "register_operand")
2660 (match_operand:VALLDI 5 "nonmemory_operand")])
2661 (match_operand:VALLDI 1 "nonmemory_operand")
2662 (match_operand:VALLDI 2 "nonmemory_operand")))]
2663 "TARGET_SIMD"
2664 {
2665 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2666 enum rtx_code code = GET_CODE (operands[3]);
2667
2668 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2669 it as well as switch operands 1/2 in order to avoid the additional
2670 NOT instruction. */
2671 if (code == NE)
2672 {
2673 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2674 operands[4], operands[5]);
2675 std::swap (operands[1], operands[2]);
2676 }
2677 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2678 operands[4], operands[5]));
2679 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2680 operands[2], mask));
2681
2682 DONE;
2683 })
2684
2685 (define_expand "vcond<v_cmp_mixed><mode>"
2686 [(set (match_operand:<V_cmp_mixed> 0 "register_operand")
2687 (if_then_else:<V_cmp_mixed>
2688 (match_operator 3 "comparison_operator"
2689 [(match_operand:VDQF_COND 4 "register_operand")
2690 (match_operand:VDQF_COND 5 "nonmemory_operand")])
2691 (match_operand:<V_cmp_mixed> 1 "nonmemory_operand")
2692 (match_operand:<V_cmp_mixed> 2 "nonmemory_operand")))]
2693 "TARGET_SIMD"
2694 {
2695 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2696 enum rtx_code code = GET_CODE (operands[3]);
2697
2698 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2699 it as well as switch operands 1/2 in order to avoid the additional
2700 NOT instruction. */
2701 if (code == NE)
2702 {
2703 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2704 operands[4], operands[5]);
2705 std::swap (operands[1], operands[2]);
2706 }
2707 emit_insn (gen_vec_cmp<mode><v_int_equiv> (mask, operands[3],
2708 operands[4], operands[5]));
2709 emit_insn (gen_vcond_mask_<v_cmp_mixed><v_int_equiv> (
2710 operands[0], operands[1],
2711 operands[2], mask));
2712
2713 DONE;
2714 })
2715
2716 (define_expand "vcondu<mode><mode>"
2717 [(set (match_operand:VSDQ_I_DI 0 "register_operand")
2718 (if_then_else:VSDQ_I_DI
2719 (match_operator 3 "comparison_operator"
2720 [(match_operand:VSDQ_I_DI 4 "register_operand")
2721 (match_operand:VSDQ_I_DI 5 "nonmemory_operand")])
2722 (match_operand:VSDQ_I_DI 1 "nonmemory_operand")
2723 (match_operand:VSDQ_I_DI 2 "nonmemory_operand")))]
2724 "TARGET_SIMD"
2725 {
2726 rtx mask = gen_reg_rtx (<MODE>mode);
2727 enum rtx_code code = GET_CODE (operands[3]);
2728
2729 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2730 it as well as switch operands 1/2 in order to avoid the additional
2731 NOT instruction. */
2732 if (code == NE)
2733 {
2734 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2735 operands[4], operands[5]);
2736 std::swap (operands[1], operands[2]);
2737 }
2738 emit_insn (gen_vec_cmp<mode><mode> (mask, operands[3],
2739 operands[4], operands[5]));
2740 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2741 operands[2], mask));
2742 DONE;
2743 })
2744
2745 (define_expand "vcondu<mode><v_cmp_mixed>"
2746 [(set (match_operand:VDQF 0 "register_operand")
2747 (if_then_else:VDQF
2748 (match_operator 3 "comparison_operator"
2749 [(match_operand:<V_cmp_mixed> 4 "register_operand")
2750 (match_operand:<V_cmp_mixed> 5 "nonmemory_operand")])
2751 (match_operand:VDQF 1 "nonmemory_operand")
2752 (match_operand:VDQF 2 "nonmemory_operand")))]
2753 "TARGET_SIMD"
2754 {
2755 rtx mask = gen_reg_rtx (<V_INT_EQUIV>mode);
2756 enum rtx_code code = GET_CODE (operands[3]);
2757
2758 /* NE is handled as !EQ in vec_cmp patterns, we can explicitly invert
2759 it as well as switch operands 1/2 in order to avoid the additional
2760 NOT instruction. */
2761 if (code == NE)
2762 {
2763 operands[3] = gen_rtx_fmt_ee (EQ, GET_MODE (operands[3]),
2764 operands[4], operands[5]);
2765 std::swap (operands[1], operands[2]);
2766 }
2767 emit_insn (gen_vec_cmp<v_cmp_mixed><v_cmp_mixed> (
2768 mask, operands[3],
2769 operands[4], operands[5]));
2770 emit_insn (gen_vcond_mask_<mode><v_int_equiv> (operands[0], operands[1],
2771 operands[2], mask));
2772 DONE;
2773 })
2774
2775 ;; Patterns for AArch64 SIMD Intrinsics.
2776
2777 ;; Lane extraction with sign extension to general purpose register.
2778 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2779 [(set (match_operand:GPI 0 "register_operand" "=r")
2780 (sign_extend:GPI
2781 (vec_select:<VEL>
2782 (match_operand:VDQQH 1 "register_operand" "w")
2783 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2784 "TARGET_SIMD"
2785 {
2786 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2787 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2788 }
2789 [(set_attr "type" "neon_to_gp<q>")]
2790 )
2791
2792 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2793 [(set (match_operand:SI 0 "register_operand" "=r")
2794 (zero_extend:SI
2795 (vec_select:<VEL>
2796 (match_operand:VDQQH 1 "register_operand" "w")
2797 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2798 "TARGET_SIMD"
2799 {
2800 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2801 return "umov\\t%w0, %1.<Vetype>[%2]";
2802 }
2803 [(set_attr "type" "neon_to_gp<q>")]
2804 )
2805
2806 ;; Lane extraction of a value, neither sign nor zero extension
2807 ;; is guaranteed so upper bits should be considered undefined.
2808 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
2809 (define_insn "aarch64_get_lane<mode>"
2810 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2811 (vec_select:<VEL>
2812 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2813 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2814 "TARGET_SIMD"
2815 {
2816 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2817 switch (which_alternative)
2818 {
2819 case 0:
2820 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2821 case 1:
2822 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2823 case 2:
2824 return "st1\\t{%1.<Vetype>}[%2], %0";
2825 default:
2826 gcc_unreachable ();
2827 }
2828 }
2829 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2830 )
2831
2832 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2833 ;; dest vector.
2834
2835 (define_insn "*aarch64_combinez<mode>"
2836 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2837 (vec_concat:<VDBL>
2838 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")
2839 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))]
2840 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2841 "@
2842 mov\\t%0.8b, %1.8b
2843 fmov\t%d0, %1
2844 ldr\\t%d0, %1"
2845 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2846 (set_attr "simd" "yes,*,yes")
2847 (set_attr "fp" "*,yes,*")]
2848 )
2849
2850 (define_insn "*aarch64_combinez_be<mode>"
2851 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2852 (vec_concat:<VDBL>
2853 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")
2854 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))]
2855 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2856 "@
2857 mov\\t%0.8b, %1.8b
2858 fmov\t%d0, %1
2859 ldr\\t%d0, %1"
2860 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2861 (set_attr "simd" "yes,*,yes")
2862 (set_attr "fp" "*,yes,*")]
2863 )
2864
2865 (define_expand "aarch64_combine<mode>"
2866 [(match_operand:<VDBL> 0 "register_operand")
2867 (match_operand:VDC 1 "register_operand")
2868 (match_operand:VDC 2 "register_operand")]
2869 "TARGET_SIMD"
2870 {
2871 aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2872
2873 DONE;
2874 }
2875 )
2876
2877 (define_expand "aarch64_simd_combine<mode>"
2878 [(match_operand:<VDBL> 0 "register_operand")
2879 (match_operand:VDC 1 "register_operand")
2880 (match_operand:VDC 2 "register_operand")]
2881 "TARGET_SIMD"
2882 {
2883 emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2884 emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2885 DONE;
2886 }
2887 [(set_attr "type" "multiple")]
2888 )
2889
2890 ;; <su><addsub>l<q>.
2891
2892 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2893 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2894 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2895 (match_operand:VQW 1 "register_operand" "w")
2896 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2897 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2898 (match_operand:VQW 2 "register_operand" "w")
2899 (match_dup 3)))))]
2900 "TARGET_SIMD"
2901 "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2902 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2903 )
2904
2905 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2906 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2907 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2908 (match_operand:VQW 1 "register_operand" "w")
2909 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2910 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2911 (match_operand:VQW 2 "register_operand" "w")
2912 (match_dup 3)))))]
2913 "TARGET_SIMD"
2914 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2915 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2916 )
2917
2918
2919 (define_expand "aarch64_saddl2<mode>"
2920 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2921 (match_operand:VQW 1 "register_operand" "w")
2922 (match_operand:VQW 2 "register_operand" "w")]
2923 "TARGET_SIMD"
2924 {
2925 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2926 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2927 operands[2], p));
2928 DONE;
2929 })
2930
2931 (define_expand "aarch64_uaddl2<mode>"
2932 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2933 (match_operand:VQW 1 "register_operand" "w")
2934 (match_operand:VQW 2 "register_operand" "w")]
2935 "TARGET_SIMD"
2936 {
2937 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2938 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2939 operands[2], p));
2940 DONE;
2941 })
2942
2943 (define_expand "aarch64_ssubl2<mode>"
2944 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2945 (match_operand:VQW 1 "register_operand" "w")
2946 (match_operand:VQW 2 "register_operand" "w")]
2947 "TARGET_SIMD"
2948 {
2949 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2950 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2951 operands[2], p));
2952 DONE;
2953 })
2954
2955 (define_expand "aarch64_usubl2<mode>"
2956 [(match_operand:<VWIDE> 0 "register_operand" "=w")
2957 (match_operand:VQW 1 "register_operand" "w")
2958 (match_operand:VQW 2 "register_operand" "w")]
2959 "TARGET_SIMD"
2960 {
2961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2962 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2963 operands[2], p));
2964 DONE;
2965 })
2966
2967 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2968 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2969 (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2970 (match_operand:VD_BHSI 1 "register_operand" "w"))
2971 (ANY_EXTEND:<VWIDE>
2972 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2973 "TARGET_SIMD"
2974 "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2975 [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2976 )
2977
2978 ;; <su><addsub>w<q>.
2979
2980 (define_expand "widen_ssum<mode>3"
2981 [(set (match_operand:<VDBLW> 0 "register_operand" "")
2982 (plus:<VDBLW> (sign_extend:<VDBLW>
2983 (match_operand:VQW 1 "register_operand" ""))
2984 (match_operand:<VDBLW> 2 "register_operand" "")))]
2985 "TARGET_SIMD"
2986 {
2987 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
2988 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
2989
2990 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
2991 operands[1], p));
2992 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
2993 DONE;
2994 }
2995 )
2996
2997 (define_expand "widen_ssum<mode>3"
2998 [(set (match_operand:<VWIDE> 0 "register_operand" "")
2999 (plus:<VWIDE> (sign_extend:<VWIDE>
3000 (match_operand:VD_BHSI 1 "register_operand" ""))
3001 (match_operand:<VWIDE> 2 "register_operand" "")))]
3002 "TARGET_SIMD"
3003 {
3004 emit_insn (gen_aarch64_saddw<mode> (operands[0], operands[2], operands[1]));
3005 DONE;
3006 })
3007
3008 (define_expand "widen_usum<mode>3"
3009 [(set (match_operand:<VDBLW> 0 "register_operand" "")
3010 (plus:<VDBLW> (zero_extend:<VDBLW>
3011 (match_operand:VQW 1 "register_operand" ""))
3012 (match_operand:<VDBLW> 2 "register_operand" "")))]
3013 "TARGET_SIMD"
3014 {
3015 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
3016 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3017
3018 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3019 operands[1], p));
3020 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3021 DONE;
3022 }
3023 )
3024
3025 (define_expand "widen_usum<mode>3"
3026 [(set (match_operand:<VWIDE> 0 "register_operand" "")
3027 (plus:<VWIDE> (zero_extend:<VWIDE>
3028 (match_operand:VD_BHSI 1 "register_operand" ""))
3029 (match_operand:<VWIDE> 2 "register_operand" "")))]
3030 "TARGET_SIMD"
3031 {
3032 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3033 DONE;
3034 })
3035
3036 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
3037 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3038 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3039 (ANY_EXTEND:<VWIDE>
3040 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3041 "TARGET_SIMD"
3042 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3043 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3044 )
3045
3046 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal"
3047 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3048 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3049 (ANY_EXTEND:<VWIDE>
3050 (vec_select:<VHALF>
3051 (match_operand:VQW 2 "register_operand" "w")
3052 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3053 "TARGET_SIMD"
3054 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3055 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3056 )
3057
3058 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
3059 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3060 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3061 (ANY_EXTEND:<VWIDE>
3062 (vec_select:<VHALF>
3063 (match_operand:VQW 2 "register_operand" "w")
3064 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3065 "TARGET_SIMD"
3066 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3067 [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
3068 )
3069
3070 (define_expand "aarch64_saddw2<mode>"
3071 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3072 (match_operand:<VWIDE> 1 "register_operand" "w")
3073 (match_operand:VQW 2 "register_operand" "w")]
3074 "TARGET_SIMD"
3075 {
3076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3077 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3078 operands[2], p));
3079 DONE;
3080 })
3081
3082 (define_expand "aarch64_uaddw2<mode>"
3083 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3084 (match_operand:<VWIDE> 1 "register_operand" "w")
3085 (match_operand:VQW 2 "register_operand" "w")]
3086 "TARGET_SIMD"
3087 {
3088 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3089 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3090 operands[2], p));
3091 DONE;
3092 })
3093
3094
3095 (define_expand "aarch64_ssubw2<mode>"
3096 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3097 (match_operand:<VWIDE> 1 "register_operand" "w")
3098 (match_operand:VQW 2 "register_operand" "w")]
3099 "TARGET_SIMD"
3100 {
3101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3102 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3103 operands[2], p));
3104 DONE;
3105 })
3106
3107 (define_expand "aarch64_usubw2<mode>"
3108 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3109 (match_operand:<VWIDE> 1 "register_operand" "w")
3110 (match_operand:VQW 2 "register_operand" "w")]
3111 "TARGET_SIMD"
3112 {
3113 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3114 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3115 operands[2], p));
3116 DONE;
3117 })
3118
3119 ;; <su><r>h<addsub>.
3120
3121 (define_insn "aarch64_<sur>h<addsub><mode>"
3122 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3123 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3124 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3125 HADDSUB))]
3126 "TARGET_SIMD"
3127 "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3128 [(set_attr "type" "neon_<addsub>_halve<q>")]
3129 )
3130
3131 ;; <r><addsub>hn<q>.
3132
3133 (define_insn "aarch64_<sur><addsub>hn<mode>"
3134 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3135 (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
3136 (match_operand:VQN 2 "register_operand" "w")]
3137 ADDSUBHN))]
3138 "TARGET_SIMD"
3139 "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
3140 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3141 )
3142
3143 (define_insn "aarch64_<sur><addsub>hn2<mode>"
3144 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
3145 (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
3146 (match_operand:VQN 2 "register_operand" "w")
3147 (match_operand:VQN 3 "register_operand" "w")]
3148 ADDSUBHN2))]
3149 "TARGET_SIMD"
3150 "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
3151 [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
3152 )
3153
3154 ;; pmul.
3155
3156 (define_insn "aarch64_pmul<mode>"
3157 [(set (match_operand:VB 0 "register_operand" "=w")
3158 (unspec:VB [(match_operand:VB 1 "register_operand" "w")
3159 (match_operand:VB 2 "register_operand" "w")]
3160 UNSPEC_PMUL))]
3161 "TARGET_SIMD"
3162 "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
3163 [(set_attr "type" "neon_mul_<Vetype><q>")]
3164 )
3165
3166 ;; fmulx.
3167
3168 (define_insn "aarch64_fmulx<mode>"
3169 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
3170 (unspec:VHSDF_HSDF
3171 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
3172 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
3173 UNSPEC_FMULX))]
3174 "TARGET_SIMD"
3175 "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3176 [(set_attr "type" "neon_fp_mul_<stype>")]
3177 )
3178
3179 ;; vmulxq_lane_f32, and vmulx_laneq_f32
3180
3181 (define_insn "*aarch64_mulx_elt_<vswap_width_name><mode>"
3182 [(set (match_operand:VDQSF 0 "register_operand" "=w")
3183 (unspec:VDQSF
3184 [(match_operand:VDQSF 1 "register_operand" "w")
3185 (vec_duplicate:VDQSF
3186 (vec_select:<VEL>
3187 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3188 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3189 UNSPEC_FMULX))]
3190 "TARGET_SIMD"
3191 {
3192 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
3193 INTVAL (operands[3])));
3194 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3195 }
3196 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3197 )
3198
3199 ;; vmulxq_laneq_f32, vmulxq_laneq_f64, vmulx_lane_f32
3200
3201 (define_insn "*aarch64_mulx_elt<mode>"
3202 [(set (match_operand:VDQF 0 "register_operand" "=w")
3203 (unspec:VDQF
3204 [(match_operand:VDQF 1 "register_operand" "w")
3205 (vec_duplicate:VDQF
3206 (vec_select:<VEL>
3207 (match_operand:VDQF 2 "register_operand" "w")
3208 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3209 UNSPEC_FMULX))]
3210 "TARGET_SIMD"
3211 {
3212 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3213 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3214 }
3215 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3216 )
3217
3218 ;; vmulxq_lane
3219
3220 (define_insn "*aarch64_mulx_elt_from_dup<mode>"
3221 [(set (match_operand:VHSDF 0 "register_operand" "=w")
3222 (unspec:VHSDF
3223 [(match_operand:VHSDF 1 "register_operand" "w")
3224 (vec_duplicate:VHSDF
3225 (match_operand:<VEL> 2 "register_operand" "<h_con>"))]
3226 UNSPEC_FMULX))]
3227 "TARGET_SIMD"
3228 "fmulx\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[0]";
3229 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
3230 )
3231
3232 ;; vmulxs_lane_f32, vmulxs_laneq_f32
3233 ;; vmulxd_lane_f64 == vmulx_lane_f64
3234 ;; vmulxd_laneq_f64 == vmulx_laneq_f64
3235
3236 (define_insn "*aarch64_vgetfmulx<mode>"
3237 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3238 (unspec:<VEL>
3239 [(match_operand:<VEL> 1 "register_operand" "w")
3240 (vec_select:<VEL>
3241 (match_operand:VDQF 2 "register_operand" "w")
3242 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3243 UNSPEC_FMULX))]
3244 "TARGET_SIMD"
3245 {
3246 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
3247 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3248 }
3249 [(set_attr "type" "fmul<Vetype>")]
3250 )
3251 ;; <su>q<addsub>
3252
3253 (define_insn "aarch64_<su_optab><optab><mode>"
3254 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3255 (BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
3256 (match_operand:VSDQ_I 2 "register_operand" "w")))]
3257 "TARGET_SIMD"
3258 "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3259 [(set_attr "type" "neon_<optab><q>")]
3260 )
3261
3262 ;; suqadd and usqadd
3263
3264 (define_insn "aarch64_<sur>qadd<mode>"
3265 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3266 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
3267 (match_operand:VSDQ_I 2 "register_operand" "w")]
3268 USSUQADD))]
3269 "TARGET_SIMD"
3270 "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
3271 [(set_attr "type" "neon_qadd<q>")]
3272 )
3273
3274 ;; sqmovun
3275
3276 (define_insn "aarch64_sqmovun<mode>"
3277 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3278 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3279 UNSPEC_SQXTUN))]
3280 "TARGET_SIMD"
3281 "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3282 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3283 )
3284
3285 ;; sqmovn and uqmovn
3286
3287 (define_insn "aarch64_<sur>qmovn<mode>"
3288 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3289 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
3290 SUQMOVN))]
3291 "TARGET_SIMD"
3292 "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
3293 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3294 )
3295
3296 ;; <su>q<absneg>
3297
3298 (define_insn "aarch64_s<optab><mode>"
3299 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3300 (UNQOPS:VSDQ_I
3301 (match_operand:VSDQ_I 1 "register_operand" "w")))]
3302 "TARGET_SIMD"
3303 "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
3304 [(set_attr "type" "neon_<optab><q>")]
3305 )
3306
3307 ;; sq<r>dmulh.
3308
3309 (define_insn "aarch64_sq<r>dmulh<mode>"
3310 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3311 (unspec:VSDQ_HSI
3312 [(match_operand:VSDQ_HSI 1 "register_operand" "w")
3313 (match_operand:VSDQ_HSI 2 "register_operand" "w")]
3314 VQDMULH))]
3315 "TARGET_SIMD"
3316 "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3317 [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
3318 )
3319
3320 ;; sq<r>dmulh_lane
3321
3322 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3323 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3324 (unspec:VDQHS
3325 [(match_operand:VDQHS 1 "register_operand" "w")
3326 (vec_select:<VEL>
3327 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3328 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3329 VQDMULH))]
3330 "TARGET_SIMD"
3331 "*
3332 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3333 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3334 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3335 )
3336
3337 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3338 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3339 (unspec:VDQHS
3340 [(match_operand:VDQHS 1 "register_operand" "w")
3341 (vec_select:<VEL>
3342 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3343 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3344 VQDMULH))]
3345 "TARGET_SIMD"
3346 "*
3347 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3348 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3349 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3350 )
3351
3352 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3353 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3354 (unspec:SD_HSI
3355 [(match_operand:SD_HSI 1 "register_operand" "w")
3356 (vec_select:<VEL>
3357 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3358 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3359 VQDMULH))]
3360 "TARGET_SIMD"
3361 "*
3362 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3363 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3364 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3365 )
3366
3367 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3368 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3369 (unspec:SD_HSI
3370 [(match_operand:SD_HSI 1 "register_operand" "w")
3371 (vec_select:<VEL>
3372 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3373 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3374 VQDMULH))]
3375 "TARGET_SIMD"
3376 "*
3377 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3378 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3379 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3380 )
3381
3382 ;; sqrdml[as]h.
3383
3384 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h<mode>"
3385 [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
3386 (unspec:VSDQ_HSI
3387 [(match_operand:VSDQ_HSI 1 "register_operand" "0")
3388 (match_operand:VSDQ_HSI 2 "register_operand" "w")
3389 (match_operand:VSDQ_HSI 3 "register_operand" "w")]
3390 SQRDMLH_AS))]
3391 "TARGET_SIMD_RDMA"
3392 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3393 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3394 )
3395
3396 ;; sqrdml[as]h_lane.
3397
3398 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3399 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3400 (unspec:VDQHS
3401 [(match_operand:VDQHS 1 "register_operand" "0")
3402 (match_operand:VDQHS 2 "register_operand" "w")
3403 (vec_select:<VEL>
3404 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3405 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3406 SQRDMLH_AS))]
3407 "TARGET_SIMD_RDMA"
3408 {
3409 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3410 return
3411 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3412 }
3413 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3414 )
3415
3416 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_lane<mode>"
3417 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3418 (unspec:SD_HSI
3419 [(match_operand:SD_HSI 1 "register_operand" "0")
3420 (match_operand:SD_HSI 2 "register_operand" "w")
3421 (vec_select:<VEL>
3422 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3423 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3424 SQRDMLH_AS))]
3425 "TARGET_SIMD_RDMA"
3426 {
3427 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3428 return
3429 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3430 }
3431 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3432 )
3433
3434 ;; sqrdml[as]h_laneq.
3435
3436 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3437 [(set (match_operand:VDQHS 0 "register_operand" "=w")
3438 (unspec:VDQHS
3439 [(match_operand:VDQHS 1 "register_operand" "0")
3440 (match_operand:VDQHS 2 "register_operand" "w")
3441 (vec_select:<VEL>
3442 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3443 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3444 SQRDMLH_AS))]
3445 "TARGET_SIMD_RDMA"
3446 {
3447 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3448 return
3449 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3450 }
3451 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3452 )
3453
3454 (define_insn "aarch64_sqrdml<SQRDMLH_AS:rdma_as>h_laneq<mode>"
3455 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
3456 (unspec:SD_HSI
3457 [(match_operand:SD_HSI 1 "register_operand" "0")
3458 (match_operand:SD_HSI 2 "register_operand" "w")
3459 (vec_select:<VEL>
3460 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3461 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3462 SQRDMLH_AS))]
3463 "TARGET_SIMD_RDMA"
3464 {
3465 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3466 return
3467 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3468 }
3469 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3470 )
3471
3472 ;; vqdml[sa]l
3473
3474 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
3475 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3476 (SBINQOPS:<VWIDE>
3477 (match_operand:<VWIDE> 1 "register_operand" "0")
3478 (ss_ashift:<VWIDE>
3479 (mult:<VWIDE>
3480 (sign_extend:<VWIDE>
3481 (match_operand:VSD_HSI 2 "register_operand" "w"))
3482 (sign_extend:<VWIDE>
3483 (match_operand:VSD_HSI 3 "register_operand" "w")))
3484 (const_int 1))))]
3485 "TARGET_SIMD"
3486 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3487 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
3488 )
3489
3490 ;; vqdml[sa]l_lane
3491
3492 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3493 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3494 (SBINQOPS:<VWIDE>
3495 (match_operand:<VWIDE> 1 "register_operand" "0")
3496 (ss_ashift:<VWIDE>
3497 (mult:<VWIDE>
3498 (sign_extend:<VWIDE>
3499 (match_operand:VD_HSI 2 "register_operand" "w"))
3500 (sign_extend:<VWIDE>
3501 (vec_duplicate:VD_HSI
3502 (vec_select:<VEL>
3503 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3504 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3505 ))
3506 (const_int 1))))]
3507 "TARGET_SIMD"
3508 {
3509 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3510 return
3511 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3512 }
3513 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3514 )
3515
3516 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3517 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3518 (SBINQOPS:<VWIDE>
3519 (match_operand:<VWIDE> 1 "register_operand" "0")
3520 (ss_ashift:<VWIDE>
3521 (mult:<VWIDE>
3522 (sign_extend:<VWIDE>
3523 (match_operand:VD_HSI 2 "register_operand" "w"))
3524 (sign_extend:<VWIDE>
3525 (vec_duplicate:VD_HSI
3526 (vec_select:<VEL>
3527 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3528 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3529 ))
3530 (const_int 1))))]
3531 "TARGET_SIMD"
3532 {
3533 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3534 return
3535 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3536 }
3537 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3538 )
3539
3540 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
3541 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3542 (SBINQOPS:<VWIDE>
3543 (match_operand:<VWIDE> 1 "register_operand" "0")
3544 (ss_ashift:<VWIDE>
3545 (mult:<VWIDE>
3546 (sign_extend:<VWIDE>
3547 (match_operand:SD_HSI 2 "register_operand" "w"))
3548 (sign_extend:<VWIDE>
3549 (vec_select:<VEL>
3550 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3551 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3552 )
3553 (const_int 1))))]
3554 "TARGET_SIMD"
3555 {
3556 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3557 return
3558 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3559 }
3560 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3561 )
3562
3563 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
3564 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3565 (SBINQOPS:<VWIDE>
3566 (match_operand:<VWIDE> 1 "register_operand" "0")
3567 (ss_ashift:<VWIDE>
3568 (mult:<VWIDE>
3569 (sign_extend:<VWIDE>
3570 (match_operand:SD_HSI 2 "register_operand" "w"))
3571 (sign_extend:<VWIDE>
3572 (vec_select:<VEL>
3573 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3574 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3575 )
3576 (const_int 1))))]
3577 "TARGET_SIMD"
3578 {
3579 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3580 return
3581 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3582 }
3583 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3584 )
3585
3586 ;; vqdml[sa]l_n
3587
3588 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
3589 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3590 (SBINQOPS:<VWIDE>
3591 (match_operand:<VWIDE> 1 "register_operand" "0")
3592 (ss_ashift:<VWIDE>
3593 (mult:<VWIDE>
3594 (sign_extend:<VWIDE>
3595 (match_operand:VD_HSI 2 "register_operand" "w"))
3596 (sign_extend:<VWIDE>
3597 (vec_duplicate:VD_HSI
3598 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3599 (const_int 1))))]
3600 "TARGET_SIMD"
3601 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3602 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3603 )
3604
3605 ;; sqdml[as]l2
3606
3607 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3608 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3609 (SBINQOPS:<VWIDE>
3610 (match_operand:<VWIDE> 1 "register_operand" "0")
3611 (ss_ashift:<VWIDE>
3612 (mult:<VWIDE>
3613 (sign_extend:<VWIDE>
3614 (vec_select:<VHALF>
3615 (match_operand:VQ_HSI 2 "register_operand" "w")
3616 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3617 (sign_extend:<VWIDE>
3618 (vec_select:<VHALF>
3619 (match_operand:VQ_HSI 3 "register_operand" "w")
3620 (match_dup 4))))
3621 (const_int 1))))]
3622 "TARGET_SIMD"
3623 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3624 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3625 )
3626
3627 (define_expand "aarch64_sqdmlal2<mode>"
3628 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3629 (match_operand:<VWIDE> 1 "register_operand" "w")
3630 (match_operand:VQ_HSI 2 "register_operand" "w")
3631 (match_operand:VQ_HSI 3 "register_operand" "w")]
3632 "TARGET_SIMD"
3633 {
3634 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3635 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3636 operands[2], operands[3], p));
3637 DONE;
3638 })
3639
3640 (define_expand "aarch64_sqdmlsl2<mode>"
3641 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3642 (match_operand:<VWIDE> 1 "register_operand" "w")
3643 (match_operand:VQ_HSI 2 "register_operand" "w")
3644 (match_operand:VQ_HSI 3 "register_operand" "w")]
3645 "TARGET_SIMD"
3646 {
3647 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3648 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3649 operands[2], operands[3], p));
3650 DONE;
3651 })
3652
3653 ;; vqdml[sa]l2_lane
3654
3655 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3656 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3657 (SBINQOPS:<VWIDE>
3658 (match_operand:<VWIDE> 1 "register_operand" "0")
3659 (ss_ashift:<VWIDE>
3660 (mult:<VWIDE>
3661 (sign_extend:<VWIDE>
3662 (vec_select:<VHALF>
3663 (match_operand:VQ_HSI 2 "register_operand" "w")
3664 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3665 (sign_extend:<VWIDE>
3666 (vec_duplicate:<VHALF>
3667 (vec_select:<VEL>
3668 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3669 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3670 ))))
3671 (const_int 1))))]
3672 "TARGET_SIMD"
3673 {
3674 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3675 return
3676 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3677 }
3678 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3679 )
3680
3681 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3682 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3683 (SBINQOPS:<VWIDE>
3684 (match_operand:<VWIDE> 1 "register_operand" "0")
3685 (ss_ashift:<VWIDE>
3686 (mult:<VWIDE>
3687 (sign_extend:<VWIDE>
3688 (vec_select:<VHALF>
3689 (match_operand:VQ_HSI 2 "register_operand" "w")
3690 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3691 (sign_extend:<VWIDE>
3692 (vec_duplicate:<VHALF>
3693 (vec_select:<VEL>
3694 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3695 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3696 ))))
3697 (const_int 1))))]
3698 "TARGET_SIMD"
3699 {
3700 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3701 return
3702 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3703 }
3704 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3705 )
3706
3707 (define_expand "aarch64_sqdmlal2_lane<mode>"
3708 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3709 (match_operand:<VWIDE> 1 "register_operand" "w")
3710 (match_operand:VQ_HSI 2 "register_operand" "w")
3711 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3712 (match_operand:SI 4 "immediate_operand" "i")]
3713 "TARGET_SIMD"
3714 {
3715 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3716 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3717 operands[2], operands[3],
3718 operands[4], p));
3719 DONE;
3720 })
3721
3722 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3723 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3724 (match_operand:<VWIDE> 1 "register_operand" "w")
3725 (match_operand:VQ_HSI 2 "register_operand" "w")
3726 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3727 (match_operand:SI 4 "immediate_operand" "i")]
3728 "TARGET_SIMD"
3729 {
3730 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3731 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3732 operands[2], operands[3],
3733 operands[4], p));
3734 DONE;
3735 })
3736
3737 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3738 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3739 (match_operand:<VWIDE> 1 "register_operand" "w")
3740 (match_operand:VQ_HSI 2 "register_operand" "w")
3741 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3742 (match_operand:SI 4 "immediate_operand" "i")]
3743 "TARGET_SIMD"
3744 {
3745 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3746 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3747 operands[2], operands[3],
3748 operands[4], p));
3749 DONE;
3750 })
3751
3752 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3753 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3754 (match_operand:<VWIDE> 1 "register_operand" "w")
3755 (match_operand:VQ_HSI 2 "register_operand" "w")
3756 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3757 (match_operand:SI 4 "immediate_operand" "i")]
3758 "TARGET_SIMD"
3759 {
3760 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3761 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3762 operands[2], operands[3],
3763 operands[4], p));
3764 DONE;
3765 })
3766
3767 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3768 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3769 (SBINQOPS:<VWIDE>
3770 (match_operand:<VWIDE> 1 "register_operand" "0")
3771 (ss_ashift:<VWIDE>
3772 (mult:<VWIDE>
3773 (sign_extend:<VWIDE>
3774 (vec_select:<VHALF>
3775 (match_operand:VQ_HSI 2 "register_operand" "w")
3776 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3777 (sign_extend:<VWIDE>
3778 (vec_duplicate:<VHALF>
3779 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3780 (const_int 1))))]
3781 "TARGET_SIMD"
3782 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3783 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3784 )
3785
3786 (define_expand "aarch64_sqdmlal2_n<mode>"
3787 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3788 (match_operand:<VWIDE> 1 "register_operand" "w")
3789 (match_operand:VQ_HSI 2 "register_operand" "w")
3790 (match_operand:<VEL> 3 "register_operand" "w")]
3791 "TARGET_SIMD"
3792 {
3793 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3794 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3795 operands[2], operands[3],
3796 p));
3797 DONE;
3798 })
3799
3800 (define_expand "aarch64_sqdmlsl2_n<mode>"
3801 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3802 (match_operand:<VWIDE> 1 "register_operand" "w")
3803 (match_operand:VQ_HSI 2 "register_operand" "w")
3804 (match_operand:<VEL> 3 "register_operand" "w")]
3805 "TARGET_SIMD"
3806 {
3807 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3808 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3809 operands[2], operands[3],
3810 p));
3811 DONE;
3812 })
3813
3814 ;; vqdmull
3815
3816 (define_insn "aarch64_sqdmull<mode>"
3817 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3818 (ss_ashift:<VWIDE>
3819 (mult:<VWIDE>
3820 (sign_extend:<VWIDE>
3821 (match_operand:VSD_HSI 1 "register_operand" "w"))
3822 (sign_extend:<VWIDE>
3823 (match_operand:VSD_HSI 2 "register_operand" "w")))
3824 (const_int 1)))]
3825 "TARGET_SIMD"
3826 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3827 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3828 )
3829
3830 ;; vqdmull_lane
3831
3832 (define_insn "aarch64_sqdmull_lane<mode>"
3833 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3834 (ss_ashift:<VWIDE>
3835 (mult:<VWIDE>
3836 (sign_extend:<VWIDE>
3837 (match_operand:VD_HSI 1 "register_operand" "w"))
3838 (sign_extend:<VWIDE>
3839 (vec_duplicate:VD_HSI
3840 (vec_select:<VEL>
3841 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3842 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3843 ))
3844 (const_int 1)))]
3845 "TARGET_SIMD"
3846 {
3847 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3848 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3849 }
3850 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3851 )
3852
3853 (define_insn "aarch64_sqdmull_laneq<mode>"
3854 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3855 (ss_ashift:<VWIDE>
3856 (mult:<VWIDE>
3857 (sign_extend:<VWIDE>
3858 (match_operand:VD_HSI 1 "register_operand" "w"))
3859 (sign_extend:<VWIDE>
3860 (vec_duplicate:VD_HSI
3861 (vec_select:<VEL>
3862 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3863 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3864 ))
3865 (const_int 1)))]
3866 "TARGET_SIMD"
3867 {
3868 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3869 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3870 }
3871 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3872 )
3873
3874 (define_insn "aarch64_sqdmull_lane<mode>"
3875 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3876 (ss_ashift:<VWIDE>
3877 (mult:<VWIDE>
3878 (sign_extend:<VWIDE>
3879 (match_operand:SD_HSI 1 "register_operand" "w"))
3880 (sign_extend:<VWIDE>
3881 (vec_select:<VEL>
3882 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3883 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3884 ))
3885 (const_int 1)))]
3886 "TARGET_SIMD"
3887 {
3888 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3889 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3890 }
3891 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3892 )
3893
3894 (define_insn "aarch64_sqdmull_laneq<mode>"
3895 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3896 (ss_ashift:<VWIDE>
3897 (mult:<VWIDE>
3898 (sign_extend:<VWIDE>
3899 (match_operand:SD_HSI 1 "register_operand" "w"))
3900 (sign_extend:<VWIDE>
3901 (vec_select:<VEL>
3902 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3903 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3904 ))
3905 (const_int 1)))]
3906 "TARGET_SIMD"
3907 {
3908 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3909 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3910 }
3911 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3912 )
3913
3914 ;; vqdmull_n
3915
3916 (define_insn "aarch64_sqdmull_n<mode>"
3917 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3918 (ss_ashift:<VWIDE>
3919 (mult:<VWIDE>
3920 (sign_extend:<VWIDE>
3921 (match_operand:VD_HSI 1 "register_operand" "w"))
3922 (sign_extend:<VWIDE>
3923 (vec_duplicate:VD_HSI
3924 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3925 )
3926 (const_int 1)))]
3927 "TARGET_SIMD"
3928 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3929 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3930 )
3931
3932 ;; vqdmull2
3933
3934
3935
3936 (define_insn "aarch64_sqdmull2<mode>_internal"
3937 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3938 (ss_ashift:<VWIDE>
3939 (mult:<VWIDE>
3940 (sign_extend:<VWIDE>
3941 (vec_select:<VHALF>
3942 (match_operand:VQ_HSI 1 "register_operand" "w")
3943 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3944 (sign_extend:<VWIDE>
3945 (vec_select:<VHALF>
3946 (match_operand:VQ_HSI 2 "register_operand" "w")
3947 (match_dup 3)))
3948 )
3949 (const_int 1)))]
3950 "TARGET_SIMD"
3951 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3952 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3953 )
3954
3955 (define_expand "aarch64_sqdmull2<mode>"
3956 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3957 (match_operand:VQ_HSI 1 "register_operand" "w")
3958 (match_operand:VQ_HSI 2 "register_operand" "w")]
3959 "TARGET_SIMD"
3960 {
3961 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3962 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3963 operands[2], p));
3964 DONE;
3965 })
3966
3967 ;; vqdmull2_lane
3968
3969 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3970 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3971 (ss_ashift:<VWIDE>
3972 (mult:<VWIDE>
3973 (sign_extend:<VWIDE>
3974 (vec_select:<VHALF>
3975 (match_operand:VQ_HSI 1 "register_operand" "w")
3976 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3977 (sign_extend:<VWIDE>
3978 (vec_duplicate:<VHALF>
3979 (vec_select:<VEL>
3980 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3981 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3982 ))
3983 (const_int 1)))]
3984 "TARGET_SIMD"
3985 {
3986 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3987 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3988 }
3989 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3990 )
3991
3992 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3993 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3994 (ss_ashift:<VWIDE>
3995 (mult:<VWIDE>
3996 (sign_extend:<VWIDE>
3997 (vec_select:<VHALF>
3998 (match_operand:VQ_HSI 1 "register_operand" "w")
3999 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
4000 (sign_extend:<VWIDE>
4001 (vec_duplicate:<VHALF>
4002 (vec_select:<VEL>
4003 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4004 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4005 ))
4006 (const_int 1)))]
4007 "TARGET_SIMD"
4008 {
4009 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
4010 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4011 }
4012 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4013 )
4014
4015 (define_expand "aarch64_sqdmull2_lane<mode>"
4016 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4017 (match_operand:VQ_HSI 1 "register_operand" "w")
4018 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4019 (match_operand:SI 3 "immediate_operand" "i")]
4020 "TARGET_SIMD"
4021 {
4022 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4023 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4024 operands[2], operands[3],
4025 p));
4026 DONE;
4027 })
4028
4029 (define_expand "aarch64_sqdmull2_laneq<mode>"
4030 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4031 (match_operand:VQ_HSI 1 "register_operand" "w")
4032 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4033 (match_operand:SI 3 "immediate_operand" "i")]
4034 "TARGET_SIMD"
4035 {
4036 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4037 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4038 operands[2], operands[3],
4039 p));
4040 DONE;
4041 })
4042
4043 ;; vqdmull2_n
4044
4045 (define_insn "aarch64_sqdmull2_n<mode>_internal"
4046 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4047 (ss_ashift:<VWIDE>
4048 (mult:<VWIDE>
4049 (sign_extend:<VWIDE>
4050 (vec_select:<VHALF>
4051 (match_operand:VQ_HSI 1 "register_operand" "w")
4052 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
4053 (sign_extend:<VWIDE>
4054 (vec_duplicate:<VHALF>
4055 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
4056 )
4057 (const_int 1)))]
4058 "TARGET_SIMD"
4059 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
4060 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4061 )
4062
4063 (define_expand "aarch64_sqdmull2_n<mode>"
4064 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4065 (match_operand:VQ_HSI 1 "register_operand" "w")
4066 (match_operand:<VEL> 2 "register_operand" "w")]
4067 "TARGET_SIMD"
4068 {
4069 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
4070 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4071 operands[2], p));
4072 DONE;
4073 })
4074
4075 ;; vshl
4076
4077 (define_insn "aarch64_<sur>shl<mode>"
4078 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4079 (unspec:VSDQ_I_DI
4080 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4081 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
4082 VSHL))]
4083 "TARGET_SIMD"
4084 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4085 [(set_attr "type" "neon_shift_reg<q>")]
4086 )
4087
4088
4089 ;; vqshl
4090
4091 (define_insn "aarch64_<sur>q<r>shl<mode>"
4092 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4093 (unspec:VSDQ_I
4094 [(match_operand:VSDQ_I 1 "register_operand" "w")
4095 (match_operand:VSDQ_I 2 "register_operand" "w")]
4096 VQSHL))]
4097 "TARGET_SIMD"
4098 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>";
4099 [(set_attr "type" "neon_sat_shift_reg<q>")]
4100 )
4101
4102 ;; vshll_n
4103
4104 (define_insn "aarch64_<sur>shll_n<mode>"
4105 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4106 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
4107 (match_operand:SI 2
4108 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
4109 VSHLL))]
4110 "TARGET_SIMD"
4111 {
4112 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4113 return "shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4114 else
4115 return "<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2";
4116 }
4117 [(set_attr "type" "neon_shift_imm_long")]
4118 )
4119
4120 ;; vshll_high_n
4121
4122 (define_insn "aarch64_<sur>shll2_n<mode>"
4123 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
4124 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
4125 (match_operand:SI 2 "immediate_operand" "i")]
4126 VSHLL))]
4127 "TARGET_SIMD"
4128 {
4129 if (INTVAL (operands[2]) == GET_MODE_UNIT_BITSIZE (<MODE>mode))
4130 return "shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4131 else
4132 return "<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2";
4133 }
4134 [(set_attr "type" "neon_shift_imm_long")]
4135 )
4136
4137 ;; vrshr_n
4138
4139 (define_insn "aarch64_<sur>shr_n<mode>"
4140 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4141 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
4142 (match_operand:SI 2
4143 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4144 VRSHR_N))]
4145 "TARGET_SIMD"
4146 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4147 [(set_attr "type" "neon_sat_shift_imm<q>")]
4148 )
4149
4150 ;; v(r)sra_n
4151
4152 (define_insn "aarch64_<sur>sra_n<mode>"
4153 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4154 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4155 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4156 (match_operand:SI 3
4157 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4158 VSRA))]
4159 "TARGET_SIMD"
4160 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4161 [(set_attr "type" "neon_shift_acc<q>")]
4162 )
4163
4164 ;; vs<lr>i_n
4165
4166 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
4167 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
4168 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
4169 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
4170 (match_operand:SI 3
4171 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
4172 VSLRI))]
4173 "TARGET_SIMD"
4174 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
4175 [(set_attr "type" "neon_shift_imm<q>")]
4176 )
4177
4178 ;; vqshl(u)
4179
4180 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
4181 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
4182 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
4183 (match_operand:SI 2
4184 "aarch64_simd_shift_imm_<ve_mode>" "i")]
4185 VQSHL_N))]
4186 "TARGET_SIMD"
4187 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
4188 [(set_attr "type" "neon_sat_shift_imm<q>")]
4189 )
4190
4191
4192 ;; vq(r)shr(u)n_n
4193
4194 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
4195 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
4196 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
4197 (match_operand:SI 2
4198 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
4199 VQSHRN_N))]
4200 "TARGET_SIMD"
4201 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
4202 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4203 )
4204
4205
4206 ;; cm(eq|ge|gt|lt|le)
4207 ;; Note, we have constraints for Dz and Z as different expanders
4208 ;; have different ideas of what should be passed to this pattern.
4209
4210 (define_insn "aarch64_cm<optab><mode>"
4211 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4212 (neg:<V_INT_EQUIV>
4213 (COMPARISONS:<V_INT_EQUIV>
4214 (match_operand:VDQ_I 1 "register_operand" "w,w")
4215 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4216 )))]
4217 "TARGET_SIMD"
4218 "@
4219 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4220 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
4221 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
4222 )
4223
4224 (define_insn_and_split "aarch64_cm<optab>di"
4225 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
4226 (neg:DI
4227 (COMPARISONS:DI
4228 (match_operand:DI 1 "register_operand" "w,w,r")
4229 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4230 )))
4231 (clobber (reg:CC CC_REGNUM))]
4232 "TARGET_SIMD"
4233 "#"
4234 "reload_completed"
4235 [(set (match_operand:DI 0 "register_operand")
4236 (neg:DI
4237 (COMPARISONS:DI
4238 (match_operand:DI 1 "register_operand")
4239 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4240 )))]
4241 {
4242 /* If we are in the general purpose register file,
4243 we split to a sequence of comparison and store. */
4244 if (GP_REGNUM_P (REGNO (operands[0]))
4245 && GP_REGNUM_P (REGNO (operands[1])))
4246 {
4247 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
4248 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4249 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4250 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4251 DONE;
4252 }
4253 /* Otherwise, we expand to a similar pattern which does not
4254 clobber CC_REGNUM. */
4255 }
4256 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
4257 )
4258
4259 (define_insn "*aarch64_cm<optab>di"
4260 [(set (match_operand:DI 0 "register_operand" "=w,w")
4261 (neg:DI
4262 (COMPARISONS:DI
4263 (match_operand:DI 1 "register_operand" "w,w")
4264 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
4265 )))]
4266 "TARGET_SIMD && reload_completed"
4267 "@
4268 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
4269 cm<optab>\t%d0, %d1, #0"
4270 [(set_attr "type" "neon_compare, neon_compare_zero")]
4271 )
4272
4273 ;; cm(hs|hi)
4274
4275 (define_insn "aarch64_cm<optab><mode>"
4276 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4277 (neg:<V_INT_EQUIV>
4278 (UCOMPARISONS:<V_INT_EQUIV>
4279 (match_operand:VDQ_I 1 "register_operand" "w")
4280 (match_operand:VDQ_I 2 "register_operand" "w")
4281 )))]
4282 "TARGET_SIMD"
4283 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4284 [(set_attr "type" "neon_compare<q>")]
4285 )
4286
4287 (define_insn_and_split "aarch64_cm<optab>di"
4288 [(set (match_operand:DI 0 "register_operand" "=w,r")
4289 (neg:DI
4290 (UCOMPARISONS:DI
4291 (match_operand:DI 1 "register_operand" "w,r")
4292 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4293 )))
4294 (clobber (reg:CC CC_REGNUM))]
4295 "TARGET_SIMD"
4296 "#"
4297 "reload_completed"
4298 [(set (match_operand:DI 0 "register_operand")
4299 (neg:DI
4300 (UCOMPARISONS:DI
4301 (match_operand:DI 1 "register_operand")
4302 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4303 )))]
4304 {
4305 /* If we are in the general purpose register file,
4306 we split to a sequence of comparison and store. */
4307 if (GP_REGNUM_P (REGNO (operands[0]))
4308 && GP_REGNUM_P (REGNO (operands[1])))
4309 {
4310 machine_mode mode = CCmode;
4311 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
4312 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
4313 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4314 DONE;
4315 }
4316 /* Otherwise, we expand to a similar pattern which does not
4317 clobber CC_REGNUM. */
4318 }
4319 [(set_attr "type" "neon_compare,multiple")]
4320 )
4321
4322 (define_insn "*aarch64_cm<optab>di"
4323 [(set (match_operand:DI 0 "register_operand" "=w")
4324 (neg:DI
4325 (UCOMPARISONS:DI
4326 (match_operand:DI 1 "register_operand" "w")
4327 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
4328 )))]
4329 "TARGET_SIMD && reload_completed"
4330 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
4331 [(set_attr "type" "neon_compare")]
4332 )
4333
4334 ;; cmtst
4335
4336 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
4337 ;; we don't have any insns using ne, and aarch64_vcond outputs
4338 ;; not (neg (eq (and x y) 0))
4339 ;; which is rewritten by simplify_rtx as
4340 ;; plus (eq (and x y) 0) -1.
4341
4342 (define_insn "aarch64_cmtst<mode>"
4343 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4344 (plus:<V_INT_EQUIV>
4345 (eq:<V_INT_EQUIV>
4346 (and:VDQ_I
4347 (match_operand:VDQ_I 1 "register_operand" "w")
4348 (match_operand:VDQ_I 2 "register_operand" "w"))
4349 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
4350 (match_operand:<V_INT_EQUIV> 4 "aarch64_simd_imm_minus_one")))
4351 ]
4352 "TARGET_SIMD"
4353 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4354 [(set_attr "type" "neon_tst<q>")]
4355 )
4356
4357 (define_insn_and_split "aarch64_cmtstdi"
4358 [(set (match_operand:DI 0 "register_operand" "=w,r")
4359 (neg:DI
4360 (ne:DI
4361 (and:DI
4362 (match_operand:DI 1 "register_operand" "w,r")
4363 (match_operand:DI 2 "register_operand" "w,r"))
4364 (const_int 0))))
4365 (clobber (reg:CC CC_REGNUM))]
4366 "TARGET_SIMD"
4367 "#"
4368 "reload_completed"
4369 [(set (match_operand:DI 0 "register_operand")
4370 (neg:DI
4371 (ne:DI
4372 (and:DI
4373 (match_operand:DI 1 "register_operand")
4374 (match_operand:DI 2 "register_operand"))
4375 (const_int 0))))]
4376 {
4377 /* If we are in the general purpose register file,
4378 we split to a sequence of comparison and store. */
4379 if (GP_REGNUM_P (REGNO (operands[0]))
4380 && GP_REGNUM_P (REGNO (operands[1])))
4381 {
4382 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
4383 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
4384 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
4385 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
4386 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
4387 DONE;
4388 }
4389 /* Otherwise, we expand to a similar pattern which does not
4390 clobber CC_REGNUM. */
4391 }
4392 [(set_attr "type" "neon_tst,multiple")]
4393 )
4394
4395 (define_insn "*aarch64_cmtstdi"
4396 [(set (match_operand:DI 0 "register_operand" "=w")
4397 (neg:DI
4398 (ne:DI
4399 (and:DI
4400 (match_operand:DI 1 "register_operand" "w")
4401 (match_operand:DI 2 "register_operand" "w"))
4402 (const_int 0))))]
4403 "TARGET_SIMD"
4404 "cmtst\t%d0, %d1, %d2"
4405 [(set_attr "type" "neon_tst")]
4406 )
4407
4408 ;; fcm(eq|ge|gt|le|lt)
4409
4410 (define_insn "aarch64_cm<optab><mode>"
4411 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w,w")
4412 (neg:<V_INT_EQUIV>
4413 (COMPARISONS:<V_INT_EQUIV>
4414 (match_operand:VHSDF_HSDF 1 "register_operand" "w,w")
4415 (match_operand:VHSDF_HSDF 2 "aarch64_simd_reg_or_zero" "w,YDz")
4416 )))]
4417 "TARGET_SIMD"
4418 "@
4419 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
4420 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
4421 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4422 )
4423
4424 ;; fac(ge|gt)
4425 ;; Note we can also handle what would be fac(le|lt) by
4426 ;; generating fac(ge|gt).
4427
4428 (define_insn "aarch64_fac<optab><mode>"
4429 [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
4430 (neg:<V_INT_EQUIV>
4431 (FAC_COMPARISONS:<V_INT_EQUIV>
4432 (abs:VHSDF_HSDF
4433 (match_operand:VHSDF_HSDF 1 "register_operand" "w"))
4434 (abs:VHSDF_HSDF
4435 (match_operand:VHSDF_HSDF 2 "register_operand" "w"))
4436 )))]
4437 "TARGET_SIMD"
4438 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
4439 [(set_attr "type" "neon_fp_compare_<stype><q>")]
4440 )
4441
4442 ;; addp
4443
4444 (define_insn "aarch64_addp<mode>"
4445 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
4446 (unspec:VD_BHSI
4447 [(match_operand:VD_BHSI 1 "register_operand" "w")
4448 (match_operand:VD_BHSI 2 "register_operand" "w")]
4449 UNSPEC_ADDP))]
4450 "TARGET_SIMD"
4451 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
4452 [(set_attr "type" "neon_reduc_add<q>")]
4453 )
4454
4455 (define_insn "aarch64_addpdi"
4456 [(set (match_operand:DI 0 "register_operand" "=w")
4457 (unspec:DI
4458 [(match_operand:V2DI 1 "register_operand" "w")]
4459 UNSPEC_ADDP))]
4460 "TARGET_SIMD"
4461 "addp\t%d0, %1.2d"
4462 [(set_attr "type" "neon_reduc_add")]
4463 )
4464
4465 ;; sqrt
4466
4467 (define_expand "sqrt<mode>2"
4468 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4469 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4470 "TARGET_SIMD"
4471 {
4472 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
4473 DONE;
4474 })
4475
4476 (define_insn "*sqrt<mode>2"
4477 [(set (match_operand:VHSDF 0 "register_operand" "=w")
4478 (sqrt:VHSDF (match_operand:VHSDF 1 "register_operand" "w")))]
4479 "TARGET_SIMD"
4480 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
4481 [(set_attr "type" "neon_fp_sqrt_<stype><q>")]
4482 )
4483
4484 ;; Patterns for vector struct loads and stores.
4485
4486 (define_insn "aarch64_simd_ld2<mode>"
4487 [(set (match_operand:OI 0 "register_operand" "=w")
4488 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4489 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4490 UNSPEC_LD2))]
4491 "TARGET_SIMD"
4492 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4493 [(set_attr "type" "neon_load2_2reg<q>")]
4494 )
4495
4496 (define_insn "aarch64_simd_ld2r<mode>"
4497 [(set (match_operand:OI 0 "register_operand" "=w")
4498 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4499 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4500 UNSPEC_LD2_DUP))]
4501 "TARGET_SIMD"
4502 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4503 [(set_attr "type" "neon_load2_all_lanes<q>")]
4504 )
4505
4506 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
4507 [(set (match_operand:OI 0 "register_operand" "=w")
4508 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4509 (match_operand:OI 2 "register_operand" "0")
4510 (match_operand:SI 3 "immediate_operand" "i")
4511 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4512 UNSPEC_LD2_LANE))]
4513 "TARGET_SIMD"
4514 {
4515 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4516 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4517 }
4518 [(set_attr "type" "neon_load2_one_lane")]
4519 )
4520
4521 (define_expand "vec_load_lanesoi<mode>"
4522 [(set (match_operand:OI 0 "register_operand" "=w")
4523 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
4524 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4525 UNSPEC_LD2))]
4526 "TARGET_SIMD"
4527 {
4528 if (BYTES_BIG_ENDIAN)
4529 {
4530 rtx tmp = gen_reg_rtx (OImode);
4531 rtx mask = aarch64_reverse_mask (<MODE>mode);
4532 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4533 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4534 }
4535 else
4536 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4537 DONE;
4538 })
4539
4540 (define_insn "aarch64_simd_st2<mode>"
4541 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4542 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4543 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4544 UNSPEC_ST2))]
4545 "TARGET_SIMD"
4546 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4547 [(set_attr "type" "neon_store2_2reg<q>")]
4548 )
4549
4550 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4551 (define_insn "aarch64_vec_store_lanesoi_lane<mode>"
4552 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4553 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
4554 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4555 (match_operand:SI 2 "immediate_operand" "i")]
4556 UNSPEC_ST2_LANE))]
4557 "TARGET_SIMD"
4558 {
4559 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4560 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4561 }
4562 [(set_attr "type" "neon_store2_one_lane<q>")]
4563 )
4564
4565 (define_expand "vec_store_lanesoi<mode>"
4566 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4567 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
4568 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4569 UNSPEC_ST2))]
4570 "TARGET_SIMD"
4571 {
4572 if (BYTES_BIG_ENDIAN)
4573 {
4574 rtx tmp = gen_reg_rtx (OImode);
4575 rtx mask = aarch64_reverse_mask (<MODE>mode);
4576 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4577 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4578 }
4579 else
4580 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4581 DONE;
4582 })
4583
4584 (define_insn "aarch64_simd_ld3<mode>"
4585 [(set (match_operand:CI 0 "register_operand" "=w")
4586 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4587 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4588 UNSPEC_LD3))]
4589 "TARGET_SIMD"
4590 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4591 [(set_attr "type" "neon_load3_3reg<q>")]
4592 )
4593
4594 (define_insn "aarch64_simd_ld3r<mode>"
4595 [(set (match_operand:CI 0 "register_operand" "=w")
4596 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4597 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4598 UNSPEC_LD3_DUP))]
4599 "TARGET_SIMD"
4600 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4601 [(set_attr "type" "neon_load3_all_lanes<q>")]
4602 )
4603
4604 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
4605 [(set (match_operand:CI 0 "register_operand" "=w")
4606 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4607 (match_operand:CI 2 "register_operand" "0")
4608 (match_operand:SI 3 "immediate_operand" "i")
4609 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4610 UNSPEC_LD3_LANE))]
4611 "TARGET_SIMD"
4612 {
4613 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4614 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4615 }
4616 [(set_attr "type" "neon_load3_one_lane")]
4617 )
4618
4619 (define_expand "vec_load_lanesci<mode>"
4620 [(set (match_operand:CI 0 "register_operand" "=w")
4621 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
4622 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4623 UNSPEC_LD3))]
4624 "TARGET_SIMD"
4625 {
4626 if (BYTES_BIG_ENDIAN)
4627 {
4628 rtx tmp = gen_reg_rtx (CImode);
4629 rtx mask = aarch64_reverse_mask (<MODE>mode);
4630 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4631 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4632 }
4633 else
4634 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4635 DONE;
4636 })
4637
4638 (define_insn "aarch64_simd_st3<mode>"
4639 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4640 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4641 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4642 UNSPEC_ST3))]
4643 "TARGET_SIMD"
4644 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4645 [(set_attr "type" "neon_store3_3reg<q>")]
4646 )
4647
4648 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4649 (define_insn "aarch64_vec_store_lanesci_lane<mode>"
4650 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4651 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
4652 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4653 (match_operand:SI 2 "immediate_operand" "i")]
4654 UNSPEC_ST3_LANE))]
4655 "TARGET_SIMD"
4656 {
4657 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4658 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4659 }
4660 [(set_attr "type" "neon_store3_one_lane<q>")]
4661 )
4662
4663 (define_expand "vec_store_lanesci<mode>"
4664 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
4665 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
4666 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4667 UNSPEC_ST3))]
4668 "TARGET_SIMD"
4669 {
4670 if (BYTES_BIG_ENDIAN)
4671 {
4672 rtx tmp = gen_reg_rtx (CImode);
4673 rtx mask = aarch64_reverse_mask (<MODE>mode);
4674 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4675 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4676 }
4677 else
4678 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4679 DONE;
4680 })
4681
4682 (define_insn "aarch64_simd_ld4<mode>"
4683 [(set (match_operand:XI 0 "register_operand" "=w")
4684 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4685 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4686 UNSPEC_LD4))]
4687 "TARGET_SIMD"
4688 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4689 [(set_attr "type" "neon_load4_4reg<q>")]
4690 )
4691
4692 (define_insn "aarch64_simd_ld4r<mode>"
4693 [(set (match_operand:XI 0 "register_operand" "=w")
4694 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4695 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4696 UNSPEC_LD4_DUP))]
4697 "TARGET_SIMD"
4698 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4699 [(set_attr "type" "neon_load4_all_lanes<q>")]
4700 )
4701
4702 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4703 [(set (match_operand:XI 0 "register_operand" "=w")
4704 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4705 (match_operand:XI 2 "register_operand" "0")
4706 (match_operand:SI 3 "immediate_operand" "i")
4707 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4708 UNSPEC_LD4_LANE))]
4709 "TARGET_SIMD"
4710 {
4711 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3])));
4712 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4713 }
4714 [(set_attr "type" "neon_load4_one_lane")]
4715 )
4716
4717 (define_expand "vec_load_lanesxi<mode>"
4718 [(set (match_operand:XI 0 "register_operand" "=w")
4719 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
4720 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4721 UNSPEC_LD4))]
4722 "TARGET_SIMD"
4723 {
4724 if (BYTES_BIG_ENDIAN)
4725 {
4726 rtx tmp = gen_reg_rtx (XImode);
4727 rtx mask = aarch64_reverse_mask (<MODE>mode);
4728 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4729 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4730 }
4731 else
4732 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4733 DONE;
4734 })
4735
4736 (define_insn "aarch64_simd_st4<mode>"
4737 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4738 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4739 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4740 UNSPEC_ST4))]
4741 "TARGET_SIMD"
4742 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4743 [(set_attr "type" "neon_store4_4reg<q>")]
4744 )
4745
4746 ;; RTL uses GCC vector extension indices, so flip only for assembly.
4747 (define_insn "aarch64_vec_store_lanesxi_lane<mode>"
4748 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
4749 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
4750 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4751 (match_operand:SI 2 "immediate_operand" "i")]
4752 UNSPEC_ST4_LANE))]
4753 "TARGET_SIMD"
4754 {
4755 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4756 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4757 }
4758 [(set_attr "type" "neon_store4_one_lane<q>")]
4759 )
4760
4761 (define_expand "vec_store_lanesxi<mode>"
4762 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4763 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4764 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4765 UNSPEC_ST4))]
4766 "TARGET_SIMD"
4767 {
4768 if (BYTES_BIG_ENDIAN)
4769 {
4770 rtx tmp = gen_reg_rtx (XImode);
4771 rtx mask = aarch64_reverse_mask (<MODE>mode);
4772 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4773 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4774 }
4775 else
4776 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4777 DONE;
4778 })
4779
4780 (define_insn_and_split "aarch64_rev_reglist<mode>"
4781 [(set (match_operand:VSTRUCT 0 "register_operand" "=&w")
4782 (unspec:VSTRUCT
4783 [(match_operand:VSTRUCT 1 "register_operand" "w")
4784 (match_operand:V16QI 2 "register_operand" "w")]
4785 UNSPEC_REV_REGLIST))]
4786 "TARGET_SIMD"
4787 "#"
4788 "&& reload_completed"
4789 [(const_int 0)]
4790 {
4791 int i;
4792 int nregs = GET_MODE_SIZE (<MODE>mode) / UNITS_PER_VREG;
4793 for (i = 0; i < nregs; i++)
4794 {
4795 rtx op0 = gen_rtx_REG (V16QImode, REGNO (operands[0]) + i);
4796 rtx op1 = gen_rtx_REG (V16QImode, REGNO (operands[1]) + i);
4797 emit_insn (gen_aarch64_tbl1v16qi (op0, op1, operands[2]));
4798 }
4799 DONE;
4800 }
4801 [(set_attr "type" "neon_tbl1_q")
4802 (set_attr "length" "<insn_count>")]
4803 )
4804
4805 ;; Reload patterns for AdvSIMD register list operands.
4806
4807 (define_expand "mov<mode>"
4808 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
4809 (match_operand:VSTRUCT 1 "general_operand" ""))]
4810 "TARGET_SIMD"
4811 {
4812 if (can_create_pseudo_p ())
4813 {
4814 if (GET_CODE (operands[0]) != REG)
4815 operands[1] = force_reg (<MODE>mode, operands[1]);
4816 }
4817 })
4818
4819 (define_insn "*aarch64_mov<mode>"
4820 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4821 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4822 "TARGET_SIMD && !BYTES_BIG_ENDIAN
4823 && (register_operand (operands[0], <MODE>mode)
4824 || register_operand (operands[1], <MODE>mode))"
4825 "@
4826 #
4827 st1\\t{%S1.16b - %<Vendreg>1.16b}, %0
4828 ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1"
4829 [(set_attr "type" "multiple,neon_store<nregs>_<nregs>reg_q,\
4830 neon_load<nregs>_<nregs>reg_q")
4831 (set_attr "length" "<insn_count>,4,4")]
4832 )
4833
4834 (define_insn "aarch64_be_ld1<mode>"
4835 [(set (match_operand:VALLDI_F16 0 "register_operand" "=w")
4836 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1
4837 "aarch64_simd_struct_operand" "Utv")]
4838 UNSPEC_LD1))]
4839 "TARGET_SIMD"
4840 "ld1\\t{%0<Vmtype>}, %1"
4841 [(set_attr "type" "neon_load1_1reg<q>")]
4842 )
4843
4844 (define_insn "aarch64_be_st1<mode>"
4845 [(set (match_operand:VALLDI_F16 0 "aarch64_simd_struct_operand" "=Utv")
4846 (unspec:VALLDI_F16 [(match_operand:VALLDI_F16 1 "register_operand" "w")]
4847 UNSPEC_ST1))]
4848 "TARGET_SIMD"
4849 "st1\\t{%1<Vmtype>}, %0"
4850 [(set_attr "type" "neon_store1_1reg<q>")]
4851 )
4852
4853 (define_insn "*aarch64_be_movoi"
4854 [(set (match_operand:OI 0 "nonimmediate_operand" "=w,m,w")
4855 (match_operand:OI 1 "general_operand" " w,w,m"))]
4856 "TARGET_SIMD && BYTES_BIG_ENDIAN
4857 && (register_operand (operands[0], OImode)
4858 || register_operand (operands[1], OImode))"
4859 "@
4860 #
4861 stp\\t%q1, %R1, %0
4862 ldp\\t%q0, %R0, %1"
4863 [(set_attr "type" "multiple,neon_stp_q,neon_ldp_q")
4864 (set_attr "length" "8,4,4")]
4865 )
4866
4867 (define_insn "*aarch64_be_movci"
4868 [(set (match_operand:CI 0 "nonimmediate_operand" "=w,o,w")
4869 (match_operand:CI 1 "general_operand" " w,w,o"))]
4870 "TARGET_SIMD && BYTES_BIG_ENDIAN
4871 && (register_operand (operands[0], CImode)
4872 || register_operand (operands[1], CImode))"
4873 "#"
4874 [(set_attr "type" "multiple")
4875 (set_attr "length" "12,4,4")]
4876 )
4877
4878 (define_insn "*aarch64_be_movxi"
4879 [(set (match_operand:XI 0 "nonimmediate_operand" "=w,o,w")
4880 (match_operand:XI 1 "general_operand" " w,w,o"))]
4881 "TARGET_SIMD && BYTES_BIG_ENDIAN
4882 && (register_operand (operands[0], XImode)
4883 || register_operand (operands[1], XImode))"
4884 "#"
4885 [(set_attr "type" "multiple")
4886 (set_attr "length" "16,4,4")]
4887 )
4888
4889 (define_split
4890 [(set (match_operand:OI 0 "register_operand")
4891 (match_operand:OI 1 "register_operand"))]
4892 "TARGET_SIMD && reload_completed"
4893 [(const_int 0)]
4894 {
4895 aarch64_simd_emit_reg_reg_move (operands, TImode, 2);
4896 DONE;
4897 })
4898
4899 (define_split
4900 [(set (match_operand:CI 0 "nonimmediate_operand")
4901 (match_operand:CI 1 "general_operand"))]
4902 "TARGET_SIMD && reload_completed"
4903 [(const_int 0)]
4904 {
4905 if (register_operand (operands[0], CImode)
4906 && register_operand (operands[1], CImode))
4907 {
4908 aarch64_simd_emit_reg_reg_move (operands, TImode, 3);
4909 DONE;
4910 }
4911 else if (BYTES_BIG_ENDIAN)
4912 {
4913 emit_move_insn (simplify_gen_subreg (OImode, operands[0], CImode, 0),
4914 simplify_gen_subreg (OImode, operands[1], CImode, 0));
4915 emit_move_insn (gen_lowpart (V16QImode,
4916 simplify_gen_subreg (TImode, operands[0],
4917 CImode, 32)),
4918 gen_lowpart (V16QImode,
4919 simplify_gen_subreg (TImode, operands[1],
4920 CImode, 32)));
4921 DONE;
4922 }
4923 else
4924 FAIL;
4925 })
4926
4927 (define_split
4928 [(set (match_operand:XI 0 "nonimmediate_operand")
4929 (match_operand:XI 1 "general_operand"))]
4930 "TARGET_SIMD && reload_completed"
4931 [(const_int 0)]
4932 {
4933 if (register_operand (operands[0], XImode)
4934 && register_operand (operands[1], XImode))
4935 {
4936 aarch64_simd_emit_reg_reg_move (operands, TImode, 4);
4937 DONE;
4938 }
4939 else if (BYTES_BIG_ENDIAN)
4940 {
4941 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 0),
4942 simplify_gen_subreg (OImode, operands[1], XImode, 0));
4943 emit_move_insn (simplify_gen_subreg (OImode, operands[0], XImode, 32),
4944 simplify_gen_subreg (OImode, operands[1], XImode, 32));
4945 DONE;
4946 }
4947 else
4948 FAIL;
4949 })
4950
4951 (define_expand "aarch64_ld<VSTRUCT:nregs>r<VALLDIF:mode>"
4952 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4953 (match_operand:DI 1 "register_operand" "w")
4954 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4955 "TARGET_SIMD"
4956 {
4957 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
4958 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
4959 * <VSTRUCT:nregs>);
4960
4961 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs>r<VALLDIF:mode> (operands[0],
4962 mem));
4963 DONE;
4964 })
4965
4966 (define_insn "aarch64_ld2<mode>_dreg"
4967 [(set (match_operand:OI 0 "register_operand" "=w")
4968 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4969 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4970 UNSPEC_LD2_DREG))]
4971 "TARGET_SIMD"
4972 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4973 [(set_attr "type" "neon_load2_2reg<q>")]
4974 )
4975
4976 (define_insn "aarch64_ld2<mode>_dreg"
4977 [(set (match_operand:OI 0 "register_operand" "=w")
4978 (unspec:OI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4979 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4980 UNSPEC_LD2_DREG))]
4981 "TARGET_SIMD"
4982 "ld1\\t{%S0.1d - %T0.1d}, %1"
4983 [(set_attr "type" "neon_load1_2reg<q>")]
4984 )
4985
4986 (define_insn "aarch64_ld3<mode>_dreg"
4987 [(set (match_operand:CI 0 "register_operand" "=w")
4988 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4989 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 UNSPEC_LD3_DREG))]
4991 "TARGET_SIMD"
4992 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4993 [(set_attr "type" "neon_load3_3reg<q>")]
4994 )
4995
4996 (define_insn "aarch64_ld3<mode>_dreg"
4997 [(set (match_operand:CI 0 "register_operand" "=w")
4998 (unspec:CI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
4999 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5000 UNSPEC_LD3_DREG))]
5001 "TARGET_SIMD"
5002 "ld1\\t{%S0.1d - %U0.1d}, %1"
5003 [(set_attr "type" "neon_load1_3reg<q>")]
5004 )
5005
5006 (define_insn "aarch64_ld4<mode>_dreg"
5007 [(set (match_operand:XI 0 "register_operand" "=w")
5008 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5009 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5010 UNSPEC_LD4_DREG))]
5011 "TARGET_SIMD"
5012 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
5013 [(set_attr "type" "neon_load4_4reg<q>")]
5014 )
5015
5016 (define_insn "aarch64_ld4<mode>_dreg"
5017 [(set (match_operand:XI 0 "register_operand" "=w")
5018 (unspec:XI [(match_operand:BLK 1 "aarch64_simd_struct_operand" "Utv")
5019 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5020 UNSPEC_LD4_DREG))]
5021 "TARGET_SIMD"
5022 "ld1\\t{%S0.1d - %V0.1d}, %1"
5023 [(set_attr "type" "neon_load1_4reg<q>")]
5024 )
5025
5026 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
5027 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5028 (match_operand:DI 1 "register_operand" "r")
5029 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030 "TARGET_SIMD"
5031 {
5032 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5033 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5034
5035 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
5036 DONE;
5037 })
5038
5039 (define_expand "aarch64_ld1<VALL_F16:mode>"
5040 [(match_operand:VALL_F16 0 "register_operand")
5041 (match_operand:DI 1 "register_operand")]
5042 "TARGET_SIMD"
5043 {
5044 machine_mode mode = <VALL_F16:MODE>mode;
5045 rtx mem = gen_rtx_MEM (mode, operands[1]);
5046
5047 if (BYTES_BIG_ENDIAN)
5048 emit_insn (gen_aarch64_be_ld1<VALL_F16:mode> (operands[0], mem));
5049 else
5050 emit_move_insn (operands[0], mem);
5051 DONE;
5052 })
5053
5054 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
5055 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5056 (match_operand:DI 1 "register_operand" "r")
5057 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5058 "TARGET_SIMD"
5059 {
5060 machine_mode mode = <VSTRUCT:MODE>mode;
5061 rtx mem = gen_rtx_MEM (mode, operands[1]);
5062
5063 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5064 DONE;
5065 })
5066
5067 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5068 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5069 (match_operand:DI 1 "register_operand" "w")
5070 (match_operand:VSTRUCT 2 "register_operand" "0")
5071 (match_operand:SI 3 "immediate_operand" "i")
5072 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5073 "TARGET_SIMD"
5074 {
5075 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5076 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5077 * <VSTRUCT:nregs>);
5078
5079 aarch64_simd_lane_bounds (operands[3], 0,
5080 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5081 NULL);
5082 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5083 operands[0], mem, operands[2], operands[3]));
5084 DONE;
5085 })
5086
5087 ;; Expanders for builtins to extract vector registers from large
5088 ;; opaque integer modes.
5089
5090 ;; D-register list.
5091
5092 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
5093 [(match_operand:VDC 0 "register_operand" "=w")
5094 (match_operand:VSTRUCT 1 "register_operand" "w")
5095 (match_operand:SI 2 "immediate_operand" "i")]
5096 "TARGET_SIMD"
5097 {
5098 int part = INTVAL (operands[2]);
5099 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
5100 int offset = part * 16;
5101
5102 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
5103 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
5104 DONE;
5105 })
5106
5107 ;; Q-register list.
5108
5109 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
5110 [(match_operand:VQ 0 "register_operand" "=w")
5111 (match_operand:VSTRUCT 1 "register_operand" "w")
5112 (match_operand:SI 2 "immediate_operand" "i")]
5113 "TARGET_SIMD"
5114 {
5115 int part = INTVAL (operands[2]);
5116 int offset = part * 16;
5117
5118 emit_move_insn (operands[0],
5119 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
5120 DONE;
5121 })
5122
5123 ;; Permuted-store expanders for neon intrinsics.
5124
5125 ;; Permute instructions
5126
5127 ;; vec_perm support
5128
5129 (define_expand "vec_perm_const<mode>"
5130 [(match_operand:VALL_F16 0 "register_operand")
5131 (match_operand:VALL_F16 1 "register_operand")
5132 (match_operand:VALL_F16 2 "register_operand")
5133 (match_operand:<V_INT_EQUIV> 3)]
5134 "TARGET_SIMD"
5135 {
5136 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5137 operands[2], operands[3]))
5138 DONE;
5139 else
5140 FAIL;
5141 })
5142
5143 (define_expand "vec_perm<mode>"
5144 [(match_operand:VB 0 "register_operand")
5145 (match_operand:VB 1 "register_operand")
5146 (match_operand:VB 2 "register_operand")
5147 (match_operand:VB 3 "register_operand")]
5148 "TARGET_SIMD"
5149 {
5150 aarch64_expand_vec_perm (operands[0], operands[1],
5151 operands[2], operands[3]);
5152 DONE;
5153 })
5154
5155 (define_insn "aarch64_tbl1<mode>"
5156 [(set (match_operand:VB 0 "register_operand" "=w")
5157 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
5158 (match_operand:VB 2 "register_operand" "w")]
5159 UNSPEC_TBL))]
5160 "TARGET_SIMD"
5161 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
5162 [(set_attr "type" "neon_tbl1<q>")]
5163 )
5164
5165 ;; Two source registers.
5166
5167 (define_insn "aarch64_tbl2v16qi"
5168 [(set (match_operand:V16QI 0 "register_operand" "=w")
5169 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
5170 (match_operand:V16QI 2 "register_operand" "w")]
5171 UNSPEC_TBL))]
5172 "TARGET_SIMD"
5173 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
5174 [(set_attr "type" "neon_tbl2_q")]
5175 )
5176
5177 (define_insn "aarch64_tbl3<mode>"
5178 [(set (match_operand:VB 0 "register_operand" "=w")
5179 (unspec:VB [(match_operand:OI 1 "register_operand" "w")
5180 (match_operand:VB 2 "register_operand" "w")]
5181 UNSPEC_TBL))]
5182 "TARGET_SIMD"
5183 "tbl\\t%S0.<Vbtype>, {%S1.16b - %T1.16b}, %S2.<Vbtype>"
5184 [(set_attr "type" "neon_tbl3")]
5185 )
5186
5187 (define_insn "aarch64_tbx4<mode>"
5188 [(set (match_operand:VB 0 "register_operand" "=w")
5189 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5190 (match_operand:OI 2 "register_operand" "w")
5191 (match_operand:VB 3 "register_operand" "w")]
5192 UNSPEC_TBX))]
5193 "TARGET_SIMD"
5194 "tbx\\t%S0.<Vbtype>, {%S2.16b - %T2.16b}, %S3.<Vbtype>"
5195 [(set_attr "type" "neon_tbl4")]
5196 )
5197
5198 ;; Three source registers.
5199
5200 (define_insn "aarch64_qtbl3<mode>"
5201 [(set (match_operand:VB 0 "register_operand" "=w")
5202 (unspec:VB [(match_operand:CI 1 "register_operand" "w")
5203 (match_operand:VB 2 "register_operand" "w")]
5204 UNSPEC_TBL))]
5205 "TARGET_SIMD"
5206 "tbl\\t%S0.<Vbtype>, {%S1.16b - %U1.16b}, %S2.<Vbtype>"
5207 [(set_attr "type" "neon_tbl3")]
5208 )
5209
5210 (define_insn "aarch64_qtbx3<mode>"
5211 [(set (match_operand:VB 0 "register_operand" "=w")
5212 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5213 (match_operand:CI 2 "register_operand" "w")
5214 (match_operand:VB 3 "register_operand" "w")]
5215 UNSPEC_TBX))]
5216 "TARGET_SIMD"
5217 "tbx\\t%S0.<Vbtype>, {%S2.16b - %U2.16b}, %S3.<Vbtype>"
5218 [(set_attr "type" "neon_tbl3")]
5219 )
5220
5221 ;; Four source registers.
5222
5223 (define_insn "aarch64_qtbl4<mode>"
5224 [(set (match_operand:VB 0 "register_operand" "=w")
5225 (unspec:VB [(match_operand:XI 1 "register_operand" "w")
5226 (match_operand:VB 2 "register_operand" "w")]
5227 UNSPEC_TBL))]
5228 "TARGET_SIMD"
5229 "tbl\\t%S0.<Vbtype>, {%S1.16b - %V1.16b}, %S2.<Vbtype>"
5230 [(set_attr "type" "neon_tbl4")]
5231 )
5232
5233 (define_insn "aarch64_qtbx4<mode>"
5234 [(set (match_operand:VB 0 "register_operand" "=w")
5235 (unspec:VB [(match_operand:VB 1 "register_operand" "0")
5236 (match_operand:XI 2 "register_operand" "w")
5237 (match_operand:VB 3 "register_operand" "w")]
5238 UNSPEC_TBX))]
5239 "TARGET_SIMD"
5240 "tbx\\t%S0.<Vbtype>, {%S2.16b - %V2.16b}, %S3.<Vbtype>"
5241 [(set_attr "type" "neon_tbl4")]
5242 )
5243
5244 (define_insn_and_split "aarch64_combinev16qi"
5245 [(set (match_operand:OI 0 "register_operand" "=w")
5246 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
5247 (match_operand:V16QI 2 "register_operand" "w")]
5248 UNSPEC_CONCAT))]
5249 "TARGET_SIMD"
5250 "#"
5251 "&& reload_completed"
5252 [(const_int 0)]
5253 {
5254 aarch64_split_combinev16qi (operands);
5255 DONE;
5256 }
5257 [(set_attr "type" "multiple")]
5258 )
5259
5260 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5261 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5262 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5263 (match_operand:VALL_F16 2 "register_operand" "w")]
5264 PERMUTE))]
5265 "TARGET_SIMD"
5266 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5267 [(set_attr "type" "neon_permute<q>")]
5268 )
5269
5270 ;; Note immediate (third) operand is lane index not byte index.
5271 (define_insn "aarch64_ext<mode>"
5272 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5273 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5274 (match_operand:VALL_F16 2 "register_operand" "w")
5275 (match_operand:SI 3 "immediate_operand" "i")]
5276 UNSPEC_EXT))]
5277 "TARGET_SIMD"
5278 {
5279 operands[3] = GEN_INT (INTVAL (operands[3])
5280 * GET_MODE_UNIT_SIZE (<MODE>mode));
5281 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5282 }
5283 [(set_attr "type" "neon_ext<q>")]
5284 )
5285
5286 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5287 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5288 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5289 REVERSE))]
5290 "TARGET_SIMD"
5291 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
5292 [(set_attr "type" "neon_rev<q>")]
5293 )
5294
5295 (define_insn "aarch64_st2<mode>_dreg"
5296 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5297 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5298 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5299 UNSPEC_ST2))]
5300 "TARGET_SIMD"
5301 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5302 [(set_attr "type" "neon_store2_2reg")]
5303 )
5304
5305 (define_insn "aarch64_st2<mode>_dreg"
5306 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5307 (unspec:BLK [(match_operand:OI 1 "register_operand" "w")
5308 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5309 UNSPEC_ST2))]
5310 "TARGET_SIMD"
5311 "st1\\t{%S1.1d - %T1.1d}, %0"
5312 [(set_attr "type" "neon_store1_2reg")]
5313 )
5314
5315 (define_insn "aarch64_st3<mode>_dreg"
5316 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5317 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5318 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5319 UNSPEC_ST3))]
5320 "TARGET_SIMD"
5321 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5322 [(set_attr "type" "neon_store3_3reg")]
5323 )
5324
5325 (define_insn "aarch64_st3<mode>_dreg"
5326 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5327 (unspec:BLK [(match_operand:CI 1 "register_operand" "w")
5328 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5329 UNSPEC_ST3))]
5330 "TARGET_SIMD"
5331 "st1\\t{%S1.1d - %U1.1d}, %0"
5332 [(set_attr "type" "neon_store1_3reg")]
5333 )
5334
5335 (define_insn "aarch64_st4<mode>_dreg"
5336 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5337 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5338 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5339 UNSPEC_ST4))]
5340 "TARGET_SIMD"
5341 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
5342 [(set_attr "type" "neon_store4_4reg")]
5343 )
5344
5345 (define_insn "aarch64_st4<mode>_dreg"
5346 [(set (match_operand:BLK 0 "aarch64_simd_struct_operand" "=Utv")
5347 (unspec:BLK [(match_operand:XI 1 "register_operand" "w")
5348 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5349 UNSPEC_ST4))]
5350 "TARGET_SIMD"
5351 "st1\\t{%S1.1d - %V1.1d}, %0"
5352 [(set_attr "type" "neon_store1_4reg")]
5353 )
5354
5355 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
5356 [(match_operand:DI 0 "register_operand" "r")
5357 (match_operand:VSTRUCT 1 "register_operand" "w")
5358 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5359 "TARGET_SIMD"
5360 {
5361 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5362 set_mem_size (mem, <VSTRUCT:nregs> * 8);
5363
5364 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
5365 DONE;
5366 })
5367
5368 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
5369 [(match_operand:DI 0 "register_operand" "r")
5370 (match_operand:VSTRUCT 1 "register_operand" "w")
5371 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5372 "TARGET_SIMD"
5373 {
5374 machine_mode mode = <VSTRUCT:MODE>mode;
5375 rtx mem = gen_rtx_MEM (mode, operands[0]);
5376
5377 emit_insn (gen_aarch64_simd_st<VSTRUCT:nregs><VQ:mode> (mem, operands[1]));
5378 DONE;
5379 })
5380
5381 (define_expand "aarch64_st<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5382 [(match_operand:DI 0 "register_operand" "r")
5383 (match_operand:VSTRUCT 1 "register_operand" "w")
5384 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
5385 (match_operand:SI 2 "immediate_operand")]
5386 "TARGET_SIMD"
5387 {
5388 rtx mem = gen_rtx_MEM (BLKmode, operands[0]);
5389 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5390 * <VSTRUCT:nregs>);
5391
5392 emit_insn (gen_aarch64_vec_store_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5393 mem, operands[1], operands[2]));
5394 DONE;
5395 })
5396
5397 (define_expand "aarch64_st1<VALL_F16:mode>"
5398 [(match_operand:DI 0 "register_operand")
5399 (match_operand:VALL_F16 1 "register_operand")]
5400 "TARGET_SIMD"
5401 {
5402 machine_mode mode = <VALL_F16:MODE>mode;
5403 rtx mem = gen_rtx_MEM (mode, operands[0]);
5404
5405 if (BYTES_BIG_ENDIAN)
5406 emit_insn (gen_aarch64_be_st1<VALL_F16:mode> (mem, operands[1]));
5407 else
5408 emit_move_insn (mem, operands[1]);
5409 DONE;
5410 })
5411
5412 ;; Expander for builtins to insert vector registers into large
5413 ;; opaque integer modes.
5414
5415 ;; Q-register list. We don't need a D-reg inserter as we zero
5416 ;; extend them in arm_neon.h and insert the resulting Q-regs.
5417
5418 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
5419 [(match_operand:VSTRUCT 0 "register_operand" "+w")
5420 (match_operand:VSTRUCT 1 "register_operand" "0")
5421 (match_operand:VQ 2 "register_operand" "w")
5422 (match_operand:SI 3 "immediate_operand" "i")]
5423 "TARGET_SIMD"
5424 {
5425 int part = INTVAL (operands[3]);
5426 int offset = part * 16;
5427
5428 emit_move_insn (operands[0], operands[1]);
5429 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
5430 operands[2]);
5431 DONE;
5432 })
5433
5434 ;; Standard pattern name vec_init<mode><Vel>.
5435
5436 (define_expand "vec_init<mode><Vel>"
5437 [(match_operand:VALL_F16 0 "register_operand" "")
5438 (match_operand 1 "" "")]
5439 "TARGET_SIMD"
5440 {
5441 aarch64_expand_vector_init (operands[0], operands[1]);
5442 DONE;
5443 })
5444
5445 (define_insn "*aarch64_simd_ld1r<mode>"
5446 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5447 (vec_duplicate:VALL_F16
5448 (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
5449 "TARGET_SIMD"
5450 "ld1r\\t{%0.<Vtype>}, %1"
5451 [(set_attr "type" "neon_load1_all_lanes")]
5452 )
5453
5454 (define_insn "aarch64_frecpe<mode>"
5455 [(set (match_operand:VHSDF 0 "register_operand" "=w")
5456 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")]
5457 UNSPEC_FRECPE))]
5458 "TARGET_SIMD"
5459 "frecpe\\t%0.<Vtype>, %1.<Vtype>"
5460 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5461 )
5462
5463 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
5464 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5465 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5466 FRECP))]
5467 "TARGET_SIMD"
5468 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
5469 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")]
5470 )
5471
5472 (define_insn "aarch64_frecps<mode>"
5473 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5474 (unspec:VHSDF_HSDF
5475 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5476 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5477 UNSPEC_FRECPS))]
5478 "TARGET_SIMD"
5479 "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
5480 [(set_attr "type" "neon_fp_recps_<stype><q>")]
5481 )
5482
5483 (define_insn "aarch64_urecpe<mode>"
5484 [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
5485 (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
5486 UNSPEC_URECPE))]
5487 "TARGET_SIMD"
5488 "urecpe\\t%0.<Vtype>, %1.<Vtype>"
5489 [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
5490
5491 ;; Standard pattern name vec_extract<mode><Vel>.
5492
5493 (define_expand "vec_extract<mode><Vel>"
5494 [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
5495 (match_operand:VALL_F16 1 "register_operand" "")
5496 (match_operand:SI 2 "immediate_operand" "")]
5497 "TARGET_SIMD"
5498 {
5499 emit_insn
5500 (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
5501 DONE;
5502 })
5503
5504 ;; aes
5505
5506 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5507 [(set (match_operand:V16QI 0 "register_operand" "=w")
5508 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
5509 (match_operand:V16QI 2 "register_operand" "w")]
5510 CRYPTO_AES))]
5511 "TARGET_SIMD && TARGET_CRYPTO"
5512 "aes<aes_op>\\t%0.16b, %2.16b"
5513 [(set_attr "type" "crypto_aese")]
5514 )
5515
5516 ;; When AES/AESMC fusion is enabled we want the register allocation to
5517 ;; look like:
5518 ;; AESE Vn, _
5519 ;; AESMC Vn, Vn
5520 ;; So prefer to tie operand 1 to operand 0 when fusing.
5521
5522 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5523 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5524 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5525 CRYPTO_AESMC))]
5526 "TARGET_SIMD && TARGET_CRYPTO"
5527 "aes<aesmc_op>\\t%0.16b, %1.16b"
5528 [(set_attr "type" "crypto_aesmc")
5529 (set_attr_alternative "enabled"
5530 [(if_then_else (match_test
5531 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5532 (const_string "yes" )
5533 (const_string "no"))
5534 (const_string "yes")])]
5535 )
5536
5537 ;; sha1
5538
5539 (define_insn "aarch64_crypto_sha1hsi"
5540 [(set (match_operand:SI 0 "register_operand" "=w")
5541 (unspec:SI [(match_operand:SI 1
5542 "register_operand" "w")]
5543 UNSPEC_SHA1H))]
5544 "TARGET_SIMD && TARGET_CRYPTO"
5545 "sha1h\\t%s0, %s1"
5546 [(set_attr "type" "crypto_sha1_fast")]
5547 )
5548
5549 (define_insn "aarch64_crypto_sha1hv4si"
5550 [(set (match_operand:SI 0 "register_operand" "=w")
5551 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5552 (parallel [(const_int 0)]))]
5553 UNSPEC_SHA1H))]
5554 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN"
5555 "sha1h\\t%s0, %s1"
5556 [(set_attr "type" "crypto_sha1_fast")]
5557 )
5558
5559 (define_insn "aarch64_be_crypto_sha1hv4si"
5560 [(set (match_operand:SI 0 "register_operand" "=w")
5561 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5562 (parallel [(const_int 3)]))]
5563 UNSPEC_SHA1H))]
5564 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN"
5565 "sha1h\\t%s0, %s1"
5566 [(set_attr "type" "crypto_sha1_fast")]
5567 )
5568
5569 (define_insn "aarch64_crypto_sha1su1v4si"
5570 [(set (match_operand:V4SI 0 "register_operand" "=w")
5571 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5572 (match_operand:V4SI 2 "register_operand" "w")]
5573 UNSPEC_SHA1SU1))]
5574 "TARGET_SIMD && TARGET_CRYPTO"
5575 "sha1su1\\t%0.4s, %2.4s"
5576 [(set_attr "type" "crypto_sha1_fast")]
5577 )
5578
5579 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5580 [(set (match_operand:V4SI 0 "register_operand" "=w")
5581 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5582 (match_operand:SI 2 "register_operand" "w")
5583 (match_operand:V4SI 3 "register_operand" "w")]
5584 CRYPTO_SHA1))]
5585 "TARGET_SIMD && TARGET_CRYPTO"
5586 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5587 [(set_attr "type" "crypto_sha1_slow")]
5588 )
5589
5590 (define_insn "aarch64_crypto_sha1su0v4si"
5591 [(set (match_operand:V4SI 0 "register_operand" "=w")
5592 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5593 (match_operand:V4SI 2 "register_operand" "w")
5594 (match_operand:V4SI 3 "register_operand" "w")]
5595 UNSPEC_SHA1SU0))]
5596 "TARGET_SIMD && TARGET_CRYPTO"
5597 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5598 [(set_attr "type" "crypto_sha1_xor")]
5599 )
5600
5601 ;; sha256
5602
5603 (define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
5604 [(set (match_operand:V4SI 0 "register_operand" "=w")
5605 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5606 (match_operand:V4SI 2 "register_operand" "w")
5607 (match_operand:V4SI 3 "register_operand" "w")]
5608 CRYPTO_SHA256))]
5609 "TARGET_SIMD && TARGET_CRYPTO"
5610 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5611 [(set_attr "type" "crypto_sha256_slow")]
5612 )
5613
5614 (define_insn "aarch64_crypto_sha256su0v4si"
5615 [(set (match_operand:V4SI 0 "register_operand" "=w")
5616 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5617 (match_operand:V4SI 2 "register_operand" "w")]
5618 UNSPEC_SHA256SU0))]
5619 "TARGET_SIMD &&TARGET_CRYPTO"
5620 "sha256su0\\t%0.4s, %2.4s"
5621 [(set_attr "type" "crypto_sha256_fast")]
5622 )
5623
5624 (define_insn "aarch64_crypto_sha256su1v4si"
5625 [(set (match_operand:V4SI 0 "register_operand" "=w")
5626 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5627 (match_operand:V4SI 2 "register_operand" "w")
5628 (match_operand:V4SI 3 "register_operand" "w")]
5629 UNSPEC_SHA256SU1))]
5630 "TARGET_SIMD &&TARGET_CRYPTO"
5631 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5632 [(set_attr "type" "crypto_sha256_slow")]
5633 )
5634
5635 ;; pmull
5636
5637 (define_insn "aarch64_crypto_pmulldi"
5638 [(set (match_operand:TI 0 "register_operand" "=w")
5639 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5640 (match_operand:DI 2 "register_operand" "w")]
5641 UNSPEC_PMULL))]
5642 "TARGET_SIMD && TARGET_CRYPTO"
5643 "pmull\\t%0.1q, %1.1d, %2.1d"
5644 [(set_attr "type" "crypto_pmull")]
5645 )
5646
5647 (define_insn "aarch64_crypto_pmullv2di"
5648 [(set (match_operand:TI 0 "register_operand" "=w")
5649 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5650 (match_operand:V2DI 2 "register_operand" "w")]
5651 UNSPEC_PMULL2))]
5652 "TARGET_SIMD && TARGET_CRYPTO"
5653 "pmull2\\t%0.1q, %1.2d, %2.2d"
5654 [(set_attr "type" "crypto_pmull")]
5655 )