9a48537a33bc097051f9dd7095a816d5221259f0
[gcc.git] / gcc / config / aarch64 / aarch64-simd.md
1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2014 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd.
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Standard move expander for all vector modes (VALL).  A move may not have
;; both operands in memory, so when storing to memory force the source into
;; a register first.
21 (define_expand "mov<mode>"
22 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
23 (match_operand:VALL 1 "general_operand" ""))]
24 "TARGET_SIMD"
25 "
26 if (GET_CODE (operands[0]) == MEM)
27 operands[1] = force_reg (<MODE>mode, operands[1]);
28 "
29 )
30
;; Misaligned vector move.  AArch64 AdvSIMD loads/stores tolerate
;; misalignment, so the pattern body is the same as an ordinary move.
31 (define_expand "movmisalign<mode>"
32 [(set (match_operand:VALL 0 "nonimmediate_operand" "")
33 (match_operand:VALL 1 "general_operand" ""))]
34 "TARGET_SIMD"
35 {
36 /* This pattern is not permitted to fail during expansion: if both arguments
37 are non-registers (e.g. memory := constant, which can be created by the
38 auto-vectorizer), force operand 1 into a register. */
39 if (!register_operand (operands[0], <MODE>mode)
40 && !register_operand (operands[1], <MODE>mode))
41 operands[1] = force_reg (<MODE>mode, operands[1]);
42 })
43
;; Broadcast a scalar into every lane of an integer vector: either DUP from
;; a general-purpose register, or DUP from element 0 of a SIMD register.
44 (define_insn "aarch64_simd_dup<mode>"
45 [(set (match_operand:VDQ_I 0 "register_operand" "=w, w")
46 (vec_duplicate:VDQ_I
47 (match_operand:<VEL> 1 "register_operand" "r, w")))]
48 "TARGET_SIMD"
49 "@
50 dup\\t%0.<Vtype>, %<vw>1
51 dup\\t%0.<Vtype>, %1.<Vetype>[0]"
52 [(set_attr "type" "neon_from_gp<q>, neon_dup<q>")]
53 )
54
;; Floating-point variant of the broadcast: FP scalars only live in SIMD
;; registers, so only the DUP-from-element form is needed.
55 (define_insn "aarch64_simd_dup<mode>"
56 [(set (match_operand:VDQF 0 "register_operand" "=w")
57 (vec_duplicate:VDQF (match_operand:<VEL> 1 "register_operand" "w")))]
58 "TARGET_SIMD"
59 "dup\\t%0.<Vtype>, %1.<Vetype>[0]"
60 [(set_attr "type" "neon_dup<q>")]
61 )
62
;; Broadcast one selected lane of a vector to all lanes of the result.
;; The RTL lane number is remapped at output time to the architectural
;; lane number, which differs on big-endian (ENDIAN_LANE_N).
63 (define_insn "aarch64_dup_lane<mode>"
64 [(set (match_operand:VALL 0 "register_operand" "=w")
65 (vec_duplicate:VALL
66 (vec_select:<VEL>
67 (match_operand:VALL 1 "register_operand" "w")
68 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
69 )))]
70 "TARGET_SIMD"
71 {
72 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
73 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
74 }
75 [(set_attr "type" "neon_dup<q>")]
76 )
77
;; As aarch64_dup_lane, but the source vector has the opposite width
;; (64-bit vs 128-bit) to the destination; the lane index is interpreted
;; in the source's mode when endian-adjusting.
78 (define_insn "aarch64_dup_lane_<vswap_width_name><mode>"
79 [(set (match_operand:VALL 0 "register_operand" "=w")
80 (vec_duplicate:VALL
81 (vec_select:<VEL>
82 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
83 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
84 )))]
85 "TARGET_SIMD"
86 {
87 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
88 INTVAL (operands[2])));
89 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
90 }
91 [(set_attr "type" "neon_dup<q>")]
92 )
93
;; Move for 64-bit vector modes.  Alternatives: load, store, SIMD reg-reg
;; (via ORR), SIMD->GP (UMOV), GP->SIMD (INS), GP-GP (MOV), and vector
;; immediate (Dn constraint).  At least one operand must be a register.
94 (define_insn "*aarch64_simd_mov<mode>"
95 [(set (match_operand:VD 0 "nonimmediate_operand"
96 "=w, m, w, ?r, ?w, ?r, w")
97 (match_operand:VD 1 "general_operand"
98 "m, w, w, w, r, r, Dn"))]
99 "TARGET_SIMD
100 && (register_operand (operands[0], <MODE>mode)
101 || register_operand (operands[1], <MODE>mode))"
102 {
103 switch (which_alternative)
104 {
105 case 0: return "ldr\\t%d0, %1";
106 case 1: return "str\\t%d1, %0";
107 case 2: return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
108 case 3: return "umov\t%0, %1.d[0]";
109 case 4: return "ins\t%0.d[0], %1";
110 case 5: return "mov\t%0, %1";
111 case 6:
112 return aarch64_output_simd_mov_immediate (operands[1],
113 <MODE>mode, 64);
114 default: gcc_unreachable ();
115 }
116 }
117 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
118 neon_logic<q>, neon_to_gp<q>, neon_from_gp<q>,\
119 mov_reg, neon_move<q>")]
120 )
121
;; Move for 128-bit vector modes.  The alternatives involving GP registers
;; (3-5) need two instructions, so they emit "#" and are split after reload
;; by the define_splits below; length reflects the two-insn cases.
122 (define_insn "*aarch64_simd_mov<mode>"
123 [(set (match_operand:VQ 0 "nonimmediate_operand"
124 "=w, m, w, ?r, ?w, ?r, w")
125 (match_operand:VQ 1 "general_operand"
126 "m, w, w, w, r, r, Dn"))]
127 "TARGET_SIMD
128 && (register_operand (operands[0], <MODE>mode)
129 || register_operand (operands[1], <MODE>mode))"
130 {
131 switch (which_alternative)
132 {
133 case 0:
134 return "ldr\\t%q0, %1";
135 case 1:
136 return "str\\t%q1, %0";
137 case 2:
138 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %1.<Vbtype>";
139 case 3:
140 case 4:
141 case 5:
142 return "#";
143 case 6:
144 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
145 default:
146 gcc_unreachable ();
147 }
148 }
149 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\
150 neon_logic<q>, multiple, multiple, multiple,\
151 neon_move<q>")
152 (set_attr "length" "4,4,4,8,8,8,4")]
153 )
154
;; After reload, split a 128-bit GP-to-GP vector move into two DImode
;; register moves.  aarch64_simd_disambiguate_copy orders the two copies
;; so an overlapping source/destination pair is not clobbered.
155 (define_split
156 [(set (match_operand:VQ 0 "register_operand" "")
157 (match_operand:VQ 1 "register_operand" ""))]
158 "TARGET_SIMD && reload_completed
159 && GP_REGNUM_P (REGNO (operands[0]))
160 && GP_REGNUM_P (REGNO (operands[1]))"
161 [(set (match_dup 0) (match_dup 1))
162 (set (match_dup 2) (match_dup 3))]
163 {
164 int rdest = REGNO (operands[0]);
165 int rsrc = REGNO (operands[1]);
166 rtx dest[2], src[2];
167
168 dest[0] = gen_rtx_REG (DImode, rdest);
169 src[0] = gen_rtx_REG (DImode, rsrc);
170 dest[1] = gen_rtx_REG (DImode, rdest + 1);
171 src[1] = gen_rtx_REG (DImode, rsrc + 1);
172
173 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
174 })
175
;; After reload, split a 128-bit move that crosses between the FP/SIMD and
;; general register files; the halves are emitted by aarch64_split_simd_move.
176 (define_split
177 [(set (match_operand:VQ 0 "register_operand" "")
178 (match_operand:VQ 1 "register_operand" ""))]
179 "TARGET_SIMD && reload_completed
180 && ((FP_REGNUM_P (REGNO (operands[0])) && GP_REGNUM_P (REGNO (operands[1])))
181 || (GP_REGNUM_P (REGNO (operands[0])) && FP_REGNUM_P (REGNO (operands[1]))))"
182 [(const_int 0)]
183 {
184 aarch64_split_simd_move (operands[0], operands[1]);
185 DONE;
186 })
187
;; Split a 128-bit SIMD<->GP move into two 64-bit half moves.
;; GP -> SIMD: insert the low then high GP halves into the vector.
;; SIMD -> GP: extract the low and high vector halves into GP registers,
;; using endian-correct lane selectors for each half.
188 (define_expand "aarch64_split_simd_mov<mode>"
189 [(set (match_operand:VQ 0)
190 (match_operand:VQ 1))]
191 "TARGET_SIMD"
192 {
193 rtx dst = operands[0];
194 rtx src = operands[1];
195
196 if (GP_REGNUM_P (REGNO (src)))
197 {
198 rtx src_low_part = gen_lowpart (<VHALF>mode, src);
199 rtx src_high_part = gen_highpart (<VHALF>mode, src);
200
201 emit_insn
202 (gen_move_lo_quad_<mode> (dst, src_low_part));
203 emit_insn
204 (gen_move_hi_quad_<mode> (dst, src_high_part));
205 }
206
207 else
208 {
209 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
210 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
211 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
212 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
213
214 emit_insn
215 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
216 emit_insn
217 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
218 }
219 DONE;
220 }
221 )
222
;; Move the low 64-bit half of a 128-bit vector to a GP register (UMOV of
;; d[0]).  Only valid after reload, as part of the split-move sequence.
223 (define_insn "aarch64_simd_mov_from_<mode>low"
224 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
225 (vec_select:<VHALF>
226 (match_operand:VQ 1 "register_operand" "w")
227 (match_operand:VQ 2 "vect_par_cnst_lo_half" "")))]
228 "TARGET_SIMD && reload_completed"
229 "umov\t%0, %1.d[0]"
230 [(set_attr "type" "neon_to_gp<q>")
231 (set_attr "length" "4")
232 ])
233
;; Move the high 64-bit half of a 128-bit vector to a GP register (UMOV of
;; d[1]).  Only valid after reload, as part of the split-move sequence.
234 (define_insn "aarch64_simd_mov_from_<mode>high"
235 [(set (match_operand:<VHALF> 0 "register_operand" "=r")
236 (vec_select:<VHALF>
237 (match_operand:VQ 1 "register_operand" "w")
238 (match_operand:VQ 2 "vect_par_cnst_hi_half" "")))]
239 "TARGET_SIMD && reload_completed"
240 "umov\t%0, %1.d[1]"
241 [(set_attr "type" "neon_to_gp<q>")
242 (set_attr "length" "4")
243 ])
244
;; Vector OR-NOT.  Note the asm operand order: ORN complements its second
;; source, so RTL (ior (not op1) op2) prints as "orn %2, %1".
245 (define_insn "orn<mode>3"
246 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
247 (ior:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
248 (match_operand:VDQ_I 2 "register_operand" "w")))]
249 "TARGET_SIMD"
250 "orn\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
251 [(set_attr "type" "neon_logic<q>")]
252 )
253
;; Vector bit-clear (AND-NOT).  As with ORN, BIC complements its second
;; source, hence the swapped "%2, %1" operand order in the template.
254 (define_insn "bic<mode>3"
255 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
256 (and:VDQ_I (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w"))
257 (match_operand:VDQ_I 2 "register_operand" "w")))]
258 "TARGET_SIMD"
259 "bic\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
260 [(set_attr "type" "neon_logic<q>")]
261 )
262
;; Vector integer addition (standard add<mode>3 pattern).
263 (define_insn "add<mode>3"
264 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
265 (plus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
266 (match_operand:VDQ_I 2 "register_operand" "w")))]
267 "TARGET_SIMD"
268 "add\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
269 [(set_attr "type" "neon_add<q>")]
270 )
271
;; Vector integer subtraction (standard sub<mode>3 pattern).
272 (define_insn "sub<mode>3"
273 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
274 (minus:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
275 (match_operand:VDQ_I 2 "register_operand" "w")))]
276 "TARGET_SIMD"
277 "sub\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
278 [(set_attr "type" "neon_sub<q>")]
279 )
280
;; Vector integer multiply.  VDQ_BHSI: no 64-bit-element variant, since
;; AdvSIMD has no MUL for .2d.
281 (define_insn "mul<mode>3"
282 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
283 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
284 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
285 "TARGET_SIMD"
286 "mul\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
287 [(set_attr "type" "neon_mul_<Vetype><q>")]
288 )
289
;; Byte-swap each element using the REV16/REV32/REV64 instruction selected
;; by the element width (Vrevsuff).
290 (define_insn "bswap<mode>2"
291 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
292 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
293 "TARGET_SIMD"
294 "rev<Vrevsuff>\\t%0.<Vbtype>, %1.<Vbtype>"
295 [(set_attr "type" "neon_rev<q>")]
296 )
297
;; Reverse the bits within each byte of a byte vector (RBIT).
298 (define_insn "aarch64_rbit<mode>"
299 [(set (match_operand:VB 0 "register_operand" "=w")
300 (unspec:VB [(match_operand:VB 1 "register_operand" "w")]
301 UNSPEC_RBIT))]
302 "TARGET_SIMD"
303 "rbit\\t%0.<Vbtype>, %1.<Vbtype>"
304 [(set_attr "type" "neon_rbit")]
305 )
306
;; Count trailing zeros: byte-swap each element, then bit-reverse each byte
;; (the combination fully bit-reverses each element), then count leading
;; zeros of the reversed value.
307 (define_expand "ctz<mode>2"
308 [(set (match_operand:VS 0 "register_operand")
309 (ctz:VS (match_operand:VS 1 "register_operand")))]
310 "TARGET_SIMD"
311 {
312 emit_insn (gen_bswap<mode>2 (operands[0], operands[1]));
313 rtx op0_castsi2qi = simplify_gen_subreg(<VS:VSI2QI>mode, operands[0],
314 <MODE>mode, 0);
315 emit_insn (gen_aarch64_rbit<VS:vsi2qi> (op0_castsi2qi, op0_castsi2qi));
316 emit_insn (gen_clz<mode>2 (operands[0], operands[0]));
317 DONE;
318 }
319 )
320
;; Multiply a vector by one broadcast lane of another vector
;; ((F)MUL by-element form); lane index is endian-adjusted at output.
321 (define_insn "*aarch64_mul3_elt<mode>"
322 [(set (match_operand:VMUL 0 "register_operand" "=w")
323 (mult:VMUL
324 (vec_duplicate:VMUL
325 (vec_select:<VEL>
326 (match_operand:VMUL 1 "register_operand" "<h_con>")
327 (parallel [(match_operand:SI 2 "immediate_operand")])))
328 (match_operand:VMUL 3 "register_operand" "w")))]
329 "TARGET_SIMD"
330 {
331 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
332 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
333 }
334 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
335 )
336
;; By-element multiply where the lane comes from a vector of the opposite
;; width; the lane index is endian-adjusted in the source vector's mode.
337 (define_insn "*aarch64_mul3_elt_<vswap_width_name><mode>"
338 [(set (match_operand:VMUL_CHANGE_NLANES 0 "register_operand" "=w")
339 (mult:VMUL_CHANGE_NLANES
340 (vec_duplicate:VMUL_CHANGE_NLANES
341 (vec_select:<VEL>
342 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
343 (parallel [(match_operand:SI 2 "immediate_operand")])))
344 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
345 "TARGET_SIMD"
346 {
347 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
348 INTVAL (operands[2])));
349 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
350 }
351 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
352 )
353
;; Multiply a V2DF vector by a broadcast DF scalar, using the by-element
;; FMUL with lane 0 of the scalar's register.
354 (define_insn "*aarch64_mul3_elt_to_128df"
355 [(set (match_operand:V2DF 0 "register_operand" "=w")
356 (mult:V2DF
357 (vec_duplicate:V2DF
358 (match_operand:DF 2 "register_operand" "w"))
359 (match_operand:V2DF 1 "register_operand" "w")))]
360 "TARGET_SIMD"
361 "fmul\\t%0.2d, %1.2d, %2.d[0]"
362 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
363 )
364
;; Scalar DF multiply by one lane of a V2DF vector, via the by-element FMUL.
365 (define_insn "*aarch64_mul3_elt_to_64v2df"
366 [(set (match_operand:DF 0 "register_operand" "=w")
367 (mult:DF
368 (vec_select:DF
369 (match_operand:V2DF 1 "register_operand" "w")
370 (parallel [(match_operand:SI 2 "immediate_operand")]))
371 (match_operand:DF 3 "register_operand" "w")))]
372 "TARGET_SIMD"
373 {
374 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
375 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
376 }
377 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
378 )
379
;; Vector integer negation.
380 (define_insn "neg<mode>2"
381 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
382 (neg:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
383 "TARGET_SIMD"
384 "neg\t%0.<Vtype>, %1.<Vtype>"
385 [(set_attr "type" "neon_neg<q>")]
386 )
387
;; Vector integer absolute value.
388 (define_insn "abs<mode>2"
389 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
390 (abs:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
391 "TARGET_SIMD"
392 "abs\t%0.<Vtype>, %1.<Vtype>"
393 [(set_attr "type" "neon_abs<q>")]
394 )
395
;; Signed absolute difference: |op1 - op2| via SABD.
396 (define_insn "abd<mode>_3"
397 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
398 (abs:VDQ_BHSI (minus:VDQ_BHSI
399 (match_operand:VDQ_BHSI 1 "register_operand" "w")
400 (match_operand:VDQ_BHSI 2 "register_operand" "w"))))]
401 "TARGET_SIMD"
402 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
403 [(set_attr "type" "neon_abd<q>")]
404 )
405
;; Signed absolute difference and accumulate: op0 = op3 + |op1 - op2|
;; (SABA; operand 3 is tied to the destination with the "0" constraint).
406 (define_insn "aba<mode>_3"
407 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
408 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
409 (match_operand:VDQ_BHSI 1 "register_operand" "w")
410 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
411 (match_operand:VDQ_BHSI 3 "register_operand" "0")))]
412 "TARGET_SIMD"
413 "saba\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
414 [(set_attr "type" "neon_arith_acc<q>")]
415 )
416
;; Floating-point absolute difference: |op1 - op2| via FABD (vector form).
417 (define_insn "fabd<mode>_3"
418 [(set (match_operand:VDQF 0 "register_operand" "=w")
419 (abs:VDQF (minus:VDQF
420 (match_operand:VDQF 1 "register_operand" "w")
421 (match_operand:VDQF 2 "register_operand" "w"))))]
422 "TARGET_SIMD"
423 "fabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
424 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
425 )
426
;; Scalar (SF/DF) form of FABD, matched by combine from abs(minus).
427 (define_insn "*fabd_scalar<mode>3"
428 [(set (match_operand:GPF 0 "register_operand" "=w")
429 (abs:GPF (minus:GPF
430 (match_operand:GPF 1 "register_operand" "w")
431 (match_operand:GPF 2 "register_operand" "w"))))]
432 "TARGET_SIMD"
433 "fabd\t%<s>0, %<s>1, %<s>2"
434 [(set_attr "type" "neon_fp_abd_<Vetype><q>")]
435 )
436
;; Vector bitwise AND.
437 (define_insn "and<mode>3"
438 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
439 (and:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
440 (match_operand:VDQ_I 2 "register_operand" "w")))]
441 "TARGET_SIMD"
442 "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
443 [(set_attr "type" "neon_logic<q>")]
444 )
445
;; Vector bitwise OR.
446 (define_insn "ior<mode>3"
447 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
448 (ior:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
449 (match_operand:VDQ_I 2 "register_operand" "w")))]
450 "TARGET_SIMD"
451 "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
452 [(set_attr "type" "neon_logic<q>")]
453 )
454
;; Vector bitwise exclusive-OR.
455 (define_insn "xor<mode>3"
456 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
457 (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
458 (match_operand:VDQ_I 2 "register_operand" "w")))]
459 "TARGET_SIMD"
460 "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
461 [(set_attr "type" "neon_logic<q>")]
462 )
463
;; Vector bitwise NOT.
464 (define_insn "one_cmpl<mode>2"
465 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
466 (not:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")))]
467 "TARGET_SIMD"
468 "not\t%0.<Vbtype>, %1.<Vbtype>"
469 [(set_attr "type" "neon_logic<q>")]
470 )
471
;; Insert a scalar into one lane of an integer vector.  Operand 2 is the
;; vec_merge mask (a one-hot value, 1 << lane); it is converted to the
;; endian-adjusted lane index before printing (%p2 prints log2 of the mask).
;; Alternatives: INS from GP reg, INS from SIMD lane 0, LD1 from memory.
472 (define_insn "aarch64_simd_vec_set<mode>"
473 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w")
474 (vec_merge:VDQ_BHSI
475 (vec_duplicate:VDQ_BHSI
476 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv"))
477 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0")
478 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
479 "TARGET_SIMD"
480 {
481 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
482 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
483 switch (which_alternative)
484 {
485 case 0:
486 return "ins\\t%0.<Vetype>[%p2], %w1";
487 case 1:
488 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
489 case 2:
490 return "ld1\\t{%0.<Vetype>}[%p2], %1";
491 default:
492 gcc_unreachable ();
493 }
494 }
495 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_1reg<q>")]
496 )
497
;; Logical right shift by immediate; operand 2 is a vector of identical
;; shift counts (Dr constraint), printed as a single immediate.
498 (define_insn "aarch64_simd_lshr<mode>"
499 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
500 (lshiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
501 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
502 "TARGET_SIMD"
503 "ushr\t%0.<Vtype>, %1.<Vtype>, %2"
504 [(set_attr "type" "neon_shift_imm<q>")]
505 )
506
;; Arithmetic right shift by immediate (vector of identical counts).
507 (define_insn "aarch64_simd_ashr<mode>"
508 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
509 (ashiftrt:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
510 (match_operand:VDQ_I 2 "aarch64_simd_rshift_imm" "Dr")))]
511 "TARGET_SIMD"
512 "sshr\t%0.<Vtype>, %1.<Vtype>, %2"
513 [(set_attr "type" "neon_shift_imm<q>")]
514 )
515
;; Left shift by immediate (vector of identical counts, Dl constraint).
516 (define_insn "aarch64_simd_imm_shl<mode>"
517 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
518 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
519 (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm" "Dl")))]
520 "TARGET_SIMD"
521 "shl\t%0.<Vtype>, %1.<Vtype>, %2"
522 [(set_attr "type" "neon_shift_imm<q>")]
523 )
524
;; Left shift by a per-lane register amount (SSHL; a left shift is the same
;; for signed and unsigned operands).
525 (define_insn "aarch64_simd_reg_sshl<mode>"
526 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
527 (ashift:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
528 (match_operand:VDQ_I 2 "register_operand" "w")))]
529 "TARGET_SIMD"
530 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
531 [(set_attr "type" "neon_shift_reg<q>")]
532 )
533
;; Unsigned shift by register (USHL).  Kept as an unspec because a negative
;; count shifts right, which plain ashift RTL cannot express.
534 (define_insn "aarch64_simd_reg_shl<mode>_unsigned"
535 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
536 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
537 (match_operand:VDQ_I 2 "register_operand" "w")]
538 UNSPEC_ASHIFT_UNSIGNED))]
539 "TARGET_SIMD"
540 "ushl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
541 [(set_attr "type" "neon_shift_reg<q>")]
542 )
543
;; Signed shift by register (SSHL); negative counts shift right
;; arithmetically, hence the unspec representation.
544 (define_insn "aarch64_simd_reg_shl<mode>_signed"
545 [(set (match_operand:VDQ_I 0 "register_operand" "=w")
546 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")
547 (match_operand:VDQ_I 2 "register_operand" "w")]
548 UNSPEC_ASHIFT_SIGNED))]
549 "TARGET_SIMD"
550 "sshl\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
551 [(set_attr "type" "neon_shift_reg<q>")]
552 )
553
;; Vector left shift by a (scalar) SI amount.  In-range constant counts use
;; the immediate SHL pattern; anything else is forced into a register,
;; broadcast, and done with the register-shift pattern.  FAIL is only
;; reached if operand 2 is neither constant, memory, nor register.
554 (define_expand "ashl<mode>3"
555 [(match_operand:VDQ_I 0 "register_operand" "")
556 (match_operand:VDQ_I 1 "register_operand" "")
557 (match_operand:SI 2 "general_operand" "")]
558 "TARGET_SIMD"
559 {
560 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
561 int shift_amount;
562
563 if (CONST_INT_P (operands[2]))
564 {
565 shift_amount = INTVAL (operands[2]);
566 if (shift_amount >= 0 && shift_amount < bit_width)
567 {
568 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
569 shift_amount);
570 emit_insn (gen_aarch64_simd_imm_shl<mode> (operands[0],
571 operands[1],
572 tmp));
573 DONE;
574 }
575 else
576 {
577 operands[2] = force_reg (SImode, operands[2]);
578 }
579 }
580 else if (MEM_P (operands[2]))
581 {
582 operands[2] = force_reg (SImode, operands[2]);
583 }
584
585 if (REG_P (operands[2]))
586 {
587 rtx tmp = gen_reg_rtx (<MODE>mode);
588 emit_insn (gen_aarch64_simd_dup<mode> (tmp,
589 convert_to_mode (<VEL>mode,
590 operands[2],
591 0)));
592 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
593 tmp));
594 DONE;
595 }
596 else
597 FAIL;
598 }
599 )
600
;; Vector logical right shift by a scalar amount.  Constant counts in
;; 1..bit_width use the immediate USHR form (USHR allows a shift equal to
;; the element width); otherwise the count is negated, broadcast, and a
;; USHL-by-register performs the right shift via a negative count.
601 (define_expand "lshr<mode>3"
602 [(match_operand:VDQ_I 0 "register_operand" "")
603 (match_operand:VDQ_I 1 "register_operand" "")
604 (match_operand:SI 2 "general_operand" "")]
605 "TARGET_SIMD"
606 {
607 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
608 int shift_amount;
609
610 if (CONST_INT_P (operands[2]))
611 {
612 shift_amount = INTVAL (operands[2]);
613 if (shift_amount > 0 && shift_amount <= bit_width)
614 {
615 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
616 shift_amount);
617 emit_insn (gen_aarch64_simd_lshr<mode> (operands[0],
618 operands[1],
619 tmp));
620 DONE;
621 }
622 else
623 operands[2] = force_reg (SImode, operands[2]);
624 }
625 else if (MEM_P (operands[2]))
626 {
627 operands[2] = force_reg (SImode, operands[2]);
628 }
629
630 if (REG_P (operands[2]))
631 {
632 rtx tmp = gen_reg_rtx (SImode);
633 rtx tmp1 = gen_reg_rtx (<MODE>mode);
634 emit_insn (gen_negsi2 (tmp, operands[2]));
635 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
636 convert_to_mode (<VEL>mode,
637 tmp, 0)));
638 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0],
639 operands[1],
640 tmp1));
641 DONE;
642 }
643 else
644 FAIL;
645 }
646 )
647
;; Vector arithmetic right shift by a scalar amount; structure mirrors
;; lshr<mode>3 but uses SSHR for immediates and SSHL with a negated,
;; broadcast count for the register case.
648 (define_expand "ashr<mode>3"
649 [(match_operand:VDQ_I 0 "register_operand" "")
650 (match_operand:VDQ_I 1 "register_operand" "")
651 (match_operand:SI 2 "general_operand" "")]
652 "TARGET_SIMD"
653 {
654 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
655 int shift_amount;
656
657 if (CONST_INT_P (operands[2]))
658 {
659 shift_amount = INTVAL (operands[2]);
660 if (shift_amount > 0 && shift_amount <= bit_width)
661 {
662 rtx tmp = aarch64_simd_gen_const_vector_dup (<MODE>mode,
663 shift_amount);
664 emit_insn (gen_aarch64_simd_ashr<mode> (operands[0],
665 operands[1],
666 tmp));
667 DONE;
668 }
669 else
670 operands[2] = force_reg (SImode, operands[2]);
671 }
672 else if (MEM_P (operands[2]))
673 {
674 operands[2] = force_reg (SImode, operands[2]);
675 }
676
677 if (REG_P (operands[2]))
678 {
679 rtx tmp = gen_reg_rtx (SImode);
680 rtx tmp1 = gen_reg_rtx (<MODE>mode);
681 emit_insn (gen_negsi2 (tmp, operands[2]));
682 emit_insn (gen_aarch64_simd_dup<mode> (tmp1,
683 convert_to_mode (<VEL>mode,
684 tmp, 0)));
685 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0],
686 operands[1],
687 tmp1));
688 DONE;
689 }
690 else
691 FAIL;
692 }
693 )
694
;; Per-lane variable left shift: maps directly onto SSHL by register.
695 (define_expand "vashl<mode>3"
696 [(match_operand:VDQ_I 0 "register_operand" "")
697 (match_operand:VDQ_I 1 "register_operand" "")
698 (match_operand:VDQ_I 2 "register_operand" "")]
699 "TARGET_SIMD"
700 {
701 emit_insn (gen_aarch64_simd_reg_sshl<mode> (operands[0], operands[1],
702 operands[2]));
703 DONE;
704 })
705
706 ;; Using mode VDQ_BHSI as there is no V2DImode neg!
707 ;; Negating individual lanes most certainly offsets the
708 ;; gain from vectorization.
;; Per-lane variable arithmetic right shift: negate the counts, then use
;; SSHL by register (negative counts shift right).
709 (define_expand "vashr<mode>3"
710 [(match_operand:VDQ_BHSI 0 "register_operand" "")
711 (match_operand:VDQ_BHSI 1 "register_operand" "")
712 (match_operand:VDQ_BHSI 2 "register_operand" "")]
713 "TARGET_SIMD"
714 {
715 rtx neg = gen_reg_rtx (<MODE>mode);
716 emit (gen_neg<mode>2 (neg, operands[2]));
717 emit_insn (gen_aarch64_simd_reg_shl<mode>_signed (operands[0], operands[1],
718 neg));
719 DONE;
720 })
721
722 ;; DI vector shift
;; DI arithmetic right shift for the intrinsics, allowing a count of 64.
723 (define_expand "aarch64_ashr_simddi"
724 [(match_operand:DI 0 "register_operand" "=w")
725 (match_operand:DI 1 "register_operand" "w")
726 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
727 "TARGET_SIMD"
728 {
729 /* An arithmetic shift right by 64 fills the result with copies of the sign
730 bit, just like asr by 63 - however the standard pattern does not handle
731 a shift by 64. */
732 if (INTVAL (operands[2]) == 64)
733 operands[2] = GEN_INT (63);
734 emit_insn (gen_ashrdi3 (operands[0], operands[1], operands[2]));
735 DONE;
736 }
737 )
738
;; Per-lane variable logical right shift: negate the counts, then use
;; USHL by register (negative counts shift right).
739 (define_expand "vlshr<mode>3"
740 [(match_operand:VDQ_BHSI 0 "register_operand" "")
741 (match_operand:VDQ_BHSI 1 "register_operand" "")
742 (match_operand:VDQ_BHSI 2 "register_operand" "")]
743 "TARGET_SIMD"
744 {
745 rtx neg = gen_reg_rtx (<MODE>mode);
746 emit (gen_neg<mode>2 (neg, operands[2]));
747 emit_insn (gen_aarch64_simd_reg_shl<mode>_unsigned (operands[0], operands[1],
748 neg));
749 DONE;
750 })
751
;; DI logical right shift for the intrinsics.  A count of 64 (result 0)
;; cannot go through the standard pattern, so it uses the dedicated
;; ushr-by-64 insn below.
752 (define_expand "aarch64_lshr_simddi"
753 [(match_operand:DI 0 "register_operand" "=w")
754 (match_operand:DI 1 "register_operand" "w")
755 (match_operand:SI 2 "aarch64_shift_imm64_di" "")]
756 "TARGET_SIMD"
757 {
758 if (INTVAL (operands[2]) == 64)
759 emit_insn (gen_aarch64_ushr_simddi (operands[0], operands[1]))
760 else
761 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
762 DONE;
763 }
764 )
765
766 ;; SIMD shift by 64. This pattern is a special case as standard pattern does
767 ;; not handle NEON shifts by 64.
;; Modelled as an unspec because (lshiftrt x 64) would be out of range RTL.
768 (define_insn "aarch64_ushr_simddi"
769 [(set (match_operand:DI 0 "register_operand" "=w")
770 (unspec:DI
771 [(match_operand:DI 1 "register_operand" "w")] UNSPEC_USHR64))]
772 "TARGET_SIMD"
773 "ushr\t%d0, %d1, 64"
774 [(set_attr "type" "neon_shift_imm")]
775 )
776
;; Standard vec_set expander for integer vectors: convert the lane index
;; into the one-hot vec_merge mask expected by aarch64_simd_vec_set.
777 (define_expand "vec_set<mode>"
778 [(match_operand:VDQ_BHSI 0 "register_operand")
779 (match_operand:<VEL> 1 "register_operand")
780 (match_operand:SI 2 "immediate_operand")]
781 "TARGET_SIMD"
782 {
783 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
784 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
785 GEN_INT (elem), operands[0]));
786 DONE;
787 }
788 )
789
790 ;; Whole-vector shift right (toward element 0) for 64-bit modes, done as a
791 ;; shift of the single 64-bit register; this does not require a SIMD zero.
;; On big-endian the element order within the register is reversed, so the
;; architectural operation is a LEFT shift; that must be SHL (shift left by
;; immediate) -- USHL only accepts a register shift amount, so "ushl" with
;; an immediate operand would not assemble.
791 (define_insn "vec_shr_<mode>"
792 [(set (match_operand:VD 0 "register_operand" "=w")
793 (lshiftrt:VD (match_operand:VD 1 "register_operand" "w")
794 (match_operand:SI 2 "immediate_operand" "i")))]
795 "TARGET_SIMD"
796 {
797 if (BYTES_BIG_ENDIAN)
798 return "shl %d0, %d1, %2";
799 else
800 return "ushr %d0, %d1, %2";
801 }
802 [(set_attr "type" "neon_shift_imm")]
803 )
804
;; Insert a DI scalar into one lane of a V2DI vector; operand 2 is the
;; one-hot vec_merge mask, remapped to the endian-adjusted lane for output.
805 (define_insn "aarch64_simd_vec_setv2di"
806 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
807 (vec_merge:V2DI
808 (vec_duplicate:V2DI
809 (match_operand:DI 1 "register_operand" "r,w"))
810 (match_operand:V2DI 3 "register_operand" "0,0")
811 (match_operand:SI 2 "immediate_operand" "i,i")))]
812 "TARGET_SIMD"
813 {
814 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
815 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
816 switch (which_alternative)
817 {
818 case 0:
819 return "ins\\t%0.d[%p2], %1";
820 case 1:
821 return "ins\\t%0.d[%p2], %1.d[0]";
822 default:
823 gcc_unreachable ();
824 }
825 }
826 [(set_attr "type" "neon_from_gp, neon_ins_q")]
827 )
828
;; vec_set expander for V2DI: convert lane index to the one-hot mask.
829 (define_expand "vec_setv2di"
830 [(match_operand:V2DI 0 "register_operand")
831 (match_operand:DI 1 "register_operand")
832 (match_operand:SI 2 "immediate_operand")]
833 "TARGET_SIMD"
834 {
835 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
836 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
837 GEN_INT (elem), operands[0]));
838 DONE;
839 }
840 )
841
;; Insert an FP scalar into one lane of an FP vector (INS from lane 0 of
;; the scalar's register); operand 2 is the one-hot vec_merge mask.
842 (define_insn "aarch64_simd_vec_set<mode>"
843 [(set (match_operand:VDQF 0 "register_operand" "=w")
844 (vec_merge:VDQF
845 (vec_duplicate:VDQF
846 (match_operand:<VEL> 1 "register_operand" "w"))
847 (match_operand:VDQF 3 "register_operand" "0")
848 (match_operand:SI 2 "immediate_operand" "i")))]
849 "TARGET_SIMD"
850 {
851 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
852
853 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
854 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
855 }
856 [(set_attr "type" "neon_ins<q>")]
857 )
858
;; vec_set expander for FP vectors: convert lane index to the one-hot mask.
859 (define_expand "vec_set<mode>"
860 [(match_operand:VDQF 0 "register_operand" "+w")
861 (match_operand:<VEL> 1 "register_operand" "w")
862 (match_operand:SI 2 "immediate_operand" "")]
863 "TARGET_SIMD"
864 {
865 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
866 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
867 GEN_INT (elem), operands[0]));
868 DONE;
869 }
870 )
871
872
;; Multiply-accumulate: op0 = op1 + op2 * op3 (MLA; the accumulator is
;; operand 1, tied to the destination).
873 (define_insn "aarch64_mla<mode>"
874 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
875 (plus:VDQ_BHSI (mult:VDQ_BHSI
876 (match_operand:VDQ_BHSI 2 "register_operand" "w")
877 (match_operand:VDQ_BHSI 3 "register_operand" "w"))
878 (match_operand:VDQ_BHSI 1 "register_operand" "0")))]
879 "TARGET_SIMD"
880 "mla\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
881 [(set_attr "type" "neon_mla_<Vetype><q>")]
882 )
883
;; Multiply-accumulate by one broadcast lane (MLA by-element form);
;; lane index endian-adjusted at output time.
884 (define_insn "*aarch64_mla_elt<mode>"
885 [(set (match_operand:VDQHS 0 "register_operand" "=w")
886 (plus:VDQHS
887 (mult:VDQHS
888 (vec_duplicate:VDQHS
889 (vec_select:<VEL>
890 (match_operand:VDQHS 1 "register_operand" "<h_con>")
891 (parallel [(match_operand:SI 2 "immediate_operand")])))
892 (match_operand:VDQHS 3 "register_operand" "w"))
893 (match_operand:VDQHS 4 "register_operand" "0")))]
894 "TARGET_SIMD"
895 {
896 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
897 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
898 }
899 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
900 )
901
;; MLA by-element where the lane comes from a vector of the opposite width.
902 (define_insn "*aarch64_mla_elt_<vswap_width_name><mode>"
903 [(set (match_operand:VDQHS 0 "register_operand" "=w")
904 (plus:VDQHS
905 (mult:VDQHS
906 (vec_duplicate:VDQHS
907 (vec_select:<VEL>
908 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
909 (parallel [(match_operand:SI 2 "immediate_operand")])))
910 (match_operand:VDQHS 3 "register_operand" "w"))
911 (match_operand:VDQHS 4 "register_operand" "0")))]
912 "TARGET_SIMD"
913 {
914 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
915 INTVAL (operands[2])));
916 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
917 }
918 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
919 )
920
;; Multiply-subtract: op0 = op1 - op2 * op3 (MLS; accumulator tied to dest).
921 (define_insn "aarch64_mls<mode>"
922 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
923 (minus:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "0")
924 (mult:VDQ_BHSI (match_operand:VDQ_BHSI 2 "register_operand" "w")
925 (match_operand:VDQ_BHSI 3 "register_operand" "w"))))]
926 "TARGET_SIMD"
927 "mls\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
928 [(set_attr "type" "neon_mla_<Vetype><q>")]
929 )
930
;; Multiply-subtract by one broadcast lane (MLS by-element form).
931 (define_insn "*aarch64_mls_elt<mode>"
932 [(set (match_operand:VDQHS 0 "register_operand" "=w")
933 (minus:VDQHS
934 (match_operand:VDQHS 4 "register_operand" "0")
935 (mult:VDQHS
936 (vec_duplicate:VDQHS
937 (vec_select:<VEL>
938 (match_operand:VDQHS 1 "register_operand" "<h_con>")
939 (parallel [(match_operand:SI 2 "immediate_operand")])))
940 (match_operand:VDQHS 3 "register_operand" "w"))))]
941 "TARGET_SIMD"
942 {
943 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
944 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
945 }
946 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
947 )
948
;; MLS by-element where the lane comes from a vector of the opposite width.
949 (define_insn "*aarch64_mls_elt_<vswap_width_name><mode>"
950 [(set (match_operand:VDQHS 0 "register_operand" "=w")
951 (minus:VDQHS
952 (match_operand:VDQHS 4 "register_operand" "0")
953 (mult:VDQHS
954 (vec_duplicate:VDQHS
955 (vec_select:<VEL>
956 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
957 (parallel [(match_operand:SI 2 "immediate_operand")])))
958 (match_operand:VDQHS 3 "register_operand" "w"))))]
959 "TARGET_SIMD"
960 {
961 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
962 INTVAL (operands[2])));
963 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
964 }
965 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
966 )
967
968 ;; Max/Min operations.
;; Signed/unsigned max/min for element sizes that have a direct instruction
;; (SMAX/SMIN/UMAX/UMIN); V2DI has none, see the expander below.
969 (define_insn "<su><maxmin><mode>3"
970 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
971 (MAXMIN:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")
972 (match_operand:VDQ_BHSI 2 "register_operand" "w")))]
973 "TARGET_SIMD"
974 "<su><maxmin>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
975 [(set_attr "type" "neon_minmax<q>")]
976 )
977
;; V2DI max/min: no direct instruction exists, so synthesize it as a
;; compare followed by a vector conditional select between the operands.
978 (define_expand "<su><maxmin>v2di3"
979 [(set (match_operand:V2DI 0 "register_operand" "")
980 (MAXMIN:V2DI (match_operand:V2DI 1 "register_operand" "")
981 (match_operand:V2DI 2 "register_operand" "")))]
982 "TARGET_SIMD"
983 {
984 enum rtx_code cmp_operator;
985 rtx cmp_fmt;
986
987 switch (<CODE>)
988 {
989 case UMIN:
990 cmp_operator = LTU;
991 break;
992 case SMIN:
993 cmp_operator = LT;
994 break;
995 case UMAX:
996 cmp_operator = GTU;
997 break;
998 case SMAX:
999 cmp_operator = GT;
1000 break;
1001 default:
1002 gcc_unreachable ();
1003 }
1004
1005 cmp_fmt = gen_rtx_fmt_ee (cmp_operator, V2DImode, operands[1], operands[2]);
1006 emit_insn (gen_aarch64_vcond_internalv2div2di (operands[0], operands[1],
1007 operands[2], cmp_fmt, operands[1], operands[2]));
1008 DONE;
1009 })
1010
1011 ;; vec_concat gives a new vector with the low elements from operand 1, and
1012 ;; the high elements from operand 2. That is to say, given op1 = { a, b }
1013 ;; op2 = { c, d }, vec_concat (op1, op2) = { a, b, c, d }.
1014 ;; What that means, is that the RTL descriptions of the below patterns
1015 ;; need to change depending on endianness.
1016
1017 ;; Move to the low architectural bits of the register.
1018 ;; On little-endian this is { operand, zeroes }
1019 ;; On big-endian this is { zeroes, operand }
1020
;; Little-endian, modes with more than two elements: the zero half is
;; written as (vec_duplicate (const_int 0)).  Alternatives: from a SIMD
;; register, from a GP register via fmov, or via dup.
1021 (define_insn "move_lo_quad_internal_<mode>"
1022 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1023 (vec_concat:VQ_NO2E
1024 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1025 (vec_duplicate:<VHALF> (const_int 0))))]
1026 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1027 "@
1028 dup\\t%d0, %1.d[0]
1029 fmov\\t%d0, %1
1030 dup\\t%d0, %1"
1031 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1032 (set_attr "simd" "yes,*,yes")
1033 (set_attr "fp" "*,yes,*")
1034 (set_attr "length" "4")]
1035 )
1036
;; Little-endian, two-element modes: the zero half is a scalar, so it is
;; written as a bare (const_int 0) rather than a vec_duplicate.
1037 (define_insn "move_lo_quad_internal_<mode>"
1038 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1039 (vec_concat:VQ_2E
1040 (match_operand:<VHALF> 1 "register_operand" "w,r,r")
1041 (const_int 0)))]
1042 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1043 "@
1044 dup\\t%d0, %1.d[0]
1045 fmov\\t%d0, %1
1046 dup\\t%d0, %1"
1047 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1048 (set_attr "simd" "yes,*,yes")
1049 (set_attr "fp" "*,yes,*")
1050 (set_attr "length" "4")]
1051 )
1052
;; Big-endian counterpart of the VQ_NO2E pattern above: the concat
;; operands are swapped, but the emitted instructions are identical.
1053 (define_insn "move_lo_quad_internal_be_<mode>"
1054 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1055 (vec_concat:VQ_NO2E
1056 (vec_duplicate:<VHALF> (const_int 0))
1057 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1058 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1059 "@
1060 dup\\t%d0, %1.d[0]
1061 fmov\\t%d0, %1
1062 dup\\t%d0, %1"
1063 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1064 (set_attr "simd" "yes,*,yes")
1065 (set_attr "fp" "*,yes,*")
1066 (set_attr "length" "4")]
1067 )
1068
;; Big-endian counterpart of the VQ_2E pattern above.
1069 (define_insn "move_lo_quad_internal_be_<mode>"
1070 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1071 (vec_concat:VQ_2E
1072 (const_int 0)
1073 (match_operand:<VHALF> 1 "register_operand" "w,r,r")))]
1074 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1075 "@
1076 dup\\t%d0, %1.d[0]
1077 fmov\\t%d0, %1
1078 dup\\t%d0, %1"
1079 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1080 (set_attr "simd" "yes,*,yes")
1081 (set_attr "fp" "*,yes,*")
1082 (set_attr "length" "4")]
1083 )
1084
;; Entry point: dispatch to the endian-appropriate internal pattern.
1085 (define_expand "move_lo_quad_<mode>"
1086 [(match_operand:VQ 0 "register_operand")
1087 (match_operand:VQ 1 "register_operand")]
1088 "TARGET_SIMD"
1089 {
1090 if (BYTES_BIG_ENDIAN)
1091 emit_insn (gen_move_lo_quad_internal_be_<mode> (operands[0], operands[1]));
1092 else
1093 emit_insn (gen_move_lo_quad_internal_<mode> (operands[0], operands[1]));
1094 DONE;
1095 }
1096 )
1097
1098 ;; Move operand1 to the high architectural bits of the register, keeping
1099 ;; the low architectural bits of operand2.
1100 ;; For little-endian this is { operand2, operand1 }
1101 ;; For big-endian this is { operand1, operand2 }
1102
;; Little-endian: the destination is read-modify-write ("+w"); the low
;; half is re-selected from the destination itself via match_dup 0, and
;; only element d[1] is overwritten with an INS.
1103 (define_insn "aarch64_simd_move_hi_quad_<mode>"
1104 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1105 (vec_concat:VQ
1106 (vec_select:<VHALF>
1107 (match_dup 0)
1108 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))
1109 (match_operand:<VHALF> 1 "register_operand" "w,r")))]
1110 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
1111 "@
1112 ins\\t%0.d[1], %1.d[0]
1113 ins\\t%0.d[1], %1"
1114 [(set_attr "type" "neon_ins")]
1115 )
1116
;; Big-endian version: same instructions, concat operands swapped in RTL.
1117 (define_insn "aarch64_simd_move_hi_quad_be_<mode>"
1118 [(set (match_operand:VQ 0 "register_operand" "+w,w")
1119 (vec_concat:VQ
1120 (match_operand:<VHALF> 1 "register_operand" "w,r")
1121 (vec_select:<VHALF>
1122 (match_dup 0)
1123 (match_operand:VQ 2 "vect_par_cnst_lo_half" ""))))]
1124 "TARGET_SIMD && BYTES_BIG_ENDIAN"
1125 "@
1126 ins\\t%0.d[1], %1.d[0]
1127 ins\\t%0.d[1], %1"
1128 [(set_attr "type" "neon_ins")]
1129 )
1130
;; Entry point: build the lo-half lane-selection parallel and dispatch
;; to the endian-appropriate internal pattern.
1131 (define_expand "move_hi_quad_<mode>"
1132 [(match_operand:VQ 0 "register_operand" "")
1133 (match_operand:<VHALF> 1 "register_operand" "")]
1134 "TARGET_SIMD"
1135 {
1136 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1137 if (BYTES_BIG_ENDIAN)
1138 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1139 operands[1], p));
1140 else
1141 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1142 operands[1], p));
1143 DONE;
1144 })
1145
1146 ;; Narrowing operations.
1147
1148 ;; For doubles.
;; Truncate each element of a 128-bit vector to half width (XTN).
1149 (define_insn "aarch64_simd_vec_pack_trunc_<mode>"
1150 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
1151 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w")))]
1152 "TARGET_SIMD"
1153 "xtn\\t%0.<Vntype>, %1.<Vtype>"
1154 [(set_attr "type" "neon_shift_imm_narrow_q")]
1155 )
1156
;; Pack two 64-bit vectors: concatenate them into one 128-bit temporary
;; via the move_lo/hi_quad patterns, then narrow with a single XTN.
;; BYTES_BIG_ENDIAN swaps which input lands in the low half.
1157 (define_expand "vec_pack_trunc_<mode>"
1158 [(match_operand:<VNARROWD> 0 "register_operand" "")
1159 (match_operand:VDN 1 "register_operand" "")
1160 (match_operand:VDN 2 "register_operand" "")]
1161 "TARGET_SIMD"
1162 {
1163 rtx tempreg = gen_reg_rtx (<VDBL>mode);
1164 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1165 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1166
1167 emit_insn (gen_move_lo_quad_<Vdbl> (tempreg, operands[lo]));
1168 emit_insn (gen_move_hi_quad_<Vdbl> (tempreg, operands[hi]));
1169 emit_insn (gen_aarch64_simd_vec_pack_trunc_<Vdbl> (operands[0], tempreg));
1170 DONE;
1171 })
1172
1173 ;; For quads.
;; Pack two 128-bit vectors with an XTN/XTN2 pair; the destination is
;; early-clobbered ("=&w") because it is written before both inputs are
;; fully consumed.  On big-endian the inputs fill opposite halves.
1175 (define_insn "vec_pack_trunc_<mode>"
1176 [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
1177 (vec_concat:<VNARROWQ2>
1178 (truncate:<VNARROWQ> (match_operand:VQN 1 "register_operand" "w"))
1179 (truncate:<VNARROWQ> (match_operand:VQN 2 "register_operand" "w"))))]
1180 "TARGET_SIMD"
1181 {
1182 if (BYTES_BIG_ENDIAN)
1183 return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
1184 else
1185 return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
1186 }
1187 [(set_attr "type" "multiple")
1188 (set_attr "length" "8")]
1189 )
1190
1191 ;; Widening operations.
1192
;; Sign/zero-extend the low half of a 128-bit vector using a shift-left-
;; long by zero (SSHLL/USHLL #0).
1193 (define_insn "aarch64_simd_vec_unpack<su>_lo_<mode>"
1194 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1195 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1196 (match_operand:VQW 1 "register_operand" "w")
1197 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1198 )))]
1199 "TARGET_SIMD"
1200 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0"
1201 [(set_attr "type" "neon_shift_imm_long")]
1202 )
1203
;; As above for the high half (SSHLL2/USHLL2 #0).
1204 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1205 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1206 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1207 (match_operand:VQW 1 "register_operand" "w")
1208 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1209 )))]
1210 "TARGET_SIMD"
1211 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0"
1212 [(set_attr "type" "neon_shift_imm_long")]
1213 )
1214
;; Standard-name expander: build the hi-half lane parallel and emit the
;; insn above.
1215 (define_expand "vec_unpack<su>_hi_<mode>"
1216 [(match_operand:<VWIDE> 0 "register_operand" "")
1217 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1218 "TARGET_SIMD"
1219 {
1220 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1221 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1222 operands[1], p));
1223 DONE;
1224 }
1225 )
1226
;; Standard-name expander for the low half.
1227 (define_expand "vec_unpack<su>_lo_<mode>"
1228 [(match_operand:<VWIDE> 0 "register_operand" "")
1229 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1230 "TARGET_SIMD"
1231 {
1232 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1233 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1234 operands[1], p));
1235 DONE;
1236 }
1237 )
1238
1239 ;; Widening arithmetic.
1240
;; Widening multiply-accumulate on the low halves of two 128-bit vectors:
;; acc += extend(lo(op2)) * extend(lo(op4))  ->  SMLAL/UMLAL.
1241 (define_insn "*aarch64_<su>mlal_lo<mode>"
1242 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1243 (plus:<VWIDE>
1244 (mult:<VWIDE>
1245 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1246 (match_operand:VQW 2 "register_operand" "w")
1247 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1248 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1249 (match_operand:VQW 4 "register_operand" "w")
1250 (match_dup 3))))
1251 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1252 "TARGET_SIMD"
1253 "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1254 [(set_attr "type" "neon_mla_<Vetype>_long")]
1255 )
1256
;; High-half variant (SMLAL2/UMLAL2).
1257 (define_insn "*aarch64_<su>mlal_hi<mode>"
1258 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1259 (plus:<VWIDE>
1260 (mult:<VWIDE>
1261 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1262 (match_operand:VQW 2 "register_operand" "w")
1263 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1264 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1265 (match_operand:VQW 4 "register_operand" "w")
1266 (match_dup 3))))
1267 (match_operand:<VWIDE> 1 "register_operand" "0")))]
1268 "TARGET_SIMD"
1269 "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1270 [(set_attr "type" "neon_mla_<Vetype>_long")]
1271 )
1272
;; Widening multiply-subtract, low halves (SMLSL/UMLSL):
;; acc -= extend(lo(op2)) * extend(lo(op4)).
1273 (define_insn "*aarch64_<su>mlsl_lo<mode>"
1274 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1275 (minus:<VWIDE>
1276 (match_operand:<VWIDE> 1 "register_operand" "0")
1277 (mult:<VWIDE>
1278 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1279 (match_operand:VQW 2 "register_operand" "w")
1280 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1281 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1282 (match_operand:VQW 4 "register_operand" "w")
1283 (match_dup 3))))))]
1284 "TARGET_SIMD"
1285 "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
1286 [(set_attr "type" "neon_mla_<Vetype>_long")]
1287 )
1288
;; High-half variant (SMLSL2/UMLSL2).
1289 (define_insn "*aarch64_<su>mlsl_hi<mode>"
1290 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1291 (minus:<VWIDE>
1292 (match_operand:<VWIDE> 1 "register_operand" "0")
1293 (mult:<VWIDE>
1294 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1295 (match_operand:VQW 2 "register_operand" "w")
1296 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1297 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1298 (match_operand:VQW 4 "register_operand" "w")
1299 (match_dup 3))))))]
1300 "TARGET_SIMD"
1301 "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
1302 [(set_attr "type" "neon_mla_<Vetype>_long")]
1303 )
1304
;; Widening multiply-accumulate of whole 64-bit vectors (no half
;; selection needed).
1305 (define_insn "*aarch64_<su>mlal<mode>"
1306 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1307 (plus:<VWIDE>
1308 (mult:<VWIDE>
1309 (ANY_EXTEND:<VWIDE>
1310 (match_operand:VD_BHSI 1 "register_operand" "w"))
1311 (ANY_EXTEND:<VWIDE>
1312 (match_operand:VD_BHSI 2 "register_operand" "w")))
1313 (match_operand:<VWIDE> 3 "register_operand" "0")))]
1314 "TARGET_SIMD"
1315 "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1316 [(set_attr "type" "neon_mla_<Vetype>_long")]
1317 )
1318
;; Widening multiply-subtract of whole 64-bit vectors.
1319 (define_insn "*aarch64_<su>mlsl<mode>"
1320 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1321 (minus:<VWIDE>
1322 (match_operand:<VWIDE> 1 "register_operand" "0")
1323 (mult:<VWIDE>
1324 (ANY_EXTEND:<VWIDE>
1325 (match_operand:VD_BHSI 2 "register_operand" "w"))
1326 (ANY_EXTEND:<VWIDE>
1327 (match_operand:VD_BHSI 3 "register_operand" "w")))))]
1328 "TARGET_SIMD"
1329 "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
1330 [(set_attr "type" "neon_mla_<Vetype>_long")]
1331 )
1332
;; Widening multiply of the low halves (SMULL/UMULL).
1333 (define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
1334 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1335 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1336 (match_operand:VQW 1 "register_operand" "w")
1337 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
1338 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1339 (match_operand:VQW 2 "register_operand" "w")
1340 (match_dup 3)))))]
1341 "TARGET_SIMD"
1342 "<su>mull\\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
1343 [(set_attr "type" "neon_mul_<Vetype>_long")]
1344 )
1345
;; Standard-name expander: supply the lo-half lane parallel.
1346 (define_expand "vec_widen_<su>mult_lo_<mode>"
1347 [(match_operand:<VWIDE> 0 "register_operand" "")
1348 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1349 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1350 "TARGET_SIMD"
1351 {
1352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false);
1353 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1354 operands[1],
1355 operands[2], p));
1356 DONE;
1357 }
1358 )
1359
;; Widening multiply of the high halves (SMULL2/UMULL2).
1360 (define_insn "aarch64_simd_vec_<su>mult_hi_<mode>"
1361 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1362 (mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1363 (match_operand:VQW 1 "register_operand" "w")
1364 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
1365 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1366 (match_operand:VQW 2 "register_operand" "w")
1367 (match_dup 3)))))]
1368 "TARGET_SIMD"
1369 "<su>mull2\\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
1370 [(set_attr "type" "neon_mul_<Vetype>_long")]
1371 )
1372
;; Standard-name expander: supply the hi-half lane parallel.
1373 (define_expand "vec_widen_<su>mult_hi_<mode>"
1374 [(match_operand:<VWIDE> 0 "register_operand" "")
1375 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1376 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1377 "TARGET_SIMD"
1378 {
1379 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
1380 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1381 operands[1],
1382 operands[2], p));
1383 DONE;
1384
1385 }
1386 )
1387
1388 ;; FP vector operations.
1389 ;; AArch64 AdvSIMD supports single-precision (32-bit) and
1390 ;; double-precision (64-bit) floating-point data types and arithmetic as
1391 ;; defined by the IEEE 754-2008 standard. This makes them vectorizable
1392 ;; without the need for -ffast-math or -funsafe-math-optimizations.
1393 ;;
1394 ;; Floating-point operations can raise an exception. Vectorizing such
;; operations is safe because of reasons explained below.
1396 ;;
1397 ;; ARMv8 permits an extension to enable trapped floating-point
1398 ;; exception handling, however this is an optional feature. In the
1399 ;; event of a floating-point exception being raised by vectorised
1400 ;; code then:
1401 ;; 1. If trapped floating-point exceptions are available, then a trap
1402 ;; will be taken when any lane raises an enabled exception. A trap
1403 ;; handler may determine which lane raised the exception.
1404 ;; 2. Alternatively a sticky exception flag is set in the
1405 ;; floating-point status register (FPSR). Software may explicitly
1406 ;; test the exception flags, in which case the tests will either
1407 ;; prevent vectorisation, allowing precise identification of the
1408 ;; failing operation, or if tested outside of vectorisable regions
1409 ;; then the specific operation and lane are not of interest.
1410
1411 ;; FP arithmetic operations.
1412
;; Vector FP addition (FADD).
1413 (define_insn "add<mode>3"
1414 [(set (match_operand:VDQF 0 "register_operand" "=w")
1415 (plus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1416 (match_operand:VDQF 2 "register_operand" "w")))]
1417 "TARGET_SIMD"
1418 "fadd\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1419 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1420 )
1421
;; Vector FP subtraction (FSUB).
1422 (define_insn "sub<mode>3"
1423 [(set (match_operand:VDQF 0 "register_operand" "=w")
1424 (minus:VDQF (match_operand:VDQF 1 "register_operand" "w")
1425 (match_operand:VDQF 2 "register_operand" "w")))]
1426 "TARGET_SIMD"
1427 "fsub\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1428 [(set_attr "type" "neon_fp_addsub_<Vetype><q>")]
1429 )
1430
;; Vector FP multiplication (FMUL).
1431 (define_insn "mul<mode>3"
1432 [(set (match_operand:VDQF 0 "register_operand" "=w")
1433 (mult:VDQF (match_operand:VDQF 1 "register_operand" "w")
1434 (match_operand:VDQF 2 "register_operand" "w")))]
1435 "TARGET_SIMD"
1436 "fmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1437 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
1438 )
1439
;; Vector FP division (FDIV).
1440 (define_insn "div<mode>3"
1441 [(set (match_operand:VDQF 0 "register_operand" "=w")
1442 (div:VDQF (match_operand:VDQF 1 "register_operand" "w")
1443 (match_operand:VDQF 2 "register_operand" "w")))]
1444 "TARGET_SIMD"
1445 "fdiv\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1446 [(set_attr "type" "neon_fp_div_<Vetype><q>")]
1447 )
1448
;; Vector FP negation (FNEG).
1449 (define_insn "neg<mode>2"
1450 [(set (match_operand:VDQF 0 "register_operand" "=w")
1451 (neg:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1452 "TARGET_SIMD"
1453 "fneg\\t%0.<Vtype>, %1.<Vtype>"
1454 [(set_attr "type" "neon_fp_neg_<Vetype><q>")]
1455 )
1456
;; Vector FP absolute value (FABS).
1457 (define_insn "abs<mode>2"
1458 [(set (match_operand:VDQF 0 "register_operand" "=w")
1459 (abs:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
1460 "TARGET_SIMD"
1461 "fabs\\t%0.<Vtype>, %1.<Vtype>"
1462 [(set_attr "type" "neon_fp_abs_<Vetype><q>")]
1463 )
1464
;; Fused multiply-add, accumulator tied to the destination ("0"): FMLA.
1465 (define_insn "fma<mode>4"
1466 [(set (match_operand:VDQF 0 "register_operand" "=w")
1467 (fma:VDQF (match_operand:VDQF 1 "register_operand" "w")
1468 (match_operand:VDQF 2 "register_operand" "w")
1469 (match_operand:VDQF 3 "register_operand" "0")))]
1470 "TARGET_SIMD"
1471 "fmla\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1472 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1473 )
1474
;; FMA with one multiplicand broadcast from a lane; the lane index is
;; remapped with ENDIAN_LANE_N for big-endian lane numbering.
1475 (define_insn "*aarch64_fma4_elt<mode>"
1476 [(set (match_operand:VDQF 0 "register_operand" "=w")
1477 (fma:VDQF
1478 (vec_duplicate:VDQF
1479 (vec_select:<VEL>
1480 (match_operand:VDQF 1 "register_operand" "<h_con>")
1481 (parallel [(match_operand:SI 2 "immediate_operand")])))
1482 (match_operand:VDQF 3 "register_operand" "w")
1483 (match_operand:VDQF 4 "register_operand" "0")))]
1484 "TARGET_SIMD"
1485 {
1486 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1487 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1488 }
1489 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1490 )
1491
;; As above, with the lane drawn from the opposite-width vector mode.
1492 (define_insn "*aarch64_fma4_elt_<vswap_width_name><mode>"
1493 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1494 (fma:VDQSF
1495 (vec_duplicate:VDQSF
1496 (vec_select:<VEL>
1497 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1498 (parallel [(match_operand:SI 2 "immediate_operand")])))
1499 (match_operand:VDQSF 3 "register_operand" "w")
1500 (match_operand:VDQSF 4 "register_operand" "0")))]
1501 "TARGET_SIMD"
1502 {
1503 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1504 INTVAL (operands[2])));
1505 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1506 }
1507 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1508 )
1509
;; V2DF FMA with a DF scalar broadcast as the multiplier; lane 0 of the
;; scalar register is used directly, so no endian remapping is needed.
1510 (define_insn "*aarch64_fma4_elt_to_128df"
1511 [(set (match_operand:V2DF 0 "register_operand" "=w")
1512 (fma:V2DF
1513 (vec_duplicate:V2DF
1514 (match_operand:DF 1 "register_operand" "w"))
1515 (match_operand:V2DF 2 "register_operand" "w")
1516 (match_operand:V2DF 3 "register_operand" "0")))]
1517 "TARGET_SIMD"
1518 "fmla\\t%0.2d, %2.2d, %1.2d[0]"
1519 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1520 )
1521
;; Scalar DF FMA where one multiplicand is a selected V2DF lane.
1522 (define_insn "*aarch64_fma4_elt_to_64v2df"
1523 [(set (match_operand:DF 0 "register_operand" "=w")
1524 (fma:DF
1525 (vec_select:DF
1526 (match_operand:V2DF 1 "register_operand" "w")
1527 (parallel [(match_operand:SI 2 "immediate_operand")]))
1528 (match_operand:DF 3 "register_operand" "w")
1529 (match_operand:DF 4 "register_operand" "0")))]
1530 "TARGET_SIMD"
1531 {
1532 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1533 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1534 }
1535 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1536 )
1537
;; Fused multiply-subtract: fma with one operand negated maps to FMLS.
1538 (define_insn "fnma<mode>4"
1539 [(set (match_operand:VDQF 0 "register_operand" "=w")
1540 (fma:VDQF
1541 (match_operand:VDQF 1 "register_operand" "w")
1542 (neg:VDQF
1543 (match_operand:VDQF 2 "register_operand" "w"))
1544 (match_operand:VDQF 3 "register_operand" "0")))]
1545 "TARGET_SIMD"
1546 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1547 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1548 )
1549
;; FMLS with the multiplier broadcast from a lane; lane index remapped
;; with ENDIAN_LANE_N for big-endian.
1550 (define_insn "*aarch64_fnma4_elt<mode>"
1551 [(set (match_operand:VDQF 0 "register_operand" "=w")
1552 (fma:VDQF
1553 (neg:VDQF
1554 (match_operand:VDQF 3 "register_operand" "w"))
1555 (vec_duplicate:VDQF
1556 (vec_select:<VEL>
1557 (match_operand:VDQF 1 "register_operand" "<h_con>")
1558 (parallel [(match_operand:SI 2 "immediate_operand")])))
1559 (match_operand:VDQF 4 "register_operand" "0")))]
1560 "TARGET_SIMD"
1561 {
1562 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
1563 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1564 }
1565 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1566 )
1567
;; As above, lane taken from the opposite-width vector mode.
1568 (define_insn "*aarch64_fnma4_elt_<vswap_width_name><mode>"
1569 [(set (match_operand:VDQSF 0 "register_operand" "=w")
1570 (fma:VDQSF
1571 (neg:VDQSF
1572 (match_operand:VDQSF 3 "register_operand" "w"))
1573 (vec_duplicate:VDQSF
1574 (vec_select:<VEL>
1575 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1576 (parallel [(match_operand:SI 2 "immediate_operand")])))
1577 (match_operand:VDQSF 4 "register_operand" "0")))]
1578 "TARGET_SIMD"
1579 {
1580 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode,
1581 INTVAL (operands[2])));
1582 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1583 }
1584 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1585 )
1586
;; V2DF FMLS with a DF scalar broadcast as the multiplier (lane 0).
1587 (define_insn "*aarch64_fnma4_elt_to_128df"
1588 [(set (match_operand:V2DF 0 "register_operand" "=w")
1589 (fma:V2DF
1590 (neg:V2DF
1591 (match_operand:V2DF 2 "register_operand" "w"))
1592 (vec_duplicate:V2DF
1593 (match_operand:DF 1 "register_operand" "w"))
1594 (match_operand:V2DF 3 "register_operand" "0")))]
1595 "TARGET_SIMD"
1596 "fmls\\t%0.2d, %2.2d, %1.2d[0]"
1597 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1598 )
1599
;; Scalar DF FMLS where the (non-negated) multiplicand is a V2DF lane.
1600 (define_insn "*aarch64_fnma4_elt_to_64v2df"
1601 [(set (match_operand:DF 0 "register_operand" "=w")
1602 (fma:DF
1603 (vec_select:DF
1604 (match_operand:V2DF 1 "register_operand" "w")
1605 (parallel [(match_operand:SI 2 "immediate_operand")]))
1606 (neg:DF
1607 (match_operand:DF 3 "register_operand" "w"))
1608 (match_operand:DF 4 "register_operand" "0")))]
1609 "TARGET_SIMD"
1610 {
1611 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2])));
1612 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1613 }
1614 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1615 )
1616
1617 ;; Vector versions of the floating-point frint patterns.
1618 ;; Expands to btrunc, ceil, floor, nearbyint, rint, round, frintn.
1619 (define_insn "<frint_pattern><mode>2"
1620 [(set (match_operand:VDQF 0 "register_operand" "=w")
1621 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1622 FRINT))]
1623 "TARGET_SIMD"
1624 "frint<frint_suffix>\\t%0.<Vtype>, %1.<Vtype>"
1625 [(set_attr "type" "neon_fp_round_<Vetype><q>")]
1626 )
1627
1628 ;; Vector versions of the fcvt standard patterns.
1629 ;; Expands to lbtrunc, lround, lceil, lfloor
;; Rounds (via the FCVT unspec) and converts to signed/unsigned integer
;; in a single instruction.
1630 (define_insn "l<fcvt_pattern><su_optab><VDQF:mode><fcvt_target>2"
1631 [(set (match_operand:<FCVT_TARGET> 0 "register_operand" "=w")
1632 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1633 [(match_operand:VDQF 1 "register_operand" "w")]
1634 FCVT)))]
1635 "TARGET_SIMD"
1636 "fcvt<frint_suffix><su>\\t%0.<Vtype>, %1.<Vtype>"
1637 [(set_attr "type" "neon_fp_to_int_<Vetype><q>")]
1638 )
1639
;; fix/fixuns standard names: FP-to-integer conversion truncates, so
;; these map straight onto the UNSPEC_FRINTZ (round-towards-zero) insn
;; above; no extra code is needed.
1640 (define_expand "<optab><VDQF:mode><fcvt_target>2"
1641 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1642 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1643 [(match_operand:VDQF 1 "register_operand")]
1644 UNSPEC_FRINTZ)))]
1645 "TARGET_SIMD"
1646 {})
1647
;; fix_trunc/fixuns_trunc standard names; same mapping as above.
1648 (define_expand "<fix_trunc_optab><VDQF:mode><fcvt_target>2"
1649 [(set (match_operand:<FCVT_TARGET> 0 "register_operand")
1650 (FIXUORS:<FCVT_TARGET> (unspec:<FCVT_TARGET>
1651 [(match_operand:VDQF 1 "register_operand")]
1652 UNSPEC_FRINTZ)))]
1653 "TARGET_SIMD"
1654 {})
1655
;; ftrunc standard name: round towards zero, result stays FP.
1656 (define_expand "ftrunc<VDQF:mode>2"
1657 [(set (match_operand:VDQF 0 "register_operand")
1658 (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1659 UNSPEC_FRINTZ))]
1660 "TARGET_SIMD"
1661 {})
1662
;; Integer-to-FP conversion, signed (SCVTF) or unsigned (UCVTF).
1663 (define_insn "<optab><fcvt_target><VDQF:mode>2"
1664 [(set (match_operand:VDQF 0 "register_operand" "=w")
1665 (FLOATUORS:VDQF
1666 (match_operand:<FCVT_TARGET> 1 "register_operand" "w")))]
1667 "TARGET_SIMD"
1668 "<su_optab>cvtf\\t%0.<Vtype>, %1.<Vtype>"
1669 [(set_attr "type" "neon_int_to_fp_<Vetype><q>")]
1670 )
1671
1672 ;; Conversions between vectors of floats and doubles.
1673 ;; Contains a mix of patterns to match standard pattern names
1674 ;; and those for intrinsics.
1675
1676 ;; Float widening operations.
1677
;; Widen the low two floats of a V4SF to V2DF (FCVTL).
1678 (define_insn "vec_unpacks_lo_v4sf"
1679 [(set (match_operand:V2DF 0 "register_operand" "=w")
1680 (float_extend:V2DF
1681 (vec_select:V2SF
1682 (match_operand:V4SF 1 "register_operand" "w")
1683 (parallel [(const_int 0) (const_int 1)])
1684 )))]
1685 "TARGET_SIMD"
1686 "fcvtl\\t%0.2d, %1.2s"
1687 [(set_attr "type" "neon_fp_cvt_widen_s")]
1688 )
1689
;; Intrinsic form: widen a whole V2SF to V2DF (FCVTL).
1690 (define_insn "aarch64_float_extend_lo_v2df"
1691 [(set (match_operand:V2DF 0 "register_operand" "=w")
1692 (float_extend:V2DF
1693 (match_operand:V2SF 1 "register_operand" "w")))]
1694 "TARGET_SIMD"
1695 "fcvtl\\t%0.2d, %1.2s"
1696 [(set_attr "type" "neon_fp_cvt_widen_s")]
1697 )
1698
;; Widen the high two floats of a V4SF to V2DF (FCVTL2).
1699 (define_insn "vec_unpacks_hi_v4sf"
1700 [(set (match_operand:V2DF 0 "register_operand" "=w")
1701 (float_extend:V2DF
1702 (vec_select:V2SF
1703 (match_operand:V4SF 1 "register_operand" "w")
1704 (parallel [(const_int 2) (const_int 3)])
1705 )))]
1706 "TARGET_SIMD"
1707 "fcvtl2\\t%0.2d, %1.4s"
1708 [(set_attr "type" "neon_fp_cvt_widen_s")]
1709 )
1710
1711 ;; Float narrowing operations.
1712
;; Narrow a V2DF to V2SF (FCVTN), writing the low half of the result.
1713 (define_insn "aarch64_float_truncate_lo_v2sf"
1714 [(set (match_operand:V2SF 0 "register_operand" "=w")
1715 (float_truncate:V2SF
1716 (match_operand:V2DF 1 "register_operand" "w")))]
1717 "TARGET_SIMD"
1718 "fcvtn\\t%0.2s, %1.2d"
1719 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1720 )
1721
;; Narrow a V2DF into the high half of a V4SF (FCVTN2), keeping the low
;; half of operand 1 (tied to the destination with "0").
1722 (define_insn "aarch64_float_truncate_hi_v4sf"
1723 [(set (match_operand:V4SF 0 "register_operand" "=w")
1724 (vec_concat:V4SF
1725 (match_operand:V2SF 1 "register_operand" "0")
1726 (float_truncate:V2SF
1727 (match_operand:V2DF 2 "register_operand" "w"))))]
1728 "TARGET_SIMD"
1729 "fcvtn2\\t%0.4s, %2.2d"
1730 [(set_attr "type" "neon_fp_cvt_narrow_d_q")]
1731 )
1732
;; Pack two V2DF vectors into one V4SF via FCVTN then FCVTN2.
;; BYTES_BIG_ENDIAN swaps which input provides the low half of the
;; result.
1733 (define_expand "vec_pack_trunc_v2df"
1734 [(set (match_operand:V4SF 0 "register_operand")
1735 (vec_concat:V4SF
1736 (float_truncate:V2SF
1737 (match_operand:V2DF 1 "register_operand"))
1738 (float_truncate:V2SF
1739 (match_operand:V2DF 2 "register_operand"))
1740 ))]
1741 "TARGET_SIMD"
1742 {
1743 rtx tmp = gen_reg_rtx (V2SFmode);
1744 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1745 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1746
1747 emit_insn (gen_aarch64_float_truncate_lo_v2sf (tmp, operands[lo]));
1748 emit_insn (gen_aarch64_float_truncate_hi_v4sf (operands[0],
1749 tmp, operands[hi]));
1750 DONE;
1751 }
1752 )
1753
;; Pack two DF scalars into one V2SF: concatenate them into a V2DF
;; temporary with the move_lo/hi_quad patterns, then narrow with FCVTN.
;; BYTES_BIG_ENDIAN swaps which input lands in the low half.
1754 (define_expand "vec_pack_trunc_df"
1755 [(set (match_operand:V2SF 0 "register_operand")
1756 (vec_concat:V2SF
1757 (float_truncate:SF
1758 (match_operand:DF 1 "register_operand"))
1759 (float_truncate:SF
1760 (match_operand:DF 2 "register_operand"))
1761 ))]
1762 "TARGET_SIMD"
1763 {
;; The temporary must be V2DF: it is the destination of
;; move_lo_quad_v2df/move_hi_quad_v2df and the source of
;; aarch64_float_truncate_lo_v2sf, all of which operate on V2DF.
;; It was previously allocated in V2SFmode, a mode mismatch.
1764 rtx tmp = gen_reg_rtx (V2DFmode);
1765 int lo = BYTES_BIG_ENDIAN ? 2 : 1;
1766 int hi = BYTES_BIG_ENDIAN ? 1 : 2;
1767
1768 emit_insn (gen_move_lo_quad_v2df (tmp, operands[lo]));
1769 emit_insn (gen_move_hi_quad_v2df (tmp, operands[hi]));
1770 emit_insn (gen_aarch64_float_truncate_lo_v2sf (operands[0], tmp));
1771 DONE;
1772 }
1773 )
1774
;; Vector FP multiply-subtract intrinsic: dest = op1 - op2 * op3 (FMLS).
;; This is a full vector-by-vector operation, so it uses the plain MLA
;; scheduling type (matching fnma<mode>4 above), not the by-element
;; "_scalar" type the pattern previously carried.
1775 (define_insn "aarch64_vmls<mode>"
1776 [(set (match_operand:VDQF 0 "register_operand" "=w")
1777 (minus:VDQF (match_operand:VDQF 1 "register_operand" "0")
1778 (mult:VDQF (match_operand:VDQF 2 "register_operand" "w")
1779 (match_operand:VDQF 3 "register_operand" "w"))))]
1780 "TARGET_SIMD"
1781 "fmls\\t%0.<Vtype>, %2.<Vtype>, %3.<Vtype>"
1782 [(set_attr "type" "neon_fp_mla_<Vetype><q>")]
1783 )
1784
1785 ;; FP Max/Min
1786 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
1787 ;; expression like:
1788 ;; a = (b < c) ? b : c;
1789 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled
1790 ;; either explicitly or indirectly via -ffast-math.
1791 ;;
1792 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
1793 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
1794 ;; operand will be returned when both operands are zero (i.e. they may not
1795 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
1796 ;; only introduces MIN_EXPR/MAX_EXPR in fast math mode or when not honouring
1797 ;; NaNs.
1798
;; smax/smin standard names, implemented with the "number" variants
;; FMAXNM/FMINNM (see the comment above for why this is valid).
1799 (define_insn "<su><maxmin><mode>3"
1800 [(set (match_operand:VDQF 0 "register_operand" "=w")
1801 (FMAXMIN:VDQF (match_operand:VDQF 1 "register_operand" "w")
1802 (match_operand:VDQF 2 "register_operand" "w")))]
1803 "TARGET_SIMD"
1804 "f<maxmin>nm\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1805 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1806 )
1807
;; Unspec-based max/min for the intrinsics, where the exact IEEE
;; FMAX/FMIN (or FMAXNM/FMINNM) semantics are required.
1808 (define_insn "<maxmin_uns><mode>3"
1809 [(set (match_operand:VDQF 0 "register_operand" "=w")
1810 (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")
1811 (match_operand:VDQF 2 "register_operand" "w")]
1812 FMAXMIN_UNS))]
1813 "TARGET_SIMD"
1814 "<maxmin_uns_op>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1815 [(set_attr "type" "neon_fp_minmax_<Vetype><q>")]
1816 )
1817
1818 ;; 'across lanes' add.
1819
;; Integer add-across-lanes reduction to a scalar: reduce into a vector
;; scratch, then extract lane 0 (endian-remapped) as the scalar result.
1820 (define_expand "reduc_plus_scal_<mode>"
1821 [(match_operand:<VEL> 0 "register_operand" "=w")
1822 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
1823 UNSPEC_ADDV)]
1824 "TARGET_SIMD"
1825 {
1826 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1827 rtx scratch = gen_reg_rtx (<MODE>mode);
1828 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1829 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1830 DONE;
1831 }
1832 )
1833
;; Two-element FP add reduction, same reduce-then-extract scheme.
1834 (define_expand "reduc_plus_scal_<mode>"
1835 [(match_operand:<VEL> 0 "register_operand" "=w")
1836 (match_operand:V2F 1 "register_operand" "w")]
1837 "TARGET_SIMD"
1838 {
1839 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1840 rtx scratch = gen_reg_rtx (<MODE>mode);
1841 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
1842 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1843 DONE;
1844 }
1845 )
1846
;; Integer ADDV/ADDP across-lanes add.
1847 (define_insn "aarch64_reduc_plus_internal<mode>"
1848 [(set (match_operand:VDQV 0 "register_operand" "=w")
1849 (unspec:VDQV [(match_operand:VDQV 1 "register_operand" "w")]
1850 UNSPEC_ADDV))]
1851 "TARGET_SIMD"
1852 "add<VDQV:vp>\\t%<Vetype>0, %1.<Vtype>"
1853 [(set_attr "type" "neon_reduc_add<q>")]
1854 )
1855
;; V2SI has no ADDV form; a pairwise ADDP gives the same reduction.
1856 (define_insn "aarch64_reduc_plus_internalv2si"
1857 [(set (match_operand:V2SI 0 "register_operand" "=w")
1858 (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1859 UNSPEC_ADDV))]
1860 "TARGET_SIMD"
1861 "addp\\t%0.2s, %1.2s, %1.2s"
1862 [(set_attr "type" "neon_reduc_add")]
1863 )
1864
;; FP pairwise add for the two-element modes (FADDP).
1865 (define_insn "aarch64_reduc_plus_internal<mode>"
1866 [(set (match_operand:V2F 0 "register_operand" "=w")
1867 (unspec:V2F [(match_operand:V2F 1 "register_operand" "w")]
1868 UNSPEC_FADDV))]
1869 "TARGET_SIMD"
1870 "faddp\\t%<Vetype>0, %1.<Vtype>"
1871 [(set_attr "type" "neon_fp_reduc_add_<Vetype><q>")]
1872 )
1873
;; V4SF pairwise add step (FADDP); applied twice for a full reduction.
1874 (define_insn "aarch64_addpv4sf"
1875 [(set (match_operand:V4SF 0 "register_operand" "=w")
1876 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "w")]
1877 UNSPEC_FADDV))]
1878 "TARGET_SIMD"
1879 "faddp\\t%0.4s, %1.4s, %1.4s"
1880 [(set_attr "type" "neon_fp_reduc_add_s_q")]
1881 )
1882
;; V4SF add reduction: two FADDP steps, then extract lane 0.
1883 (define_expand "reduc_plus_scal_v4sf"
1884 [(set (match_operand:SF 0 "register_operand")
1885 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
1886 UNSPEC_FADDV))]
1887 "TARGET_SIMD"
1888 {
1889 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0));
1890 rtx scratch = gen_reg_rtx (V4SFmode);
1891 emit_insn (gen_aarch64_addpv4sf (scratch, operands[1]));
1892 emit_insn (gen_aarch64_addpv4sf (scratch, scratch));
1893 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
1894 DONE;
1895 })
1896
;; Count leading sign bits, per element (CLS).
1897 (define_insn "clrsb<mode>2"
1898   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1899         (clrsb:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1900   "TARGET_SIMD"
1901   "cls\\t%0.<Vtype>, %1.<Vtype>"
1902   [(set_attr "type" "neon_cls<q>")]
1903 )
1904
;; Count leading zero bits, per element (CLZ).
1905 (define_insn "clz<mode>2"
1906  [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
1907        (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
1908  "TARGET_SIMD"
1909  "clz\\t%0.<Vtype>, %1.<Vtype>"
1910  [(set_attr "type" "neon_cls<q>")]
1911 )
1912
;; Per-byte population count (CNT); only byte-element vector modes (VB).
1913 (define_insn "popcount<mode>2"
1914   [(set (match_operand:VB 0 "register_operand" "=w")
1915         (popcount:VB (match_operand:VB 1 "register_operand" "w")))]
1916   "TARGET_SIMD"
1917   "cnt\\t%0.<Vbtype>, %1.<Vbtype>"
1918   [(set_attr "type" "neon_cnt<q>")]
1919 )
1920
1921 ;; 'across lanes' max and min ops.
1922
1923 ;; Template for outputting a scalar, so we can create __builtins which can be
1924 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin).
1925 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1926   [(match_operand:<VEL> 0 "register_operand")
1927    (unspec:VDQF [(match_operand:VDQF 1 "register_operand")]
1928 		FMAXMINV)]
1929   "TARGET_SIMD"
1930   {
     /* Reduce into a vector-typed scratch, then pull out lane 0
	(endian-corrected) as the scalar result.  */
1931     rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1932     rtx scratch = gen_reg_rtx (<MODE>mode);
1933     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
1934 							      operands[1]));
1935     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1936     DONE;
1937   }
1938 )
1939
1940 ;; Likewise for integer cases, signed and unsigned.
1941 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
1942   [(match_operand:<VEL> 0 "register_operand")
1943    (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
1944 		    MAXMINV)]
1945   "TARGET_SIMD"
1946   {
     /* Same shape as the FP expander above: vector-typed reduction into a
	scratch, then extract lane 0 (endian-corrected).  */
1947     rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0));
1948     rtx scratch = gen_reg_rtx (<MODE>mode);
1949     emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
1950 							      operands[1]));
1951     emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
1952     DONE;
1953   }
1954 )
1955
;; Across-lanes integer max/min reduction ([SU]MAXV / [SU]MINV).
1956 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
1957  [(set (match_operand:VDQV_S 0 "register_operand" "=w")
1958        (unspec:VDQV_S [(match_operand:VDQV_S 1 "register_operand" "w")]
1959		    MAXMINV))]
1960  "TARGET_SIMD"
1961  "<maxmin_uns_op>v\\t%<Vetype>0, %1.<Vtype>"
1962  [(set_attr "type" "neon_reduc_minmax<q>")]
1963 )
1964
;; V2SI max/min reduction: one pairwise <op>P of the vector with itself.
1965 (define_insn "aarch64_reduc_<maxmin_uns>_internalv2si"
1966  [(set (match_operand:V2SI 0 "register_operand" "=w")
1967        (unspec:V2SI [(match_operand:V2SI 1 "register_operand" "w")]
1968		    MAXMINV))]
1969  "TARGET_SIMD"
1970  "<maxmin_uns_op>p\\t%0.2s, %1.2s, %1.2s"
1971  [(set_attr "type" "neon_reduc_minmax")]
1972 )
1973
;; FP across-lanes max/min reduction; the exact mnemonic (including the
;; NaN-propagating vs number-preferring variants) comes from FMAXMINV.
1974 (define_insn "aarch64_reduc_<maxmin_uns>_internal<mode>"
1975  [(set (match_operand:VDQF 0 "register_operand" "=w")
1976        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
1977		    FMAXMINV))]
1978  "TARGET_SIMD"
1979  "<maxmin_uns_op><vp>\\t%<Vetype>0, %1.<Vtype>"
1980  [(set_attr "type" "neon_fp_reduc_minmax_<Vetype><q>")]
1981 )
1982
1983 ;; aarch64_simd_bsl may compile to any of bsl/bif/bit depending on register
1984 ;; allocation.
1985 ;; Operand 1 is the mask, operands 2 and 3 are the bitfields from which
1986 ;; to select.
1987 ;;
1988 ;; Thus our BSL is of the form:
1989 ;;   op0 = bsl (mask, op2, op3)
1990 ;; Depending on which operand the register allocator ties to op0, we can
1991 ;; use any of:
1992 ;;   if (op0 = mask)
1993 ;;     bsl mask, op2, op3
1994 ;;   if (op0 = op3) (so 1-bits in mask choose bits from op2, else op0)
1995 ;;     bit op0, op2, mask
1996 ;;   if (op0 = op2) (so 0-bits in mask choose bits from op3, else op0)
1997 ;;     bif op0, op3, mask
1998
;; Bitwise select expressed as (mask & op2) | (~mask & op3).  The three
;; alternatives tie op0 to the mask (BSL), to op3 (BIT) or to op2 (BIF),
;; matching the constraint columns "0,w,w" / "w,0,w" / "w,w,0" below.
1999 (define_insn "aarch64_simd_bsl<mode>_internal"
2000   [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w")
2001 	(ior:VSDQ_I_DI
2002 	   (and:VSDQ_I_DI
2003 	     (not:<V_cmp_result>
2004 		(match_operand:<V_cmp_result> 1 "register_operand" " 0,w,w"))
2005 	     (match_operand:VSDQ_I_DI 3 "register_operand" " w,0,w"))
2006 	   (and:VSDQ_I_DI
2007 	     (match_dup:<V_cmp_result> 1)
2008 	     (match_operand:VSDQ_I_DI 2 "register_operand" " w,w,0"))
2009 	))]
2010   "TARGET_SIMD"
2011   "@
2012   bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2013   bit\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
2014   bif\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>"
2015   [(set_attr "type" "neon_bsl<q>")]
2016 )
2017
;; Public bitwise-select expander.  The internal insn works on the integer
;; V_cmp_result mode, so FP operands are viewed through gen_lowpart and the
;; result copied back in the original mode.
2018 (define_expand "aarch64_simd_bsl<mode>"
2019   [(match_operand:VALLDIF 0 "register_operand")
2020    (match_operand:<V_cmp_result> 1 "register_operand")
2021    (match_operand:VALLDIF 2 "register_operand")
2022    (match_operand:VALLDIF 3 "register_operand")]
2023  "TARGET_SIMD"
2024 {
2025   /* We can't alias operands together if they have different modes.  */
2026   rtx tmp = operands[0];
2027   if (FLOAT_MODE_P (<MODE>mode))
2028     {
2029       operands[2] = gen_lowpart (<V_cmp_result>mode, operands[2]);
2030       operands[3] = gen_lowpart (<V_cmp_result>mode, operands[3]);
2031       tmp = gen_reg_rtx (<V_cmp_result>mode);
2032     }
2033   operands[1] = gen_lowpart (<V_cmp_result>mode, operands[1]);
2034   emit_insn (gen_aarch64_simd_bsl<v_cmp_result>_internal (tmp,
2035 							  operands[1],
2036 							  operands[2],
2037 							  operands[3]));
2038   if (tmp != operands[0])
2039     emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
2040 
2041   DONE;
2042 })
2043
;; Integer vector conditional select: emit a compare of operands[4]/[5] to
;; build an all-ones/all-zeros mask, then BSL the mask with operands[1]/[2].
;; NE is rewritten as EQ with the select arms swapped; a comparison whose
;; arms are already 0/-1 is also swapped so the mask itself is the result.
2044 (define_expand "aarch64_vcond_internal<mode><mode>"
2045   [(set (match_operand:VDQ_I 0 "register_operand")
2046 	(if_then_else:VDQ_I
2047 	  (match_operator 3 "comparison_operator"
2048 	    [(match_operand:VDQ_I 4 "register_operand")
2049 	     (match_operand:VDQ_I 5 "nonmemory_operand")])
2050 	  (match_operand:VDQ_I 1 "nonmemory_operand")
2051 	  (match_operand:VDQ_I 2 "nonmemory_operand")))]
2052   "TARGET_SIMD"
2053 {
2054   rtx op1 = operands[1];
2055   rtx op2 = operands[2];
2056   rtx mask = gen_reg_rtx (<MODE>mode);
2057   enum rtx_code code = GET_CODE (operands[3]);
2058 
2059   /* Switching OP1 and OP2 is necessary for NE (to output a cmeq insn),
2060      and desirable for other comparisons if it results in FOO ? -1 : 0
2061      (this allows direct use of the comparison result without a bsl).  */
2062   if (code == NE
2063       || (code != EQ
2064 	  && op1 == CONST0_RTX (<V_cmp_result>mode)
2065 	  && op2 == CONSTM1_RTX (<V_cmp_result>mode)))
2066     {
2067       op1 = operands[2];
2068       op2 = operands[1];
2069       switch (code)
2070         {
2071         case LE: code = GT; break;
2072         case LT: code = GE; break;
2073         case GE: code = LT; break;
2074         case GT: code = LE; break;
2075         /* No case EQ.  */
2076         case NE: code = EQ; break;
2077         case LTU: code = GEU; break;
2078         case LEU: code = GTU; break;
2079         case GTU: code = LEU; break;
2080         case GEU: code = LTU; break;
2081         default: gcc_unreachable ();
2082         }
2083     }
2084 
2085   /* Make sure we can handle the last operand.  */
2086   switch (code)
2087     {
2088     case NE:
2089       /* Normalized to EQ above.  */
2090       gcc_unreachable ();
2091 
2092     case LE:
2093     case LT:
2094     case GE:
2095     case GT:
2096     case EQ:
2097       /* These instructions have a form taking an immediate zero.  */
2098       if (operands[5] == CONST0_RTX (<MODE>mode))
2099 	break;
2100       /* Fall through, as may need to load into register.  */
2101     default:
2102       if (!REG_P (operands[5]))
2103 	operands[5] = force_reg (<MODE>mode, operands[5]);
2104       break;
2105     }
2106 
2107   switch (code)
2108     {
2109     case LT:
2110       emit_insn (gen_aarch64_cmlt<mode> (mask, operands[4], operands[5]));
2111       break;
2112 
2113     case GE:
2114       emit_insn (gen_aarch64_cmge<mode> (mask, operands[4], operands[5]));
2115       break;
2116 
2117     case LE:
2118       emit_insn (gen_aarch64_cmle<mode> (mask, operands[4], operands[5]));
2119       break;
2120 
2121     case GT:
2122       emit_insn (gen_aarch64_cmgt<mode> (mask, operands[4], operands[5]));
2123       break;
2124 
2125     case LTU:
2126       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[5], operands[4]));
2127       break;
2128 
2129     case GEU:
2130       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[4], operands[5]));
2131       break;
2132 
2133     case LEU:
2134       emit_insn (gen_aarch64_cmgeu<mode> (mask, operands[5], operands[4]));
2135       break;
2136 
2137     case GTU:
2138       emit_insn (gen_aarch64_cmgtu<mode> (mask, operands[4], operands[5]));
2139       break;
2140 
2141     /* NE has been normalized to EQ above.  */
2142     case EQ:
2143       emit_insn (gen_aarch64_cmeq<mode> (mask, operands[4], operands[5]));
2144       break;
2145 
2146     default:
2147       gcc_unreachable ();
2148     }
2149 
2150     /* If we have (a = (b CMP c) ? -1 : 0);
2151        Then we can simply move the generated mask.  */
2152 
2153     if (op1 == CONSTM1_RTX (<V_cmp_result>mode)
2154 	&& op2 == CONST0_RTX (<V_cmp_result>mode))
2155       emit_move_insn (operands[0], mask);
2156     else
2157       {
2158 	if (!REG_P (op1))
2159 	  op1 = force_reg (<MODE>mode, op1);
2160 	if (!REG_P (op2))
2161 	  op2 = force_reg (<MODE>mode, op2);
2162 	emit_insn (gen_aarch64_simd_bsl<mode> (operands[0], mask,
2163 					       op1, op2));
2164       }
2165 
2166   DONE;
2167 })
2168
;; FP vector conditional select.  Operands 4/5 (VDQF) are compared to build
;; an integer mask; operands 1/2 (VDQF_COND) are then selected with BSL.
;; FCM only implements GE/GT/EQ (plus against-zero forms), so other codes
;; are synthesized by swapping comparison operands ("inverse"), inverting
;; the select by swapping the BSL arms ("swap_bsl_operands"), or ORing two
;; comparisons (UNEQ/ORDERED/UNORDERED).
2169 (define_expand "aarch64_vcond_internal<VDQF_COND:mode><VDQF:mode>"
2170   [(set (match_operand:VDQF_COND 0 "register_operand")
2171 	(if_then_else:VDQF
2172 	  (match_operator 3 "comparison_operator"
2173 	    [(match_operand:VDQF 4 "register_operand")
2174 	     (match_operand:VDQF 5 "nonmemory_operand")])
2175 	  (match_operand:VDQF_COND 1 "nonmemory_operand")
2176 	  (match_operand:VDQF_COND 2 "nonmemory_operand")))]
2177   "TARGET_SIMD"
2178 {
2179   int inverse = 0;
2180   int use_zero_form = 0;
2181   int swap_bsl_operands = 0;
2182   rtx op1 = operands[1];
2183   rtx op2 = operands[2];
2184   rtx mask = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2185   rtx tmp = gen_reg_rtx (<VDQF_COND:V_cmp_result>mode);
2186 
2187   rtx (*base_comparison) (rtx, rtx, rtx);
2188   rtx (*complementary_comparison) (rtx, rtx, rtx);
2189 
     /* Legitimize operand 5.  Operand 5 has VDQF mode, so the against-zero
	check must use the VDQF iterator's mode, not an unqualified <MODE>
	(this pattern uses two mode iterators).  */
2190   switch (GET_CODE (operands[3]))
2191     {
2192     case GE:
2193     case GT:
2194     case LE:
2195     case LT:
2196     case EQ:
2197       if (operands[5] == CONST0_RTX (<VDQF:MODE>mode))
2198 	{
2199 	  use_zero_form = 1;
2200 	  break;
2201 	}
2202       /* Fall through.  */
2203     default:
2204       if (!REG_P (operands[5]))
2205 	operands[5] = force_reg (<VDQF:MODE>mode, operands[5]);
2206     }
2207 
2208   switch (GET_CODE (operands[3]))
2209     {
2210     case LT:
2211     case UNLT:
2212       inverse = 1;
2213       /* Fall through.  */
2214     case GE:
2215     case UNGE:
2216     case ORDERED:
2217     case UNORDERED:
2218       base_comparison = gen_aarch64_cmge<VDQF:mode>;
2219       complementary_comparison = gen_aarch64_cmgt<VDQF:mode>;
2220       break;
2221     case LE:
2222     case UNLE:
2223       inverse = 1;
2224       /* Fall through.  */
2225     case GT:
2226     case UNGT:
2227       base_comparison = gen_aarch64_cmgt<VDQF:mode>;
2228       complementary_comparison = gen_aarch64_cmge<VDQF:mode>;
2229       break;
2230     case EQ:
2231     case NE:
2232     case UNEQ:
2233       base_comparison = gen_aarch64_cmeq<VDQF:mode>;
2234       complementary_comparison = gen_aarch64_cmeq<VDQF:mode>;
2235       break;
2236     default:
2237       gcc_unreachable ();
2238     }
2239 
2240   switch (GET_CODE (operands[3]))
2241     {
2242     case LT:
2243     case LE:
2244     case GT:
2245     case GE:
2246     case EQ:
2247       /* The easy case.  Here we emit one of FCMGE, FCMGT or FCMEQ.
2248 	 As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
2249 	 a GE b -> a GE b
2250 	 a GT b -> a GT b
2251 	 a LE b -> b GE a
2252 	 a LT b -> b GT a
2253 	 a EQ b -> a EQ b
2254 	 Note that there also exist direct comparison against 0 forms,
2255 	 so catch those as a special case.  */
2256       if (use_zero_form)
2257 	{
2258 	  inverse = 0;
2259 	  switch (GET_CODE (operands[3]))
2260 	    {
2261 	    case LT:
2262 	      base_comparison = gen_aarch64_cmlt<VDQF:mode>;
2263 	      break;
2264 	    case LE:
2265 	      base_comparison = gen_aarch64_cmle<VDQF:mode>;
2266 	      break;
2267 	    default:
2268 	      /* Do nothing, other zero form cases already have the correct
2269 		 base_comparison.  */
2270 	      break;
2271 	    }
2272 	}
2273 
2274       if (!inverse)
2275 	emit_insn (base_comparison (mask, operands[4], operands[5]));
2276       else
2277 	emit_insn (complementary_comparison (mask, operands[5], operands[4]));
2278       break;
2279     case UNLT:
2280     case UNLE:
2281     case UNGT:
2282     case UNGE:
2283     case NE:
2284       /* FCM returns false for lanes which are unordered, so if we use
2285 	 the inverse of the comparison we actually want to emit, then
2286 	 swap the operands to BSL, we will end up with the correct result.
2287 	 Note that a NE NaN and NaN NE b are true for all a, b.
2288 
2289 	 Our transformations are:
2290 	 a GE b -> !(b GT a)
2291 	 a GT b -> !(b GE a)
2292 	 a LE b -> !(a GT b)
2293 	 a LT b -> !(a GE b)
2294 	 a NE b -> !(a EQ b)  */
2295 
2296       if (inverse)
2297 	emit_insn (base_comparison (mask, operands[4], operands[5]));
2298       else
2299 	emit_insn (complementary_comparison (mask, operands[5], operands[4]));
2300 
2301       swap_bsl_operands = 1;
2302       break;
2303     case UNEQ:
2304       /* We check (a > b ||  b > a).  combining these comparisons give us
2305 	 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2306 	 will then give us (a == b ||  a UNORDERED b) as intended.  */
2307 
2308       emit_insn (gen_aarch64_cmgt<VDQF:mode> (mask, operands[4], operands[5]));
2309       emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[5], operands[4]));
2310       emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2311       swap_bsl_operands = 1;
2312       break;
2313     case UNORDERED:
2314       /* Operands are ORDERED iff (a > b || b >= a).
2315 	 Swapping the operands to BSL will give the UNORDERED case.  */
2316       swap_bsl_operands = 1;
2317       /* Fall through.  */
2318     case ORDERED:
2319       emit_insn (gen_aarch64_cmgt<VDQF:mode> (tmp, operands[4], operands[5]));
2320       emit_insn (gen_aarch64_cmge<VDQF:mode> (mask, operands[5], operands[4]));
2321       emit_insn (gen_ior<VDQF_COND:v_cmp_result>3 (mask, mask, tmp));
2322       break;
2323     default:
2324       gcc_unreachable ();
2325     }
2326 
2327   if (swap_bsl_operands)
2328     {
2329       op1 = operands[2];
2330       op2 = operands[1];
2331     }
2332 
2333     /* If we have (a = (b CMP c) ? -1 : 0);
2334        Then we can simply move the generated mask.  */
2335 
2336     if (op1 == CONSTM1_RTX (<VDQF_COND:V_cmp_result>mode)
2337 	&& op2 == CONST0_RTX (<VDQF_COND:V_cmp_result>mode))
2338       emit_move_insn (operands[0], mask);
2339     else
2340       {
2341 	if (!REG_P (op1))
2342 	  op1 = force_reg (<VDQF_COND:MODE>mode, op1);
2343 	if (!REG_P (op2))
2344 	  op2 = force_reg (<VDQF_COND:MODE>mode, op2);
2345 	emit_insn (gen_aarch64_simd_bsl<VDQF_COND:mode> (operands[0], mask,
2346 					     op1, op2));
2347       }
2348 
2349   DONE;
2350 })
2351
;; Standard vcond pattern (select arms have the same mode as the comparison):
;; defer all the work to aarch64_vcond_internal.
2352 (define_expand "vcond<mode><mode>"
2353   [(set (match_operand:VALL 0 "register_operand")
2354 	(if_then_else:VALL
2355 	  (match_operator 3 "comparison_operator"
2356 	    [(match_operand:VALL 4 "register_operand")
2357 	     (match_operand:VALL 5 "nonmemory_operand")])
2358 	  (match_operand:VALL 1 "nonmemory_operand")
2359 	  (match_operand:VALL 2 "nonmemory_operand")))]
2360   "TARGET_SIMD"
2361 {
2362   emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2363 					       operands[2], operands[3],
2364 					       operands[4], operands[5]));
2365   DONE;
2366 })
2367
;; vcond with an FP comparison but integer-mode (V_cmp_result) select arms;
;; also handled by the two-iterator internal expander.
2368 (define_expand "vcond<v_cmp_result><mode>"
2369   [(set (match_operand:<V_cmp_result> 0 "register_operand")
2370 	(if_then_else:<V_cmp_result>
2371 	  (match_operator 3 "comparison_operator"
2372 	    [(match_operand:VDQF 4 "register_operand")
2373 	     (match_operand:VDQF 5 "nonmemory_operand")])
2374 	  (match_operand:<V_cmp_result> 1 "nonmemory_operand")
2375 	  (match_operand:<V_cmp_result> 2 "nonmemory_operand")))]
2376   "TARGET_SIMD"
2377 {
2378   emit_insn (gen_aarch64_vcond_internal<v_cmp_result><mode> (
2379 						operands[0], operands[1],
2380 						operands[2], operands[3],
2381 						operands[4], operands[5]));
2382   DONE;
2383 })
2384
;; Unsigned vcond: the internal expander dispatches on the rtx comparison
;; code, so unsigned codes (LTU etc.) are handled there.
2385 (define_expand "vcondu<mode><mode>"
2386   [(set (match_operand:VDQ_I 0 "register_operand")
2387 	(if_then_else:VDQ_I
2388 	  (match_operator 3 "comparison_operator"
2389 	    [(match_operand:VDQ_I 4 "register_operand")
2390 	     (match_operand:VDQ_I 5 "nonmemory_operand")])
2391 	  (match_operand:VDQ_I 1 "nonmemory_operand")
2392 	  (match_operand:VDQ_I 2 "nonmemory_operand")))]
2393   "TARGET_SIMD"
2394 {
2395   emit_insn (gen_aarch64_vcond_internal<mode><mode> (operands[0], operands[1],
2396 					       operands[2], operands[3],
2397 					       operands[4], operands[5]));
2398   DONE;
2399 })
2400
2401 ;; Patterns for AArch64 SIMD Intrinsics.
2402
2403 ;; Lane extraction with sign extension to general purpose register.
;; Lane extract into a general-purpose register with sign extension (SMOV).
;; The lane index in RTL uses GCC vector-extension numbering; it is flipped
;; for big-endian only when emitting assembly (ENDIAN_LANE_N).
2404 (define_insn "*aarch64_get_lane_extend<GPI:mode><VDQQH:mode>"
2405   [(set (match_operand:GPI 0 "register_operand" "=r")
2406 	(sign_extend:GPI
2407 	  (vec_select:<VEL>
2408 	    (match_operand:VDQQH 1 "register_operand" "w")
2409 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2410   "TARGET_SIMD"
2411   {
2412     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2413     return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2414   }
2415   [(set_attr "type" "neon_to_gp<q>")]
2416 )
2417
;; Lane extract into a general-purpose register with zero extension to SI
;; (UMOV); lane index endian-corrected at output time, as above.
2418 (define_insn "*aarch64_get_lane_zero_extendsi<mode>"
2419   [(set (match_operand:SI 0 "register_operand" "=r")
2420 	(zero_extend:SI
2421 	  (vec_select:<VEL>
2422 	    (match_operand:VDQQH 1 "register_operand" "w")
2423 	    (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2424   "TARGET_SIMD"
2425   {
2426     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2427     return "umov\\t%w0, %1.<Vetype>[%2]";
2428   }
2429   [(set_attr "type" "neon_to_gp<q>")]
2430 )
2431
2432 ;; Lane extraction of a value, neither sign nor zero extension
2433 ;; is guaranteed so upper bits should be considered undefined.
2434 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
;; Generic lane extract: to a GP register (UMOV), to a SIMD register (DUP of
;; one lane), or straight to memory (ST1 of one lane), chosen by alternative.
2435 (define_insn "aarch64_get_lane<mode>"
2436   [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
2437 	(vec_select:<VEL>
2438 	  (match_operand:VALL 1 "register_operand" "w, w, w")
2439 	  (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2440   "TARGET_SIMD"
2441   {
2442     operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
2443     switch (which_alternative)
2444       {
2445 	case 0:
2446 	  return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2447 	case 1:
2448 	  return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
2449 	case 2:
2450 	  return "st1\\t{%1.<Vetype>}[%2], %0";
2451 	default:
2452 	  gcc_unreachable ();
2453       }
2454   }
2455   [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2456 )
2457
2458 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2459 ;; dest vector.
2460
;; Concatenate a 64-bit vector with zeros (little-endian lane order).  A
;; 64-bit MOV suffices: a 64-bit SIMD register write leaves the upper half
;; of the 128-bit register zeroed per the architecture.
2461 (define_insn "*aarch64_combinez<mode>"
2462   [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2463         (vec_concat:<VDBL>
2464 	   (match_operand:VD_BHSI 1 "register_operand" "w")
2465 	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")))]
2466   "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2467   "mov\\t%0.8b, %1.8b"
2468   [(set_attr "type" "neon_move<q>")]
2469 )
2470
;; Big-endian twin of *aarch64_combinez: same MOV, but the zero half comes
;; first in the RTL vec_concat because lane order is reversed.
2471 (define_insn "*aarch64_combinez_be<mode>"
2472   [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2473         (vec_concat:<VDBL>
2474 	   (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz")
2475 	   (match_operand:VD_BHSI 1 "register_operand" "w")))]
2476   "TARGET_SIMD && BYTES_BIG_ENDIAN"
2477   "mov\\t%0.8b, %1.8b"
2478   [(set_attr "type" "neon_move<q>")]
2479 )
2480
;; Combine two 64-bit vectors into one 128-bit vector.  The canonical
;; vec_concat operand order depends on endianness, so swap the halves for
;; big-endian before emitting the internal insn.
2481 (define_expand "aarch64_combine<mode>"
2482   [(match_operand:<VDBL> 0 "register_operand")
2483    (match_operand:VDC 1 "register_operand")
2484    (match_operand:VDC 2 "register_operand")]
2485   "TARGET_SIMD"
2486 {
2487   rtx op1, op2;
2488   if (BYTES_BIG_ENDIAN)
2489     {
2490       op1 = operands[2];
2491       op2 = operands[1];
2492     }
2493   else
2494     {
2495       op1 = operands[1];
2496       op2 = operands[2];
2497     }
2498   emit_insn (gen_aarch64_combine_internal<mode> (operands[0], op1, op2));
2499   DONE;
2500 }
2501 )
2502
;; vec_concat of two 64-bit vectors, kept as a single insn until after
;; reload, then split via aarch64_split_simd_combine with the halves
;; re-swapped for big-endian (undoing the swap done by the expander).
2503 (define_insn_and_split "aarch64_combine_internal<mode>"
2504   [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
2505         (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
2506 			   (match_operand:VDC 2 "register_operand" "w")))]
2507   "TARGET_SIMD"
2508   "#"
2509   "&& reload_completed"
2510   [(const_int 0)]
2511 {
2512   if (BYTES_BIG_ENDIAN)
2513     aarch64_split_simd_combine (operands[0], operands[2], operands[1]);
2514   else
2515     aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
2516   DONE;
2517 }
2518 [(set_attr "type" "multiple")]
2519 )
2520
;; Build a 128-bit vector from two 64-bit halves by writing the low quad
;; then the high quad of operand 0.
2521 (define_expand "aarch64_simd_combine<mode>"
2522   [(match_operand:<VDBL> 0 "register_operand")
2523    (match_operand:VDC 1 "register_operand")
2524    (match_operand:VDC 2 "register_operand")]
2525   "TARGET_SIMD"
2526   {
2527     emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
2528     emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
2529     DONE;
2530   }
2531 [(set_attr "type" "multiple")]
2532 )
2533
2534 ;; <su><addsub>l<q>.
2535
;; Widening add/sub of the HIGH halves of two Q-register vectors
;; ([SU]ADDL2 / [SU]SUBL2); operand 3 is the hi-half lane-selector parallel.
2536 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_hi_internal"
2537  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2538        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2539 			   (match_operand:VQW 1 "register_operand" "w")
2540 			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
2541 		       (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2542 			   (match_operand:VQW 2 "register_operand" "w")
2543 			   (match_dup 3)))))]
2544   "TARGET_SIMD"
2545   "<ANY_EXTEND:su><ADDSUB:optab>l2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2546   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2547 )
2548
;; Widening add/sub of the LOW halves of two Q-register vectors
;; ([SU]ADDL / [SU]SUBL); operand 3 is the lo-half lane-selector parallel.
2549 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>_lo_internal"
2550  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2551        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2552                            (match_operand:VQW 1 "register_operand" "w")
2553                            (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
2554                        (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
2555                            (match_operand:VQW 2 "register_operand" "w")
2556                            (match_dup 3)))))]
2557   "TARGET_SIMD"
2558   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
2559   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2560 )
2561
2562
;; Intrinsic expander for SADDL2: build the hi-half selector and emit the
;; internal widening-add insn.
2563 (define_expand "aarch64_saddl2<mode>"
2564   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2565    (match_operand:VQW 1 "register_operand" "w")
2566    (match_operand:VQW 2 "register_operand" "w")]
2567   "TARGET_SIMD"
2568 {
2569   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2570   emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
2571                                                   operands[2], p));
2572   DONE;
2573 })
2574
;; Intrinsic expander for UADDL2 (unsigned twin of aarch64_saddl2).
2575 (define_expand "aarch64_uaddl2<mode>"
2576   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2577    (match_operand:VQW 1 "register_operand" "w")
2578    (match_operand:VQW 2 "register_operand" "w")]
2579   "TARGET_SIMD"
2580 {
2581   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2582   emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
2583                                                   operands[2], p));
2584   DONE;
2585 })
2586
;; Intrinsic expander for SSUBL2: hi-half widening subtract.
2587 (define_expand "aarch64_ssubl2<mode>"
2588   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2589    (match_operand:VQW 1 "register_operand" "w")
2590    (match_operand:VQW 2 "register_operand" "w")]
2591   "TARGET_SIMD"
2592 {
2593   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2594   emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
2595 						operands[2], p));
2596   DONE;
2597 })
2598
;; Intrinsic expander for USUBL2 (unsigned twin of aarch64_ssubl2).
2599 (define_expand "aarch64_usubl2<mode>"
2600   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2601    (match_operand:VQW 1 "register_operand" "w")
2602    (match_operand:VQW 2 "register_operand" "w")]
2603   "TARGET_SIMD"
2604 {
2605   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2606   emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
2607 						operands[2], p));
2608   DONE;
2609 })
2610
;; Widening add/sub of two full 64-bit vectors ([SU]ADDL / [SU]SUBL).
2611 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>l<mode>"
2612  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2613        (ADDSUB:<VWIDE> (ANY_EXTEND:<VWIDE>
2614 			   (match_operand:VD_BHSI 1 "register_operand" "w"))
2615 		       (ANY_EXTEND:<VWIDE>
2616 			   (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2617   "TARGET_SIMD"
2618   "<ANY_EXTEND:su><ADDSUB:optab>l\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
2619   [(set_attr "type" "neon_<ADDSUB:optab>_long")]
2620 )
2621
2622 ;; <su><addsub>w<q>.
2623
;; Widening add/sub of a wide operand 1 with the extended narrow operand 2
;; ([SU]ADDW / [SU]SUBW).
2624 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>"
2625   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2626         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2627 			(ANY_EXTEND:<VWIDE>
2628 			  (match_operand:VD_BHSI 2 "register_operand" "w"))))]
2629   "TARGET_SIMD"
2630   "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2631   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2632 )
2633
;; High-half variant: wide operand 1 with the extended high half of Q-reg
;; operand 2 ([SU]ADDW2 / [SU]SUBW2); operand 3 selects the high half.
2634 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal"
2635   [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2636         (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
2637 			(ANY_EXTEND:<VWIDE>
2638 			  (vec_select:<VHALF>
2639 			   (match_operand:VQW 2 "register_operand" "w")
2640 			   (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
2641   "TARGET_SIMD"
2642   "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
2643   [(set_attr "type" "neon_<ADDSUB:optab>_widen")]
2644 )
2645
;; Intrinsic expander for SADDW2: hi-half selector plus the internal insn.
2646 (define_expand "aarch64_saddw2<mode>"
2647   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2648    (match_operand:<VWIDE> 1 "register_operand" "w")
2649    (match_operand:VQW 2 "register_operand" "w")]
2650   "TARGET_SIMD"
2651 {
2652   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2653   emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
2654 						operands[2], p));
2655   DONE;
2656 })
2657
;; Intrinsic expander for UADDW2 (unsigned twin of aarch64_saddw2).
2658 (define_expand "aarch64_uaddw2<mode>"
2659   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2660    (match_operand:<VWIDE> 1 "register_operand" "w")
2661    (match_operand:VQW 2 "register_operand" "w")]
2662   "TARGET_SIMD"
2663 {
2664   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2665   emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
2666 						operands[2], p));
2667   DONE;
2668 })
2669
2670
;; Intrinsic expander for SSUBW2.
2671 (define_expand "aarch64_ssubw2<mode>"
2672   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2673    (match_operand:<VWIDE> 1 "register_operand" "w")
2674    (match_operand:VQW 2 "register_operand" "w")]
2675   "TARGET_SIMD"
2676 {
2677   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2678   emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
2679 						operands[2], p));
2680   DONE;
2681 })
2682
;; Intrinsic expander for USUBW2 (unsigned twin of aarch64_ssubw2).
2683 (define_expand "aarch64_usubw2<mode>"
2684   [(match_operand:<VWIDE> 0 "register_operand" "=w")
2685    (match_operand:<VWIDE> 1 "register_operand" "w")
2686    (match_operand:VQW 2 "register_operand" "w")]
2687   "TARGET_SIMD"
2688 {
2689   rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
2690   emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
2691 						operands[2], p));
2692   DONE;
2693 })
2694
2695 ;; <su><r>h<addsub>.
2696
;; Halving add/subtract; the HADDSUB iterator supplies both the plain and
;; rounding variants ([SU]HADD, [SU]RHADD, [SU]HSUB, ...).
2697 (define_insn "aarch64_<sur>h<addsub><mode>"
2698   [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
2699         (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
2700 		      (match_operand:VDQ_BHSI 2 "register_operand" "w")]
2701 		     HADDSUB))]
2702   "TARGET_SIMD"
2703   "<sur>h<addsub>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2704   [(set_attr "type" "neon_<addsub>_halve<q>")]
2705 )
2706
2707 ;; <r><addsub>hn<q>.
2708
;; Narrowing high-half add/subtract (ADDHN/SUBHN and rounding variants):
;; result elements are half the width of the Q-register inputs.
2709 (define_insn "aarch64_<sur><addsub>hn<mode>"
2710   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2711         (unspec:<VNARROWQ> [(match_operand:VQN 1 "register_operand" "w")
2712 			    (match_operand:VQN 2 "register_operand" "w")]
2713                            ADDSUBHN))]
2714   "TARGET_SIMD"
2715   "<sur><addsub>hn\\t%0.<Vntype>, %1.<Vtype>, %2.<Vtype>"
2716   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
2717 )
2718
;; Second-half variant ([R]ADDHN2/[R]SUBHN2): operand 1 (tied to the output)
;; supplies the already-computed low half; the new narrow result fills the
;; high half.
2719 (define_insn "aarch64_<sur><addsub>hn2<mode>"
2720   [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=w")
2721         (unspec:<VNARROWQ2> [(match_operand:<VNARROWQ> 1 "register_operand" "0")
2722 			     (match_operand:VQN 2 "register_operand" "w")
2723 			     (match_operand:VQN 3 "register_operand" "w")]
2724                             ADDSUBHN2))]
2725   "TARGET_SIMD"
2726   "<sur><addsub>hn2\\t%0.<V2ntype>, %2.<Vtype>, %3.<Vtype>"
2727   [(set_attr "type" "neon_<addsub>_halve_narrow_q")]
2728 )
2729
2730 ;; pmul.
2731
;; Polynomial (carry-less) multiply on byte vectors (PMUL).
2732 (define_insn "aarch64_pmul<mode>"
2733   [(set (match_operand:VB 0 "register_operand" "=w")
2734         (unspec:VB [(match_operand:VB 1 "register_operand" "w")
2735 		    (match_operand:VB 2 "register_operand" "w")]
2736 		   UNSPEC_PMUL))]
2737  "TARGET_SIMD"
2738  "pmul\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
2739   [(set_attr "type" "neon_mul_<Vetype><q>")]
2740 )
2741
2742 ;; <su>q<addsub>
2743
;; Saturating add/subtract, signed and unsigned ([SU]QADD / [SU]QSUB),
;; covering both vector and scalar SIMD modes.
2744 (define_insn "aarch64_<su_optab><optab><mode>"
2745   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2746 	(BINQOPS:VSDQ_I (match_operand:VSDQ_I 1 "register_operand" "w")
2747 			(match_operand:VSDQ_I 2 "register_operand" "w")))]
2748   "TARGET_SIMD"
2749   "<su_optab><optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2750   [(set_attr "type" "neon_<optab><q>")]
2751 )
2752
2753 ;; suqadd and usqadd
2754
;; SUQADD/USQADD: saturating accumulate of operand 2 into operand 0, which
;; is tied to operand 1 (the constraint "0"), so only %2 appears in the asm.
2755 (define_insn "aarch64_<sur>qadd<mode>"
2756   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2757 	(unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "0")
2758 			(match_operand:VSDQ_I 2 "register_operand" "w")]
2759 		       USSUQADD))]
2760   "TARGET_SIMD"
2761   "<sur>qadd\\t%<v>0<Vmtype>, %<v>2<Vmtype>"
2762   [(set_attr "type" "neon_qadd<q>")]
2763 )
2764
2765 ;; sqmovun
2766
;; Signed saturating extract-narrow with unsigned result (SQXTUN).
2767 (define_insn "aarch64_sqmovun<mode>"
2768   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2769 	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2770                             UNSPEC_SQXTUN))]
2771    "TARGET_SIMD"
2772    "sqxtun\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2773    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2774 )
2775
2776 ;; sqmovn and uqmovn
2777
;; Saturating extract-narrow, signed or unsigned (SQXTN / UQXTN).
2778 (define_insn "aarch64_<sur>qmovn<mode>"
2779   [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
2780 	(unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")]
2781                             SUQMOVN))]
2782   "TARGET_SIMD"
2783   "<sur>qxtn\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>"
2784    [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
2785 )
2786
2787 ;; <su>q<absneg>
2788
;; Saturating unary ops: SQABS / SQNEG (the UNQOPS iterator supplies the op).
2789 (define_insn "aarch64_s<optab><mode>"
2790   [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
2791 	(UNQOPS:VSDQ_I
2792 	  (match_operand:VSDQ_I 1 "register_operand" "w")))]
2793   "TARGET_SIMD"
2794   "s<optab>\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
2795   [(set_attr "type" "neon_<optab><q>")]
2796 )
2797
2798 ;; sq<r>dmulh.
2799
;; Signed saturating doubling multiply returning high half, optionally
;; rounding (SQDMULH / SQRDMULH).
2800 (define_insn "aarch64_sq<r>dmulh<mode>"
2801   [(set (match_operand:VSDQ_HSI 0 "register_operand" "=w")
2802 	(unspec:VSDQ_HSI
2803 	  [(match_operand:VSDQ_HSI 1 "register_operand" "w")
2804 	   (match_operand:VSDQ_HSI 2 "register_operand" "w")]
2805 	 VQDMULH))]
2806   "TARGET_SIMD"
2807   "sq<r>dmulh\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
2808   [(set_attr "type" "neon_sat_mul_<Vetype><q>")]
2809 )
2810
2811 ;; sq<r>dmulh_lane
2812
;; SQ[R]DMULH by one lane of a D-register vector (VCOND); the lane index
;; is endian-corrected against VCOND's mode at output time.
2813 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2814   [(set (match_operand:VDQHS 0 "register_operand" "=w")
2815         (unspec:VDQHS
2816 	  [(match_operand:VDQHS 1 "register_operand" "w")
2817            (vec_select:<VEL>
2818              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2819              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2820 	 VQDMULH))]
2821   "TARGET_SIMD"
2822   "*
2823    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2824    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2825   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2826 )
2827
;; SQ[R]DMULH by one lane of a Q-register vector (VCONQ); lane index
;; endian-corrected against VCONQ's mode.
2828 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2829   [(set (match_operand:VDQHS 0 "register_operand" "=w")
2830         (unspec:VDQHS
2831 	  [(match_operand:VDQHS 1 "register_operand" "w")
2832            (vec_select:<VEL>
2833              (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2834              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2835 	 VQDMULH))]
2836   "TARGET_SIMD"
2837   "*
2838    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2839    return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
2840   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2841 )
2842
;; Scalar (SD_HSI) form of SQ[R]DMULH by lane of a D-register vector.
2843 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
2844   [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2845         (unspec:SD_HSI
2846 	  [(match_operand:SD_HSI 1 "register_operand" "w")
2847            (vec_select:<VEL>
2848              (match_operand:<VCOND> 2 "register_operand" "<vwx>")
2849              (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2850 	 VQDMULH))]
2851   "TARGET_SIMD"
2852   "*
2853    operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
2854    return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2855   [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2856 )
2857
;; Scalar (SD_HSI) SQ(R)DMULH by a lane of a 128-bit vector.
2858 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
2859 [(set (match_operand:SD_HSI 0 "register_operand" "=w")
2860 (unspec:SD_HSI
2861 [(match_operand:SD_HSI 1 "register_operand" "w")
2862 (vec_select:<VEL>
2863 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
2864 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
2865 VQDMULH))]
2866 "TARGET_SIMD"
2867 "*
2868 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
2869 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
2870 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
2871 )
2872
2873 ;; vqdml[sa]l
2874
;; Saturating doubling multiply-accumulate long: SQDMLAL/SQDMLSL.
;; Widens both multiplicands, doubles the product (ss_ashift by 1), and
;; saturating-adds/subtracts it into accumulator operand 1 (tied to dest).
2875 (define_insn "aarch64_sqdml<SBINQOPS:as>l<mode>"
2876 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2877 (SBINQOPS:<VWIDE>
2878 (match_operand:<VWIDE> 1 "register_operand" "0")
2879 (ss_ashift:<VWIDE>
2880 (mult:<VWIDE>
2881 (sign_extend:<VWIDE>
2882 (match_operand:VSD_HSI 2 "register_operand" "w"))
2883 (sign_extend:<VWIDE>
2884 (match_operand:VSD_HSI 3 "register_operand" "w")))
2885 (const_int 1))))]
2886 "TARGET_SIMD"
2887 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
2888 [(set_attr "type" "neon_sat_mla_<Vetype>_long")]
2889 )
2890
2891 ;; vqdml[sa]l_lane
2892
;; SQDMLAL/SQDMLSL where the second multiplicand is one lane of a
;; 64-bit vector, duplicated across all elements.  Lane number is
;; endian-adjusted before the template is printed.
2893 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2894 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2895 (SBINQOPS:<VWIDE>
2896 (match_operand:<VWIDE> 1 "register_operand" "0")
2897 (ss_ashift:<VWIDE>
2898 (mult:<VWIDE>
2899 (sign_extend:<VWIDE>
2900 (match_operand:VD_HSI 2 "register_operand" "w"))
2901 (sign_extend:<VWIDE>
2902 (vec_duplicate:VD_HSI
2903 (vec_select:<VEL>
2904 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2905 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2906 ))
2907 (const_int 1))))]
2908 "TARGET_SIMD"
2909 {
2910 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2911 return
2912 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2913 }
2914 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2915 )
2916
;; As the _lane variant above, but selecting from a 128-bit vector.
2917 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2918 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2919 (SBINQOPS:<VWIDE>
2920 (match_operand:<VWIDE> 1 "register_operand" "0")
2921 (ss_ashift:<VWIDE>
2922 (mult:<VWIDE>
2923 (sign_extend:<VWIDE>
2924 (match_operand:VD_HSI 2 "register_operand" "w"))
2925 (sign_extend:<VWIDE>
2926 (vec_duplicate:VD_HSI
2927 (vec_select:<VEL>
2928 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2929 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2930 ))
2931 (const_int 1))))]
2932 "TARGET_SIMD"
2933 {
2934 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2935 return
2936 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2937 }
2938 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2939 )
2940
;; Scalar (SD_HSI) SQDMLAL/SQDMLSL by a lane of a 64-bit vector; no
;; vec_duplicate here since the multiplicand is a single element.
2941 (define_insn "aarch64_sqdml<SBINQOPS:as>l_lane<mode>"
2942 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2943 (SBINQOPS:<VWIDE>
2944 (match_operand:<VWIDE> 1 "register_operand" "0")
2945 (ss_ashift:<VWIDE>
2946 (mult:<VWIDE>
2947 (sign_extend:<VWIDE>
2948 (match_operand:SD_HSI 2 "register_operand" "w"))
2949 (sign_extend:<VWIDE>
2950 (vec_select:<VEL>
2951 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
2952 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2953 )
2954 (const_int 1))))]
2955 "TARGET_SIMD"
2956 {
2957 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
2958 return
2959 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2960 }
2961 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2962 )
2963
;; Scalar (SD_HSI) SQDMLAL/SQDMLSL by a lane of a 128-bit vector.
2964 (define_insn "aarch64_sqdml<SBINQOPS:as>l_laneq<mode>"
2965 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2966 (SBINQOPS:<VWIDE>
2967 (match_operand:<VWIDE> 1 "register_operand" "0")
2968 (ss_ashift:<VWIDE>
2969 (mult:<VWIDE>
2970 (sign_extend:<VWIDE>
2971 (match_operand:SD_HSI 2 "register_operand" "w"))
2972 (sign_extend:<VWIDE>
2973 (vec_select:<VEL>
2974 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
2975 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
2976 )
2977 (const_int 1))))]
2978 "TARGET_SIMD"
2979 {
2980 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
2981 return
2982 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
2983 }
2984 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
2985 )
2986
2987 ;; vqdml[sa]l_n
2988
;; SQDMLAL/SQDMLSL with a scalar second multiplicand broadcast to all
;; lanes; printed as element [0] of the scalar's vector register.
2989 (define_insn "aarch64_sqdml<SBINQOPS:as>l_n<mode>"
2990 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
2991 (SBINQOPS:<VWIDE>
2992 (match_operand:<VWIDE> 1 "register_operand" "0")
2993 (ss_ashift:<VWIDE>
2994 (mult:<VWIDE>
2995 (sign_extend:<VWIDE>
2996 (match_operand:VD_HSI 2 "register_operand" "w"))
2997 (sign_extend:<VWIDE>
2998 (vec_duplicate:VD_HSI
2999 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3000 (const_int 1))))]
3001 "TARGET_SIMD"
3002 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3003 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3004 )
3005
3006 ;; sqdml[as]l2
3007
;; SQDMLAL2/SQDMLSL2: operate on the high halves of two 128-bit vectors.
;; Operand 4 is the parallel selecting the high half; expanders below
;; construct it with aarch64_simd_vect_par_cnst_half.
3008 (define_insn "aarch64_sqdml<SBINQOPS:as>l2<mode>_internal"
3009 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3010 (SBINQOPS:<VWIDE>
3011 (match_operand:<VWIDE> 1 "register_operand" "0")
3012 (ss_ashift:<VWIDE>
3013 (mult:<VWIDE>
3014 (sign_extend:<VWIDE>
3015 (vec_select:<VHALF>
3016 (match_operand:VQ_HSI 2 "register_operand" "w")
3017 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3018 (sign_extend:<VWIDE>
3019 (vec_select:<VHALF>
3020 (match_operand:VQ_HSI 3 "register_operand" "w")
3021 (match_dup 4))))
3022 (const_int 1))))]
3023 "TARGET_SIMD"
3024 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %<v>3<Vmtype>"
3025 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3026 )
3027
;; Expander for SQDMLAL2: builds the hi-half selector and emits the
;; _internal pattern above.
3028 (define_expand "aarch64_sqdmlal2<mode>"
3029 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3030 (match_operand:<VWIDE> 1 "register_operand" "w")
3031 (match_operand:VQ_HSI 2 "register_operand" "w")
3032 (match_operand:VQ_HSI 3 "register_operand" "w")]
3033 "TARGET_SIMD"
3034 {
3035 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3036 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3037 operands[2], operands[3], p));
3038 DONE;
3039 })
3040
;; Expander for SQDMLSL2; mirrors the SQDMLAL2 expander above.
3041 (define_expand "aarch64_sqdmlsl2<mode>"
3042 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3043 (match_operand:<VWIDE> 1 "register_operand" "w")
3044 (match_operand:VQ_HSI 2 "register_operand" "w")
3045 (match_operand:VQ_HSI 3 "register_operand" "w")]
3046 "TARGET_SIMD"
3047 {
3048 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3049 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3050 operands[2], operands[3], p));
3051 DONE;
3052 })
3053
3054 ;; vqdml[sa]l2_lane
3055
;; SQDMLAL2/SQDMLSL2 by lane: high half of operand 2 times a duplicated
;; lane of a 64-bit vector (operand 3).  Operand 5 is the hi-half
;; selector; operand 4 is the lane number, endian-adjusted at output.
3056 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_lane<mode>_internal"
3057 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3058 (SBINQOPS:<VWIDE>
3059 (match_operand:<VWIDE> 1 "register_operand" "0")
3060 (ss_ashift:<VWIDE>
3061 (mult:<VWIDE>
3062 (sign_extend:<VWIDE>
3063 (vec_select:<VHALF>
3064 (match_operand:VQ_HSI 2 "register_operand" "w")
3065 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3066 (sign_extend:<VWIDE>
3067 (vec_duplicate:<VHALF>
3068 (vec_select:<VEL>
3069 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3070 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3071 ))))
3072 (const_int 1))))]
3073 "TARGET_SIMD"
3074 {
3075 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4])));
3076 return
3077 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3078 }
3079 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3080 )
3081
;; As the _lane2 variant above, but the lane comes from a 128-bit vector.
3082 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_laneq<mode>_internal"
3083 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3084 (SBINQOPS:<VWIDE>
3085 (match_operand:<VWIDE> 1 "register_operand" "0")
3086 (ss_ashift:<VWIDE>
3087 (mult:<VWIDE>
3088 (sign_extend:<VWIDE>
3089 (vec_select:<VHALF>
3090 (match_operand:VQ_HSI 2 "register_operand" "w")
3091 (match_operand:VQ_HSI 5 "vect_par_cnst_hi_half" "")))
3092 (sign_extend:<VWIDE>
3093 (vec_duplicate:<VHALF>
3094 (vec_select:<VEL>
3095 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3096 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3097 ))))
3098 (const_int 1))))]
3099 "TARGET_SIMD"
3100 {
3101 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4])));
3102 return
3103 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3104 }
3105 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3106 )
3107
;; Expander: SQDMLAL2 by lane; builds the hi-half selector and forwards
;; to the _internal insn.
3108 (define_expand "aarch64_sqdmlal2_lane<mode>"
3109 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3110 (match_operand:<VWIDE> 1 "register_operand" "w")
3111 (match_operand:VQ_HSI 2 "register_operand" "w")
3112 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3113 (match_operand:SI 4 "immediate_operand" "i")]
3114 "TARGET_SIMD"
3115 {
3116 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3117 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3118 operands[2], operands[3],
3119 operands[4], p));
3120 DONE;
3121 })
3122
;; Expander: SQDMLAL2 by lane of a 128-bit vector.
3123 (define_expand "aarch64_sqdmlal2_laneq<mode>"
3124 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3125 (match_operand:<VWIDE> 1 "register_operand" "w")
3126 (match_operand:VQ_HSI 2 "register_operand" "w")
3127 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3128 (match_operand:SI 4 "immediate_operand" "i")]
3129 "TARGET_SIMD"
3130 {
3131 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3132 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3133 operands[2], operands[3],
3134 operands[4], p));
3135 DONE;
3136 })
3137
;; Expander: SQDMLSL2 by lane of a 64-bit vector.
3138 (define_expand "aarch64_sqdmlsl2_lane<mode>"
3139 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3140 (match_operand:<VWIDE> 1 "register_operand" "w")
3141 (match_operand:VQ_HSI 2 "register_operand" "w")
3142 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3143 (match_operand:SI 4 "immediate_operand" "i")]
3144 "TARGET_SIMD"
3145 {
3146 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3147 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3148 operands[2], operands[3],
3149 operands[4], p));
3150 DONE;
3151 })
3152
;; Expander: SQDMLSL2 by lane of a 128-bit vector.
3153 (define_expand "aarch64_sqdmlsl2_laneq<mode>"
3154 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3155 (match_operand:<VWIDE> 1 "register_operand" "w")
3156 (match_operand:VQ_HSI 2 "register_operand" "w")
3157 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3158 (match_operand:SI 4 "immediate_operand" "i")]
3159 "TARGET_SIMD"
3160 {
3161 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3162 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3163 operands[2], operands[3],
3164 operands[4], p));
3165 DONE;
3166 })
3167
;; SQDMLAL2/SQDMLSL2 with a broadcast scalar multiplicand: high half of
;; operand 2 times operand 3 duplicated; printed as element [0].
3168 (define_insn "aarch64_sqdml<SBINQOPS:as>l2_n<mode>_internal"
3169 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3170 (SBINQOPS:<VWIDE>
3171 (match_operand:<VWIDE> 1 "register_operand" "0")
3172 (ss_ashift:<VWIDE>
3173 (mult:<VWIDE>
3174 (sign_extend:<VWIDE>
3175 (vec_select:<VHALF>
3176 (match_operand:VQ_HSI 2 "register_operand" "w")
3177 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3178 (sign_extend:<VWIDE>
3179 (vec_duplicate:<VHALF>
3180 (match_operand:<VEL> 3 "register_operand" "<vwx>"))))
3181 (const_int 1))))]
3182 "TARGET_SIMD"
3183 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[0]"
3184 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3185 )
3186
;; Expander: SQDMLAL2 with broadcast scalar.
3187 (define_expand "aarch64_sqdmlal2_n<mode>"
3188 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3189 (match_operand:<VWIDE> 1 "register_operand" "w")
3190 (match_operand:VQ_HSI 2 "register_operand" "w")
3191 (match_operand:<VEL> 3 "register_operand" "w")]
3192 "TARGET_SIMD"
3193 {
3194 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3195 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3196 operands[2], operands[3],
3197 p));
3198 DONE;
3199 })
3200
;; Expander: SQDMLSL2 with broadcast scalar.
3201 (define_expand "aarch64_sqdmlsl2_n<mode>"
3202 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3203 (match_operand:<VWIDE> 1 "register_operand" "w")
3204 (match_operand:VQ_HSI 2 "register_operand" "w")
3205 (match_operand:<VEL> 3 "register_operand" "w")]
3206 "TARGET_SIMD"
3207 {
3208 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3209 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3210 operands[2], operands[3],
3211 p));
3212 DONE;
3213 })
3214
3215 ;; vqdmull
3216
;; Saturating doubling multiply long (SQDMULL): widen both operands,
;; multiply, then saturating-double (ss_ashift by 1).  No accumulator.
3217 (define_insn "aarch64_sqdmull<mode>"
3218 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3219 (ss_ashift:<VWIDE>
3220 (mult:<VWIDE>
3221 (sign_extend:<VWIDE>
3222 (match_operand:VSD_HSI 1 "register_operand" "w"))
3223 (sign_extend:<VWIDE>
3224 (match_operand:VSD_HSI 2 "register_operand" "w")))
3225 (const_int 1)))]
3226 "TARGET_SIMD"
3227 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3228 [(set_attr "type" "neon_sat_mul_<Vetype>_long")]
3229 )
3230
3231 ;; vqdmull_lane
3232
;; SQDMULL by a duplicated lane of a 64-bit vector; lane number is
;; endian-adjusted before printing.
3233 (define_insn "aarch64_sqdmull_lane<mode>"
3234 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3235 (ss_ashift:<VWIDE>
3236 (mult:<VWIDE>
3237 (sign_extend:<VWIDE>
3238 (match_operand:VD_HSI 1 "register_operand" "w"))
3239 (sign_extend:<VWIDE>
3240 (vec_duplicate:VD_HSI
3241 (vec_select:<VEL>
3242 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3243 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3244 ))
3245 (const_int 1)))]
3246 "TARGET_SIMD"
3247 {
3248 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3249 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3250 }
3251 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3252 )
3253
;; SQDMULL by a duplicated lane of a 128-bit vector.
3254 (define_insn "aarch64_sqdmull_laneq<mode>"
3255 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3256 (ss_ashift:<VWIDE>
3257 (mult:<VWIDE>
3258 (sign_extend:<VWIDE>
3259 (match_operand:VD_HSI 1 "register_operand" "w"))
3260 (sign_extend:<VWIDE>
3261 (vec_duplicate:VD_HSI
3262 (vec_select:<VEL>
3263 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3264 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3265 ))
3266 (const_int 1)))]
3267 "TARGET_SIMD"
3268 {
3269 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3270 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3271 }
3272 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3273 )
3274
;; Scalar (SD_HSI) SQDMULL by a lane of a 64-bit vector.
3275 (define_insn "aarch64_sqdmull_lane<mode>"
3276 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3277 (ss_ashift:<VWIDE>
3278 (mult:<VWIDE>
3279 (sign_extend:<VWIDE>
3280 (match_operand:SD_HSI 1 "register_operand" "w"))
3281 (sign_extend:<VWIDE>
3282 (vec_select:<VEL>
3283 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3284 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3285 ))
3286 (const_int 1)))]
3287 "TARGET_SIMD"
3288 {
3289 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3290 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3291 }
3292 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3293 )
3294
;; Scalar (SD_HSI) SQDMULL by a lane of a 128-bit vector.
3295 (define_insn "aarch64_sqdmull_laneq<mode>"
3296 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3297 (ss_ashift:<VWIDE>
3298 (mult:<VWIDE>
3299 (sign_extend:<VWIDE>
3300 (match_operand:SD_HSI 1 "register_operand" "w"))
3301 (sign_extend:<VWIDE>
3302 (vec_select:<VEL>
3303 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3304 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3305 ))
3306 (const_int 1)))]
3307 "TARGET_SIMD"
3308 {
3309 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3310 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3311 }
3312 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3313 )
3314
3315 ;; vqdmull_n
3316
;; SQDMULL with a broadcast scalar multiplicand, printed as element [0].
3317 (define_insn "aarch64_sqdmull_n<mode>"
3318 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3319 (ss_ashift:<VWIDE>
3320 (mult:<VWIDE>
3321 (sign_extend:<VWIDE>
3322 (match_operand:VD_HSI 1 "register_operand" "w"))
3323 (sign_extend:<VWIDE>
3324 (vec_duplicate:VD_HSI
3325 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3326 )
3327 (const_int 1)))]
3328 "TARGET_SIMD"
3329 "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3330 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3331 )
3332
3333 ;; vqdmull2
3334
3335
3336
;; SQDMULL2: SQDMULL applied to the high halves of two 128-bit vectors.
;; Operand 3 is the hi-half selector built by the expander below.
3337 (define_insn "aarch64_sqdmull2<mode>_internal"
3338 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3339 (ss_ashift:<VWIDE>
3340 (mult:<VWIDE>
3341 (sign_extend:<VWIDE>
3342 (vec_select:<VHALF>
3343 (match_operand:VQ_HSI 1 "register_operand" "w")
3344 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3345 (sign_extend:<VWIDE>
3346 (vec_select:<VHALF>
3347 (match_operand:VQ_HSI 2 "register_operand" "w")
3348 (match_dup 3)))
3349 )
3350 (const_int 1)))]
3351 "TARGET_SIMD"
3352 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3353 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3354 )
3355
;; Expander for SQDMULL2: constructs the hi-half selector.
3356 (define_expand "aarch64_sqdmull2<mode>"
3357 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3358 (match_operand:VQ_HSI 1 "register_operand" "w")
3359 (match_operand:VQ_HSI 2 "register_operand" "w")]
3360 "TARGET_SIMD"
3361 {
3362 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3363 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
3364 operands[2], p));
3365 DONE;
3366 })
3367
3368 ;; vqdmull2_lane
3369
;; SQDMULL2 by a duplicated lane of a 64-bit vector; operand 4 selects
;; the high half, operand 3 is the endian-adjusted lane number.
3370 (define_insn "aarch64_sqdmull2_lane<mode>_internal"
3371 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3372 (ss_ashift:<VWIDE>
3373 (mult:<VWIDE>
3374 (sign_extend:<VWIDE>
3375 (vec_select:<VHALF>
3376 (match_operand:VQ_HSI 1 "register_operand" "w")
3377 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3378 (sign_extend:<VWIDE>
3379 (vec_duplicate:<VHALF>
3380 (vec_select:<VEL>
3381 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3382 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3383 ))
3384 (const_int 1)))]
3385 "TARGET_SIMD"
3386 {
3387 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3])));
3388 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3389 }
3390 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3391 )
3392
;; SQDMULL2 by a duplicated lane of a 128-bit vector.
3393 (define_insn "aarch64_sqdmull2_laneq<mode>_internal"
3394 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3395 (ss_ashift:<VWIDE>
3396 (mult:<VWIDE>
3397 (sign_extend:<VWIDE>
3398 (vec_select:<VHALF>
3399 (match_operand:VQ_HSI 1 "register_operand" "w")
3400 (match_operand:VQ_HSI 4 "vect_par_cnst_hi_half" "")))
3401 (sign_extend:<VWIDE>
3402 (vec_duplicate:<VHALF>
3403 (vec_select:<VEL>
3404 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3405 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3406 ))
3407 (const_int 1)))]
3408 "TARGET_SIMD"
3409 {
3410 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3])));
3411 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3412 }
3413 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3414 )
3415
;; Expander: SQDMULL2 by lane of a 64-bit vector.
3416 (define_expand "aarch64_sqdmull2_lane<mode>"
3417 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3418 (match_operand:VQ_HSI 1 "register_operand" "w")
3419 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3420 (match_operand:SI 3 "immediate_operand" "i")]
3421 "TARGET_SIMD"
3422 {
3423 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3424 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
3425 operands[2], operands[3],
3426 p));
3427 DONE;
3428 })
3429
;; Expander: SQDMULL2 by lane of a 128-bit vector.
3430 (define_expand "aarch64_sqdmull2_laneq<mode>"
3431 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3432 (match_operand:VQ_HSI 1 "register_operand" "w")
3433 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3434 (match_operand:SI 3 "immediate_operand" "i")]
3435 "TARGET_SIMD"
3436 {
3437 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3438 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
3439 operands[2], operands[3],
3440 p));
3441 DONE;
3442 })
3443
3444 ;; vqdmull2_n
3445
;; SQDMULL2 with a broadcast scalar multiplicand (printed as lane [0]);
;; operand 3 selects the high half of operand 1.
3446 (define_insn "aarch64_sqdmull2_n<mode>_internal"
3447 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3448 (ss_ashift:<VWIDE>
3449 (mult:<VWIDE>
3450 (sign_extend:<VWIDE>
3451 (vec_select:<VHALF>
3452 (match_operand:VQ_HSI 1 "register_operand" "w")
3453 (match_operand:VQ_HSI 3 "vect_par_cnst_hi_half" "")))
3454 (sign_extend:<VWIDE>
3455 (vec_duplicate:<VHALF>
3456 (match_operand:<VEL> 2 "register_operand" "<vwx>")))
3457 )
3458 (const_int 1)))]
3459 "TARGET_SIMD"
3460 "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[0]"
3461 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3462 )
3463
;; Expander: SQDMULL2 with broadcast scalar.
3464 (define_expand "aarch64_sqdmull2_n<mode>"
3465 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3466 (match_operand:VQ_HSI 1 "register_operand" "w")
3467 (match_operand:<VEL> 2 "register_operand" "w")]
3468 "TARGET_SIMD"
3469 {
3470 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true);
3471 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
3472 operands[2], p));
3473 DONE;
3474 })
3475
3476 ;; vshl
3477
;; Register-controlled shift: s/u/sr/ur-shl.  The per-element shift
;; amount comes from register operand 2.
;; (Dropped a stray ';' that followed the output-template string; it was
;; parsed as an empty md comment but was misleading cruft.)
3478 (define_insn "aarch64_<sur>shl<mode>"
3479 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3480 (unspec:VSDQ_I_DI
3481 [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3482 (match_operand:VSDQ_I_DI 2 "register_operand" "w")]
3483 VSHL))]
3484 "TARGET_SIMD"
3485 "<sur>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3486 [(set_attr "type" "neon_shift_reg<q>")]
3487 )
3488
3489
3490 ;; vqshl
3491
;; Saturating (rounding) register-controlled shift: s/u-q(r)shl.
;; (Dropped a stray ';' that followed the output-template string, as in
;; the vshl pattern above; it added nothing and read as a typo.)
3492 (define_insn "aarch64_<sur>q<r>shl<mode>"
3493 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3494 (unspec:VSDQ_I
3495 [(match_operand:VSDQ_I 1 "register_operand" "w")
3496 (match_operand:VSDQ_I 2 "register_operand" "w")]
3497 VQSHL))]
3498 "TARGET_SIMD"
3499 "<sur>q<r>shl\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3500 [(set_attr "type" "neon_sat_shift_reg<q>")]
3501 )
3502
3503 ;; vshll_n
3504
;; Shift-left-long by immediate.  When the shift amount equals the
;; element bit width the architecture only provides the plain SHLL
;; mnemonic, so that form is special-cased in the C output stanza.
3505 (define_insn "aarch64_<sur>shll_n<mode>"
3506 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3507 (unspec:<VWIDE> [(match_operand:VD_BHSI 1 "register_operand" "w")
3508 (match_operand:SI 2
3509 "aarch64_simd_shift_imm_bitsize_<ve_mode>" "i")]
3510 VSHLL))]
3511 "TARGET_SIMD"
3512 "*
3513 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3514 if (INTVAL (operands[2]) == bit_width)
3515 {
3516 return \"shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3517 }
3518 else {
3519 return \"<sur>shll\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3520 }"
3521 [(set_attr "type" "neon_shift_imm_long")]
3522 )
3523
3524 ;; vshll_high_n
3525
;; High-half variant of shift-left-long by immediate (SHLL2 family).
;; NOTE(review): operand 2 here uses plain "immediate_operand", unlike
;; the shll_n pattern above which uses the bitsize-bounded predicate —
;; looks like it relies on the builtin expander to range-check; confirm.
3526 (define_insn "aarch64_<sur>shll2_n<mode>"
3527 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3528 (unspec:<VWIDE> [(match_operand:VQW 1 "register_operand" "w")
3529 (match_operand:SI 2 "immediate_operand" "i")]
3530 VSHLL))]
3531 "TARGET_SIMD"
3532 "*
3533 int bit_width = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
3534 if (INTVAL (operands[2]) == bit_width)
3535 {
3536 return \"shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3537 }
3538 else {
3539 return \"<sur>shll2\\t%0.<Vwtype>, %1.<Vtype>, %2\";
3540 }"
3541 [(set_attr "type" "neon_shift_imm_long")]
3542 )
3543
3544 ;; vrshr_n
3545
;; (Rounding) shift right by immediate; shift count in [1, elem bits].
3546 (define_insn "aarch64_<sur>shr_n<mode>"
3547 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3548 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "w")
3549 (match_operand:SI 2
3550 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3551 VRSHR_N))]
3552 "TARGET_SIMD"
3553 "<sur>shr\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3554 [(set_attr "type" "neon_sat_shift_imm<q>")]
3555 )
3556
3557 ;; v(r)sra_n
3558
;; (Rounding) shift-right-and-accumulate by immediate; operand 1 is the
;; accumulator, tied to the destination register.
3559 (define_insn "aarch64_<sur>sra_n<mode>"
3560 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3561 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3562 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3563 (match_operand:SI 3
3564 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3565 VSRA))]
3566 "TARGET_SIMD"
3567 "<sur>sra\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3568 [(set_attr "type" "neon_shift_acc<q>")]
3569 )
3570
3571 ;; vs<lr>i_n
3572
;; Shift-and-insert (SLI/SRI) by immediate; operand 1 supplies the bits
;; that are preserved, so it is tied to the destination.
3573 (define_insn "aarch64_<sur>s<lr>i_n<mode>"
3574 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
3575 (unspec:VSDQ_I_DI [(match_operand:VSDQ_I_DI 1 "register_operand" "0")
3576 (match_operand:VSDQ_I_DI 2 "register_operand" "w")
3577 (match_operand:SI 3
3578 "aarch64_simd_shift_imm_<offsetlr><ve_mode>" "i")]
3579 VSLRI))]
3580 "TARGET_SIMD"
3581 "s<lr>i\\t%<v>0<Vmtype>, %<v>2<Vmtype>, %3"
3582 [(set_attr "type" "neon_shift_imm<q>")]
3583 )
3584
3585 ;; vqshl(u)
3586
;; Saturating shift left by immediate: SQSHL/UQSHL/SQSHLU.
3587 (define_insn "aarch64_<sur>qshl<u>_n<mode>"
3588 [(set (match_operand:VSDQ_I 0 "register_operand" "=w")
3589 (unspec:VSDQ_I [(match_operand:VSDQ_I 1 "register_operand" "w")
3590 (match_operand:SI 2
3591 "aarch64_simd_shift_imm_<ve_mode>" "i")]
3592 VQSHL_N))]
3593 "TARGET_SIMD"
3594 "<sur>qshl<u>\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2"
3595 [(set_attr "type" "neon_sat_shift_imm<q>")]
3596 )
3597
3598
3599 ;; vq(r)shr(u)n_n
3600
;; Saturating (rounding) shift right narrow by immediate; the result
;; mode is the narrowed element vector (<VNARROWQ>).
3601 (define_insn "aarch64_<sur>q<r>shr<u>n_n<mode>"
3602 [(set (match_operand:<VNARROWQ> 0 "register_operand" "=w")
3603 (unspec:<VNARROWQ> [(match_operand:VSQN_HSDI 1 "register_operand" "w")
3604 (match_operand:SI 2
3605 "aarch64_simd_shift_imm_offset_<ve_mode>" "i")]
3606 VQSHRN_N))]
3607 "TARGET_SIMD"
3608 "<sur>q<r>shr<u>n\\t%<vn2>0<Vmntype>, %<v>1<Vmtype>, %2"
3609 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3610 )
3611
3612
3613 ;; cm(eq|ge|gt|lt|le)
3614 ;; Note, we have constraints for Dz and Z as different expanders
3615 ;; have different ideas of what should be passed to this pattern.
3616
;; Integer vector compare producing an all-ones/all-zeros mask (the neg
;; of the 0/1 comparison result).  Alternative 2 matches a zero operand
;; and emits the compare-against-#0 form.
3617 (define_insn "aarch64_cm<optab><mode>"
3618 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3619 (neg:<V_cmp_result>
3620 (COMPARISONS:<V_cmp_result>
3621 (match_operand:VDQ_I 1 "register_operand" "w,w")
3622 (match_operand:VDQ_I 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3623 )))]
3624 "TARGET_SIMD"
3625 "@
3626 cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3627 cm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, #0"
3628 [(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
3629 )
3630
;; DImode signed compare.  Emitted as "#" and split after reload: if the
;; operands landed in general registers, split to a CC-setting compare
;; plus cstoredi_neg; otherwise fall through to the CC-clobber-free
;; SIMD pattern (*aarch64_cm<optab>di below).
3631 (define_insn_and_split "aarch64_cm<optab>di"
3632 [(set (match_operand:DI 0 "register_operand" "=w,w,r")
3633 (neg:DI
3634 (COMPARISONS:DI
3635 (match_operand:DI 1 "register_operand" "w,w,r")
3636 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
3637 )))
3638 (clobber (reg:CC CC_REGNUM))]
3639 "TARGET_SIMD"
3640 "#"
3641 "reload_completed"
3642 [(set (match_operand:DI 0 "register_operand")
3643 (neg:DI
3644 (COMPARISONS:DI
3645 (match_operand:DI 1 "register_operand")
3646 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3647 )))]
3648 {
3649 /* If we are in the general purpose register file,
3650 we split to a sequence of comparison and store. */
3651 if (GP_REGNUM_P (REGNO (operands[0]))
3652 && GP_REGNUM_P (REGNO (operands[1])))
3653 {
3654 machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
3655 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3656 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3657 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3658 DONE;
3659 }
3660 /* Otherwise, we expand to a similar pattern which does not
3661 clobber CC_REGNUM. */
3662 }
3663 [(set_attr "type" "neon_compare, neon_compare_zero, multiple")]
3664 )
3665
;; Post-reload SIMD-register form of the DImode signed compare; no CC
;; clobber, matched by the split above when operands stayed in FP regs.
3666 (define_insn "*aarch64_cm<optab>di"
3667 [(set (match_operand:DI 0 "register_operand" "=w,w")
3668 (neg:DI
3669 (COMPARISONS:DI
3670 (match_operand:DI 1 "register_operand" "w,w")
3671 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz")
3672 )))]
3673 "TARGET_SIMD && reload_completed"
3674 "@
3675 cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
3676 cm<optab>\t%d0, %d1, #0"
3677 [(set_attr "type" "neon_compare, neon_compare_zero")]
3678 )
3679
3680 ;; cm(hs|hi)
3681
;; Unsigned integer vector compare (CMHS/CMHI) producing a mask; no
;; compare-against-zero alternative here.
3682 (define_insn "aarch64_cm<optab><mode>"
3683 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3684 (neg:<V_cmp_result>
3685 (UCOMPARISONS:<V_cmp_result>
3686 (match_operand:VDQ_I 1 "register_operand" "w")
3687 (match_operand:VDQ_I 2 "register_operand" "w")
3688 )))]
3689 "TARGET_SIMD"
3690 "cm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3691 [(set_attr "type" "neon_compare<q>")]
3692 )
3693
;; DImode unsigned compare; same split strategy as the signed DImode
;; pattern: GP registers get compare + cstoredi_neg, FP registers fall
;; through to the CC-free *aarch64_cm<optab>di pattern below.
3694 (define_insn_and_split "aarch64_cm<optab>di"
3695 [(set (match_operand:DI 0 "register_operand" "=w,r")
3696 (neg:DI
3697 (UCOMPARISONS:DI
3698 (match_operand:DI 1 "register_operand" "w,r")
3699 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
3700 )))
3701 (clobber (reg:CC CC_REGNUM))]
3702 "TARGET_SIMD"
3703 "#"
3704 "reload_completed"
3705 [(set (match_operand:DI 0 "register_operand")
3706 (neg:DI
3707 (UCOMPARISONS:DI
3708 (match_operand:DI 1 "register_operand")
3709 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
3710 )))]
3711 {
3712 /* If we are in the general purpose register file,
3713 we split to a sequence of comparison and store. */
3714 if (GP_REGNUM_P (REGNO (operands[0]))
3715 && GP_REGNUM_P (REGNO (operands[1])))
3716 {
3717 machine_mode mode = CCmode;
3718 rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
3719 rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
3720 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3721 DONE;
3722 }
3723 /* Otherwise, we expand to a similar pattern which does not
3724 clobber CC_REGNUM. */
3725 }
3726 [(set_attr "type" "neon_compare,multiple")]
3727 )
3728
;; Post-reload SIMD-register form of the DImode unsigned compare.
3729 (define_insn "*aarch64_cm<optab>di"
3730 [(set (match_operand:DI 0 "register_operand" "=w")
3731 (neg:DI
3732 (UCOMPARISONS:DI
3733 (match_operand:DI 1 "register_operand" "w")
3734 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w")
3735 )))]
3736 "TARGET_SIMD && reload_completed"
3737 "cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>"
3738 [(set_attr "type" "neon_compare")]
3739 )
3740
3741 ;; cmtst
3742
3743 ;; Although neg (ne (and x y) 0) is the natural way of expressing a cmtst,
3744 ;; we don't have any insns using ne, and aarch64_vcond_internal outputs
3745 ;; not (neg (eq (and x y) 0))
3746 ;; which is rewritten by simplify_rtx as
3747 ;; plus (eq (and x y) 0) -1.
3748
;; Bit-test compare (CMTST) matched in the simplify_rtx canonical form
;; described above; operands 3 and 4 pin the zero and all-ones vectors.
3749 (define_insn "aarch64_cmtst<mode>"
3750 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3751 (plus:<V_cmp_result>
3752 (eq:<V_cmp_result>
3753 (and:VDQ_I
3754 (match_operand:VDQ_I 1 "register_operand" "w")
3755 (match_operand:VDQ_I 2 "register_operand" "w"))
3756 (match_operand:VDQ_I 3 "aarch64_simd_imm_zero"))
3757 (match_operand:<V_cmp_result> 4 "aarch64_simd_imm_minus_one")))
3758 ]
3759 "TARGET_SIMD"
3760 "cmtst\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3761 [(set_attr "type" "neon_tst<q>")]
3762 )
3763
;; DImode CMTST; split after reload like the DImode compares above: GP
;; registers become an AND-based compare + cstoredi_neg, FP registers
;; use the SIMD *aarch64_cmtstdi pattern below.
3764 (define_insn_and_split "aarch64_cmtstdi"
3765 [(set (match_operand:DI 0 "register_operand" "=w,r")
3766 (neg:DI
3767 (ne:DI
3768 (and:DI
3769 (match_operand:DI 1 "register_operand" "w,r")
3770 (match_operand:DI 2 "register_operand" "w,r"))
3771 (const_int 0))))
3772 (clobber (reg:CC CC_REGNUM))]
3773 "TARGET_SIMD"
3774 "#"
3775 "reload_completed"
3776 [(set (match_operand:DI 0 "register_operand")
3777 (neg:DI
3778 (ne:DI
3779 (and:DI
3780 (match_operand:DI 1 "register_operand")
3781 (match_operand:DI 2 "register_operand"))
3782 (const_int 0))))]
3783 {
3784 /* If we are in the general purpose register file,
3785 we split to a sequence of comparison and store. */
3786 if (GP_REGNUM_P (REGNO (operands[0]))
3787 && GP_REGNUM_P (REGNO (operands[1])))
3788 {
3789 rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
3790 machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
3791 rtx cc_reg = aarch64_gen_compare_reg (NE, and_tree, const0_rtx);
3792 rtx comparison = gen_rtx_NE (mode, and_tree, const0_rtx);
3793 emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
3794 DONE;
3795 }
3796 /* Otherwise, we expand to a similar pattern which does not
3797 clobber CC_REGNUM. */
3798 }
3799 [(set_attr "type" "neon_tst,multiple")]
3800 )
3801
;; Vector-register-file form of scalar DImode cmtst; target of the
;; split above when the operands landed in SIMD registers.  No CC
;; clobber is needed.
3802 (define_insn "*aarch64_cmtstdi"
3803 [(set (match_operand:DI 0 "register_operand" "=w")
3804 (neg:DI
3805 (ne:DI
3806 (and:DI
3807 (match_operand:DI 1 "register_operand" "w")
3808 (match_operand:DI 2 "register_operand" "w"))
3809 (const_int 0))))]
3810 "TARGET_SIMD"
3811 "cmtst\t%d0, %d1, %d2"
3812 [(set_attr "type" "neon_tst")]
3813 )
3814
3815 ;; fcm(eq|ge|gt|le|lt)
3816
;; Floating-point compares (fcmeq/fcmge/fcmgt/fcmle/fcmlt).
;; Alternative 2 uses the compare-against-zero form when operand 2 is
;; the zero constant (constraint "YDz").  lt/le are emitted via the
;; <n_optab>/<cmp_1>/<cmp_2> iterator attributes, which swap the
;; operands of gt/ge.
3817 (define_insn "aarch64_cm<optab><mode>"
3818 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w,w")
3819 (neg:<V_cmp_result>
3820 (COMPARISONS:<V_cmp_result>
3821 (match_operand:VALLF 1 "register_operand" "w,w")
3822 (match_operand:VALLF 2 "aarch64_simd_reg_or_zero" "w,YDz")
3823 )))]
3824 "TARGET_SIMD"
3825 "@
3826 fcm<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>
3827 fcm<optab>\t%<v>0<Vmtype>, %<v>1<Vmtype>, 0"
3828 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3830
3831 ;; fac(ge|gt)
3832 ;; Note we can also handle what would be fac(le|lt) by
3833 ;; generating fac(ge|gt).
3834
;; Absolute floating-point compares: facge/facgt compare |op1| with
;; |op2|.  The le/lt variants are produced by the <cmp_1>/<cmp_2>
;; iterator attributes swapping the operands (see comment above).
3835 (define_insn "*aarch64_fac<optab><mode>"
3836 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
3837 (neg:<V_cmp_result>
3838 (FAC_COMPARISONS:<V_cmp_result>
3839 (abs:VALLF (match_operand:VALLF 1 "register_operand" "w"))
3840 (abs:VALLF (match_operand:VALLF 2 "register_operand" "w"))
3841 )))]
3842 "TARGET_SIMD"
3843 "fac<n_optab>\t%<v>0<Vmtype>, %<v><cmp_1><Vmtype>, %<v><cmp_2><Vmtype>"
3844 [(set_attr "type" "neon_fp_compare_<Vetype><q>")]
3845 )
3846
3847 ;; addp
3848
;; Pairwise integer add on 64-bit integer vectors.
3849 (define_insn "aarch64_addp<mode>"
3850 [(set (match_operand:VD_BHSI 0 "register_operand" "=w")
3851 (unspec:VD_BHSI
3852 [(match_operand:VD_BHSI 1 "register_operand" "w")
3853 (match_operand:VD_BHSI 2 "register_operand" "w")]
3854 UNSPEC_ADDP))]
3855 "TARGET_SIMD"
3856 "addp\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
3857 [(set_attr "type" "neon_reduc_add<q>")]
3858 )
3859
;; Pairwise add of the two DI elements of a V2DI, producing a scalar
;; DI result in a SIMD register.
3860 (define_insn "aarch64_addpdi"
3861 [(set (match_operand:DI 0 "register_operand" "=w")
3862 (unspec:DI
3863 [(match_operand:V2DI 1 "register_operand" "w")]
3864 UNSPEC_ADDP))]
3865 "TARGET_SIMD"
3866 "addp\t%d0, %1.2d"
3867 [(set_attr "type" "neon_reduc_add")]
3868 )
3869
3870 ;; sqrt
3871
;; Vector floating-point square root (standard pattern name).
3872 (define_insn "sqrt<mode>2"
3873 [(set (match_operand:VDQF 0 "register_operand" "=w")
3874 (sqrt:VDQF (match_operand:VDQF 1 "register_operand" "w")))]
3875 "TARGET_SIMD"
3876 "fsqrt\\t%0.<Vtype>, %1.<Vtype>"
3877 [(set_attr "type" "neon_fp_sqrt_<Vetype><q>")]
3878 )
3879
3880 ;; Patterns for vector struct loads and stores.
3881
;; Two-register interleaved load; the inner VQ unspec only carries the
;; element mode so the right <Vtype> suffix is printed.
3882 (define_insn "vec_load_lanesoi<mode>"
3883 [(set (match_operand:OI 0 "register_operand" "=w")
3884 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
3885 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3886 UNSPEC_LD2))]
3887 "TARGET_SIMD"
3888 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3889 [(set_attr "type" "neon_load2_2reg<q>")]
3890 )
3891
;; Load one two-element structure and replicate it to all lanes of a
;; pair of registers.
3892 (define_insn "aarch64_simd_ld2r<mode>"
3893 [(set (match_operand:OI 0 "register_operand" "=w")
3894 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3895 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3896 UNSPEC_LD2_DUP))]
3897 "TARGET_SIMD"
3898 "ld2r\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
3899 [(set_attr "type" "neon_load2_all_lanes<q>")]
3900 )
3901
;; Load a two-element structure into lane %3 of a register pair;
;; operand 2 ties the remaining lanes to the prior register contents.
3902 (define_insn "aarch64_vec_load_lanesoi_lane<mode>"
3903 [(set (match_operand:OI 0 "register_operand" "=w")
3904 (unspec:OI [(match_operand:<V_TWO_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3905 (match_operand:OI 2 "register_operand" "0")
3906 (match_operand:SI 3 "immediate_operand" "i")
3907 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3908 UNSPEC_LD2_LANE))]
3909 "TARGET_SIMD"
3910 "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"
3911 [(set_attr "type" "neon_load2_one_lane")]
3912 )
3913
;; Two-register interleaved store, inverse of vec_load_lanesoi.
3914 (define_insn "vec_store_lanesoi<mode>"
3915 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
3916 (unspec:OI [(match_operand:OI 1 "register_operand" "w")
3917 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3918 UNSPEC_ST2))]
3919 "TARGET_SIMD"
3920 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
3921 [(set_attr "type" "neon_store2_2reg<q>")]
3922 )
3923
;; Store lane %2 of a register pair as a two-element structure.
;; Fix: this is an ST2 single-lane store, so its scheduling type must
;; be neon_store2_one_lane<q>; it was wrongly attributed as
;; neon_store3_one_lane<q> (the ST3/ST4 siblings below correctly use
;; store3/store4), which fed the wrong latency/unit data to the
;; pipeline descriptions.
3924 (define_insn "vec_store_lanesoi_lane<mode>"
3925 [(set (match_operand:<V_TWO_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3926 (unspec:<V_TWO_ELEM> [(match_operand:OI 1 "register_operand" "w")
3927 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
3928 (match_operand:SI 2 "immediate_operand" "i")]
3929 UNSPEC_ST2_LANE))]
3930 "TARGET_SIMD"
3931 "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"
3932 [(set_attr "type" "neon_store2_one_lane<q>")]
3933 )
3934
;; Three-register interleaved load.
3935 (define_insn "vec_load_lanesci<mode>"
3936 [(set (match_operand:CI 0 "register_operand" "=w")
3937 (unspec:CI [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
3938 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3939 UNSPEC_LD3))]
3940 "TARGET_SIMD"
3941 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3942 [(set_attr "type" "neon_load3_3reg<q>")]
3943 )
3944
;; Load one three-element structure and replicate to all lanes of a
;; register triple.
3945 (define_insn "aarch64_simd_ld3r<mode>"
3946 [(set (match_operand:CI 0 "register_operand" "=w")
3947 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3948 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
3949 UNSPEC_LD3_DUP))]
3950 "TARGET_SIMD"
3951 "ld3r\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
3952 [(set_attr "type" "neon_load3_all_lanes<q>")]
3953 )
3954
;; Load a three-element structure into lane %3 of a register triple;
;; operand 2 ties the other lanes to the previous contents.
3955 (define_insn "aarch64_vec_load_lanesci_lane<mode>"
3956 [(set (match_operand:CI 0 "register_operand" "=w")
3957 (unspec:CI [(match_operand:<V_THREE_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
3958 (match_operand:CI 2 "register_operand" "0")
3959 (match_operand:SI 3 "immediate_operand" "i")
3960 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3961 UNSPEC_LD3_LANE))]
3962 "TARGET_SIMD"
3963 "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"
3964 [(set_attr "type" "neon_load3_one_lane")]
3965 )
3966
;; Three-register interleaved store.
3967 (define_insn "vec_store_lanesci<mode>"
3968 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
3969 (unspec:CI [(match_operand:CI 1 "register_operand" "w")
3970 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3971 UNSPEC_ST3))]
3972 "TARGET_SIMD"
3973 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
3974 [(set_attr "type" "neon_store3_3reg<q>")]
3975 )
3976
;; Store lane %2 of a register triple as a three-element structure.
3977 (define_insn "vec_store_lanesci_lane<mode>"
3978 [(set (match_operand:<V_THREE_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
3979 (unspec:<V_THREE_ELEM> [(match_operand:CI 1 "register_operand" "w")
3980 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
3981 (match_operand:SI 2 "immediate_operand" "i")]
3982 UNSPEC_ST3_LANE))]
3983 "TARGET_SIMD"
3984 "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"
3985 [(set_attr "type" "neon_store3_one_lane<q>")]
3986 )
3987
;; Four-register interleaved load.
3988 (define_insn "vec_load_lanesxi<mode>"
3989 [(set (match_operand:XI 0 "register_operand" "=w")
3990 (unspec:XI [(match_operand:XI 1 "aarch64_simd_struct_operand" "Utv")
3991 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
3992 UNSPEC_LD4))]
3993 "TARGET_SIMD"
3994 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
3995 [(set_attr "type" "neon_load4_4reg<q>")]
3996 )
3997
;; Load one four-element structure and replicate to all lanes of a
;; register quad.
3998 (define_insn "aarch64_simd_ld4r<mode>"
3999 [(set (match_operand:XI 0 "register_operand" "=w")
4000 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4001 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4002 UNSPEC_LD4_DUP))]
4003 "TARGET_SIMD"
4004 "ld4r\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4005 [(set_attr "type" "neon_load4_all_lanes<q>")]
4006 )
4007
;; Load a four-element structure into lane %3 of a register quad;
;; operand 2 ties the other lanes to the previous contents.
4008 (define_insn "aarch64_vec_load_lanesxi_lane<mode>"
4009 [(set (match_operand:XI 0 "register_operand" "=w")
4010 (unspec:XI [(match_operand:<V_FOUR_ELEM> 1 "aarch64_simd_struct_operand" "Utv")
4011 (match_operand:XI 2 "register_operand" "0")
4012 (match_operand:SI 3 "immediate_operand" "i")
4013 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4014 UNSPEC_LD4_LANE))]
4015 "TARGET_SIMD"
4016 "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"
4017 [(set_attr "type" "neon_load4_one_lane")]
4018 )
4019
;; Four-register interleaved store.
4020 (define_insn "vec_store_lanesxi<mode>"
4021 [(set (match_operand:XI 0 "aarch64_simd_struct_operand" "=Utv")
4022 (unspec:XI [(match_operand:XI 1 "register_operand" "w")
4023 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4024 UNSPEC_ST4))]
4025 "TARGET_SIMD"
4026 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4027 [(set_attr "type" "neon_store4_4reg<q>")]
4028 )
4029
;; Store lane %2 of a register quad as a four-element structure.
4030 (define_insn "vec_store_lanesxi_lane<mode>"
4031 [(set (match_operand:<V_FOUR_ELEM> 0 "aarch64_simd_struct_operand" "=Utv")
4032 (unspec:<V_FOUR_ELEM> [(match_operand:XI 1 "register_operand" "w")
4033 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4034 (match_operand:SI 2 "immediate_operand" "i")]
4035 UNSPEC_ST4_LANE))]
4036 "TARGET_SIMD"
4037 "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"
4038 [(set_attr "type" "neon_store4_one_lane<q>")]
4039 )
4040
4041 ;; Reload patterns for AdvSIMD register list operands.
4042
;; Move expander for the opaque struct modes (OI/CI/XI).  While
;; pseudos can still be created, force the source into a register
;; when the destination is not one, so the insn below always has at
;; least one register operand.
4043 (define_expand "mov<mode>"
4044 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "")
4045 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" ""))]
4046 "TARGET_SIMD"
4047 {
4048 if (can_create_pseudo_p ())
4049 {
4050 if (GET_CODE (operands[0]) != REG)
4051 operands[1] = force_reg (<MODE>mode, operands[1]);
4052 }
4053 })
4054
;; Struct-mode move insn.  Register-to-register copies (alternative 0)
;; stay "#" and are broken up by the define_splits below; memory
;; transfers use multi-register st1/ld1.  Length is computed by
;; aarch64_simd_attr_length_move since the reg-reg case expands to
;; several instructions.
4055 (define_insn "*aarch64_mov<mode>"
4056 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4057 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4058 "TARGET_SIMD
4059 && (register_operand (operands[0], <MODE>mode)
4060 || register_operand (operands[1], <MODE>mode))"
4061
4062 {
4063 switch (which_alternative)
4064 {
4065 case 0: return "#";
4066 case 1: return "st1\\t{%S1.16b - %<Vendreg>1.16b}, %0";
4067 case 2: return "ld1\\t{%S0.16b - %<Vendreg>0.16b}, %1";
4068 default: gcc_unreachable ();
4069 }
4070 }
4071 [(set_attr "type" "neon_move,neon_store<nregs>_<nregs>reg_q,\
4072 neon_load<nregs>_<nregs>reg_q")
4073 (set (attr "length") (symbol_ref "aarch64_simd_attr_length_move (insn)"))]
4074 )
4075
;; Single-register ld1/st1 wrapped in an unspec.  Used by the
;; aarch64_ld1/aarch64_st1 expanders below when BYTES_BIG_ENDIAN, in
;; place of a plain move.
4076 (define_insn "aarch64_be_ld1<mode>"
4077 [(set (match_operand:VALLDI 0 "register_operand" "=w")
4078 (unspec:VALLDI [(match_operand:VALLDI 1 "aarch64_simd_struct_operand" "Utv")]
4079 UNSPEC_LD1))]
4080 "TARGET_SIMD"
4081 "ld1\\t{%0<Vmtype>}, %1"
4082 [(set_attr "type" "neon_load1_1reg<q>")]
4083 )
4084
4085 (define_insn "aarch64_be_st1<mode>"
4086 [(set (match_operand:VALLDI 0 "aarch64_simd_struct_operand" "=Utv")
4087 (unspec:VALLDI [(match_operand:VALLDI 1 "register_operand" "w")]
4088 UNSPEC_ST1))]
4089 "TARGET_SIMD"
4090 "st1\\t{%1<Vmtype>}, %0"
4091 [(set_attr "type" "neon_store1_1reg<q>")]
4092 )
4093
;; Post-reload splits for register-to-register struct moves: copy the
;; OI/CI/XI value as 2/3/4 consecutive TFmode (128-bit) register
;; moves.  aarch64_simd_disambiguate_copy fills in operands[],
;; presumably choosing a copy order that is safe when the source and
;; destination register ranges overlap -- see its definition.
4094 (define_split
4095 [(set (match_operand:OI 0 "register_operand" "")
4096 (match_operand:OI 1 "register_operand" ""))]
4097 "TARGET_SIMD && reload_completed"
4098 [(set (match_dup 0) (match_dup 1))
4099 (set (match_dup 2) (match_dup 3))]
4100 {
4101 int rdest = REGNO (operands[0]);
4102 int rsrc = REGNO (operands[1]);
4103 rtx dest[2], src[2];
4104
4105 dest[0] = gen_rtx_REG (TFmode, rdest);
4106 src[0] = gen_rtx_REG (TFmode, rsrc);
4107 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
4108 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
4109
4110 aarch64_simd_disambiguate_copy (operands, dest, src, 2);
4111 })
4112
;; CI (three-register) variant of the split above.
4113 (define_split
4114 [(set (match_operand:CI 0 "register_operand" "")
4115 (match_operand:CI 1 "register_operand" ""))]
4116 "TARGET_SIMD && reload_completed"
4117 [(set (match_dup 0) (match_dup 1))
4118 (set (match_dup 2) (match_dup 3))
4119 (set (match_dup 4) (match_dup 5))]
4120 {
4121 int rdest = REGNO (operands[0]);
4122 int rsrc = REGNO (operands[1]);
4123 rtx dest[3], src[3];
4124
4125 dest[0] = gen_rtx_REG (TFmode, rdest);
4126 src[0] = gen_rtx_REG (TFmode, rsrc);
4127 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
4128 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
4129 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
4130 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
4131
4132 aarch64_simd_disambiguate_copy (operands, dest, src, 3);
4133 })
4134
;; XI (four-register) variant of the split above.
4135 (define_split
4136 [(set (match_operand:XI 0 "register_operand" "")
4137 (match_operand:XI 1 "register_operand" ""))]
4138 "TARGET_SIMD && reload_completed"
4139 [(set (match_dup 0) (match_dup 1))
4140 (set (match_dup 2) (match_dup 3))
4141 (set (match_dup 4) (match_dup 5))
4142 (set (match_dup 6) (match_dup 7))]
4143 {
4144 int rdest = REGNO (operands[0]);
4145 int rsrc = REGNO (operands[1]);
4146 rtx dest[4], src[4];
4147
4148 dest[0] = gen_rtx_REG (TFmode, rdest);
4149 src[0] = gen_rtx_REG (TFmode, rsrc);
4150 dest[1] = gen_rtx_REG (TFmode, rdest + 1);
4151 src[1] = gen_rtx_REG (TFmode, rsrc + 1);
4152 dest[2] = gen_rtx_REG (TFmode, rdest + 2);
4153 src[2] = gen_rtx_REG (TFmode, rsrc + 2);
4154 dest[3] = gen_rtx_REG (TFmode, rdest + 3);
4155 src[3] = gen_rtx_REG (TFmode, rsrc + 3);
4156
4157 aarch64_simd_disambiguate_copy (operands, dest, src, 4);
4158 })
4159
;; Builtin expanders for ld2r/ld3r: operand 1 is a pointer in a GP
;; register; wrap it in a MEM of the structure mode and emit the
;; matching load-replicate insn.
4160 (define_expand "aarch64_ld2r<mode>"
4161 [(match_operand:OI 0 "register_operand" "=w")
4162 (match_operand:DI 1 "register_operand" "w")
4163 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4164 "TARGET_SIMD"
4165 {
4166 machine_mode mode = <V_TWO_ELEM>mode;
4167 rtx mem = gen_rtx_MEM (mode, operands[1]);
4168
4169 emit_insn (gen_aarch64_simd_ld2r<mode> (operands[0], mem));
4170 DONE;
4171 })
4172
4173 (define_expand "aarch64_ld3r<mode>"
4174 [(match_operand:CI 0 "register_operand" "=w")
4175 (match_operand:DI 1 "register_operand" "w")
4176 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4177 "TARGET_SIMD"
4178 {
4179 machine_mode mode = <V_THREE_ELEM>mode;
4180 rtx mem = gen_rtx_MEM (mode, operands[1]);
4181
4182 emit_insn (gen_aarch64_simd_ld3r<mode> (operands[0], mem));
4183 DONE;
4184 })
4185
;; Builtin expander for ld4r: wrap the pointer (operand 1) in a MEM
;; of the four-element structure mode and emit the load-replicate
;; insn.  Fix: add the missing space after the comma in the
;; gen_... call, for consistency with the ld2r/ld3r expanders above
;; and GNU coding style.
4186 (define_expand "aarch64_ld4r<mode>"
4187 [(match_operand:XI 0 "register_operand" "=w")
4188 (match_operand:DI 1 "register_operand" "w")
4189 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4190 "TARGET_SIMD"
4191 {
4192 machine_mode mode = <V_FOUR_ELEM>mode;
4193 rtx mem = gen_rtx_MEM (mode, operands[1]);
4194
4195 emit_insn (gen_aarch64_simd_ld4r<mode> (operands[0], mem));
4196 DONE;
4197 })
4198
;; D-register ld2: each loaded 64-bit vector is zero-padded to 128
;; bits with vec_concat, then the concatenation is subreg'd to the
;; opaque OI mode, matching how D-reg structs occupy register pairs.
4199 (define_insn "aarch64_ld2<mode>_dreg"
4200 [(set (match_operand:OI 0 "register_operand" "=w")
4201 (subreg:OI
4202 (vec_concat:<VRL2>
4203 (vec_concat:<VDBL>
4204 (unspec:VD [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4205 UNSPEC_LD2)
4206 (vec_duplicate:VD (const_int 0)))
4207 (vec_concat:<VDBL>
4208 (unspec:VD [(match_dup 1)]
4209 UNSPEC_LD2)
4210 (vec_duplicate:VD (const_int 0)))) 0))]
4211 "TARGET_SIMD"
4212 "ld2\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
4213 [(set_attr "type" "neon_load2_2reg<q>")]
4214 )
4215
;; DX (DImode/DFmode element) variant: single-element "structures"
;; cannot be interleaved, so a plain two-register ld1 is emitted.
4216 (define_insn "aarch64_ld2<mode>_dreg"
4217 [(set (match_operand:OI 0 "register_operand" "=w")
4218 (subreg:OI
4219 (vec_concat:<VRL2>
4220 (vec_concat:<VDBL>
4221 (unspec:DX [(match_operand:TI 1 "aarch64_simd_struct_operand" "Utv")]
4222 UNSPEC_LD2)
4223 (const_int 0))
4224 (vec_concat:<VDBL>
4225 (unspec:DX [(match_dup 1)]
4226 UNSPEC_LD2)
4227 (const_int 0))) 0))]
4228 "TARGET_SIMD"
4229 "ld1\\t{%S0.1d - %T0.1d}, %1"
4230 [(set_attr "type" "neon_load1_2reg<q>")]
4231 )
4232
;; D-register ld3; same zero-padded vec_concat/subreg representation
;; as the ld2 dreg pattern above, extended to three registers.
4233 (define_insn "aarch64_ld3<mode>_dreg"
4234 [(set (match_operand:CI 0 "register_operand" "=w")
4235 (subreg:CI
4236 (vec_concat:<VRL3>
4237 (vec_concat:<VRL2>
4238 (vec_concat:<VDBL>
4239 (unspec:VD [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4240 UNSPEC_LD3)
4241 (vec_duplicate:VD (const_int 0)))
4242 (vec_concat:<VDBL>
4243 (unspec:VD [(match_dup 1)]
4244 UNSPEC_LD3)
4245 (vec_duplicate:VD (const_int 0))))
4246 (vec_concat:<VDBL>
4247 (unspec:VD [(match_dup 1)]
4248 UNSPEC_LD3)
4249 (vec_duplicate:VD (const_int 0)))) 0))]
4250 "TARGET_SIMD"
4251 "ld3\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
4252 [(set_attr "type" "neon_load3_3reg<q>")]
4253 )
4254
;; DX variant: emitted as a three-register ld1 (no interleaving for
;; single-element vectors).
4255 (define_insn "aarch64_ld3<mode>_dreg"
4256 [(set (match_operand:CI 0 "register_operand" "=w")
4257 (subreg:CI
4258 (vec_concat:<VRL3>
4259 (vec_concat:<VRL2>
4260 (vec_concat:<VDBL>
4261 (unspec:DX [(match_operand:EI 1 "aarch64_simd_struct_operand" "Utv")]
4262 UNSPEC_LD3)
4263 (const_int 0))
4264 (vec_concat:<VDBL>
4265 (unspec:DX [(match_dup 1)]
4266 UNSPEC_LD3)
4267 (const_int 0)))
4268 (vec_concat:<VDBL>
4269 (unspec:DX [(match_dup 1)]
4270 UNSPEC_LD3)
4271 (const_int 0))) 0))]
4272 "TARGET_SIMD"
4273 "ld1\\t{%S0.1d - %U0.1d}, %1"
4274 [(set_attr "type" "neon_load1_3reg<q>")]
4275 )
4276
;; D-register ld4; same zero-padded vec_concat/subreg representation,
;; extended to four registers.
4277 (define_insn "aarch64_ld4<mode>_dreg"
4278 [(set (match_operand:XI 0 "register_operand" "=w")
4279 (subreg:XI
4280 (vec_concat:<VRL4>
4281 (vec_concat:<VRL2>
4282 (vec_concat:<VDBL>
4283 (unspec:VD [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4284 UNSPEC_LD4)
4285 (vec_duplicate:VD (const_int 0)))
4286 (vec_concat:<VDBL>
4287 (unspec:VD [(match_dup 1)]
4288 UNSPEC_LD4)
4289 (vec_duplicate:VD (const_int 0))))
4290 (vec_concat:<VRL2>
4291 (vec_concat:<VDBL>
4292 (unspec:VD [(match_dup 1)]
4293 UNSPEC_LD4)
4294 (vec_duplicate:VD (const_int 0)))
4295 (vec_concat:<VDBL>
4296 (unspec:VD [(match_dup 1)]
4297 UNSPEC_LD4)
4298 (vec_duplicate:VD (const_int 0))))) 0))]
4299 "TARGET_SIMD"
4300 "ld4\\t{%S0.<Vtype> - %V0.<Vtype>}, %1"
4301 [(set_attr "type" "neon_load4_4reg<q>")]
4302 )
4303
;; DX variant: emitted as a four-register ld1.
4304 (define_insn "aarch64_ld4<mode>_dreg"
4305 [(set (match_operand:XI 0 "register_operand" "=w")
4306 (subreg:XI
4307 (vec_concat:<VRL4>
4308 (vec_concat:<VRL2>
4309 (vec_concat:<VDBL>
4310 (unspec:DX [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")]
4311 UNSPEC_LD4)
4312 (const_int 0))
4313 (vec_concat:<VDBL>
4314 (unspec:DX [(match_dup 1)]
4315 UNSPEC_LD4)
4316 (const_int 0)))
4317 (vec_concat:<VRL2>
4318 (vec_concat:<VDBL>
4319 (unspec:DX [(match_dup 1)]
4320 UNSPEC_LD4)
4321 (const_int 0))
4322 (vec_concat:<VDBL>
4323 (unspec:DX [(match_dup 1)]
4324 UNSPEC_LD4)
4325 (const_int 0)))) 0))]
4326 "TARGET_SIMD"
4327 "ld1\\t{%S0.1d - %V0.1d}, %1"
4328 [(set_attr "type" "neon_load1_4reg<q>")]
4329 )
4330
;; Builtin expander for ld2/ld3/ld4 of D-register vectors: wrap the
;; pointer in a MEM of the aggregate D-reg mode and dispatch to the
;; corresponding _dreg insn.
4331 (define_expand "aarch64_ld<VSTRUCT:nregs><VDC:mode>"
4332 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4333 (match_operand:DI 1 "register_operand" "r")
4334 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4335 "TARGET_SIMD"
4336 {
4337 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4338 rtx mem = gen_rtx_MEM (mode, operands[1]);
4339
4340 emit_insn (gen_aarch64_ld<VSTRUCT:nregs><VDC:mode>_dreg (operands[0], mem));
4341 DONE;
4342 })
4343
;; Builtin expander for ld1: a plain move on little-endian; on
;; big-endian the aarch64_be_ld1 unspec is used instead of a move.
4344 (define_expand "aarch64_ld1<VALL:mode>"
4345 [(match_operand:VALL 0 "register_operand")
4346 (match_operand:DI 1 "register_operand")]
4347 "TARGET_SIMD"
4348 {
4349 machine_mode mode = <VALL:MODE>mode;
4350 rtx mem = gen_rtx_MEM (mode, operands[1]);
4351
4352 if (BYTES_BIG_ENDIAN)
4353 emit_insn (gen_aarch64_be_ld1<VALL:mode> (operands[0], mem));
4354 else
4355 emit_move_insn (operands[0], mem);
4356 DONE;
4357 })
4358
;; Builtin expander for ld2/ld3/ld4 of Q-register vectors: dispatch
;; to the standard vec_load_lanes patterns.
4359 (define_expand "aarch64_ld<VSTRUCT:nregs><VQ:mode>"
4360 [(match_operand:VSTRUCT 0 "register_operand" "=w")
4361 (match_operand:DI 1 "register_operand" "r")
4362 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4363 "TARGET_SIMD"
4364 {
4365 machine_mode mode = <VSTRUCT:MODE>mode;
4366 rtx mem = gen_rtx_MEM (mode, operands[1]);
4367
4368 emit_insn (gen_vec_load_lanes<VSTRUCT:mode><VQ:mode> (operands[0], mem));
4369 DONE;
4370 })
4371
;; Builtin expanders for single-lane structure loads.  The lane index
;; (operand 3) is bounds-checked against the number of lanes of the
;; corresponding full-width mode <VCONQ>.
;; NOTE(review): unlike the aarch64_st*_lane expanders further down,
;; these do not remap the lane number with ENDIAN_LANE_N -- confirm
;; whether big-endian lane numbering is handled elsewhere for loads.
4372 (define_expand "aarch64_ld2_lane<mode>"
4373 [(match_operand:OI 0 "register_operand" "=w")
4374 (match_operand:DI 1 "register_operand" "w")
4375 (match_operand:OI 2 "register_operand" "0")
4376 (match_operand:SI 3 "immediate_operand" "i")
4377 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4378 "TARGET_SIMD"
4379 {
4380 machine_mode mode = <V_TWO_ELEM>mode;
4381 rtx mem = gen_rtx_MEM (mode, operands[1]);
4382
4383 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4384 NULL);
4385 emit_insn (gen_aarch64_vec_load_lanesoi_lane<mode> (operands[0],
4386 mem,
4387 operands[2],
4388 operands[3]));
4389 DONE;
4390 })
4391
;; Three-register single-lane load; see the note on aarch64_ld2_lane.
4392 (define_expand "aarch64_ld3_lane<mode>"
4393 [(match_operand:CI 0 "register_operand" "=w")
4394 (match_operand:DI 1 "register_operand" "w")
4395 (match_operand:CI 2 "register_operand" "0")
4396 (match_operand:SI 3 "immediate_operand" "i")
4397 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4398 "TARGET_SIMD"
4399 {
4400 machine_mode mode = <V_THREE_ELEM>mode;
4401 rtx mem = gen_rtx_MEM (mode, operands[1]);
4402
4403 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4404 NULL);
4405 emit_insn (gen_aarch64_vec_load_lanesci_lane<mode> (operands[0],
4406 mem,
4407 operands[2],
4408 operands[3]));
4409 DONE;
4410 })
4411
;; Four-register single-lane load; see the note on aarch64_ld2_lane.
4412 (define_expand "aarch64_ld4_lane<mode>"
4413 [(match_operand:XI 0 "register_operand" "=w")
4414 (match_operand:DI 1 "register_operand" "w")
4415 (match_operand:XI 2 "register_operand" "0")
4416 (match_operand:SI 3 "immediate_operand" "i")
4417 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4418 "TARGET_SIMD"
4419 {
4420 machine_mode mode = <V_FOUR_ELEM>mode;
4421 rtx mem = gen_rtx_MEM (mode, operands[1]);
4422
4423 aarch64_simd_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<VCONQ>mode),
4424 NULL);
4425 emit_insn (gen_aarch64_vec_load_lanesxi_lane<mode> (operands[0],
4426 mem,
4427 operands[2],
4428 operands[3]));
4429 DONE;
4430 })
4431
4432
4433
4434 ;; Expanders for builtins to extract vector registers from large
4435 ;; opaque integer modes.
4436
4437 ;; D-register list.
4438
;; Extract vector number <part> from a struct-mode value.  Each
;; vector occupies a 16-byte slot of the opaque mode (D-regs pad the
;; upper half), hence the byte offset part * 16.  The D-reg variant
;; extracts the 128-bit slot first, then takes its low half.
4439 (define_expand "aarch64_get_dreg<VSTRUCT:mode><VDC:mode>"
4440 [(match_operand:VDC 0 "register_operand" "=w")
4441 (match_operand:VSTRUCT 1 "register_operand" "w")
4442 (match_operand:SI 2 "immediate_operand" "i")]
4443 "TARGET_SIMD"
4444 {
4445 int part = INTVAL (operands[2]);
4446 rtx temp = gen_reg_rtx (<VDC:VDBL>mode);
4447 int offset = part * 16;
4448
4449 emit_move_insn (temp, gen_rtx_SUBREG (<VDC:VDBL>mode, operands[1], offset));
4450 emit_move_insn (operands[0], gen_lowpart (<VDC:MODE>mode, temp));
4451 DONE;
4452 })
4453
;; Q-register list.
4454
;; Q-reg variant: the 16-byte slot is the vector itself.
4456 (define_expand "aarch64_get_qreg<VSTRUCT:mode><VQ:mode>"
4457 [(match_operand:VQ 0 "register_operand" "=w")
4458 (match_operand:VSTRUCT 1 "register_operand" "w")
4459 (match_operand:SI 2 "immediate_operand" "i")]
4460 "TARGET_SIMD"
4461 {
4462 int part = INTVAL (operands[2]);
4463 int offset = part * 16;
4464
4465 emit_move_insn (operands[0],
4466 gen_rtx_SUBREG (<VQ:MODE>mode, operands[1], offset));
4467 DONE;
4468 })
4469
4470 ;; Permuted-store expanders for neon intrinsics.
4471
4472 ;; Permute instructions
4473
4474 ;; vec_perm support
4475
;; Constant permute (standard name): succeed only if the backend can
;; synthesize the given selector; otherwise FAIL so the middle end
;; falls back to another strategy.
4476 (define_expand "vec_perm_const<mode>"
4477 [(match_operand:VALL 0 "register_operand")
4478 (match_operand:VALL 1 "register_operand")
4479 (match_operand:VALL 2 "register_operand")
4480 (match_operand:<V_cmp_result> 3)]
4481 "TARGET_SIMD"
4482 {
4483 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
4484 operands[2], operands[3]))
4485 DONE;
4486 else
4487 FAIL;
4488 })
4489
;; Variable permute (standard name); byte vectors only (VB).
4490 (define_expand "vec_perm<mode>"
4491 [(match_operand:VB 0 "register_operand")
4492 (match_operand:VB 1 "register_operand")
4493 (match_operand:VB 2 "register_operand")
4494 (match_operand:VB 3 "register_operand")]
4495 "TARGET_SIMD"
4496 {
4497 aarch64_expand_vec_perm (operands[0], operands[1],
4498 operands[2], operands[3]);
4499 DONE;
4500 })
4501
;; Table lookup with a single 128-bit table register.
4502 (define_insn "aarch64_tbl1<mode>"
4503 [(set (match_operand:VB 0 "register_operand" "=w")
4504 (unspec:VB [(match_operand:V16QI 1 "register_operand" "w")
4505 (match_operand:VB 2 "register_operand" "w")]
4506 UNSPEC_TBL))]
4507 "TARGET_SIMD"
4508 "tbl\\t%0.<Vtype>, {%1.16b}, %2.<Vtype>"
4509 [(set_attr "type" "neon_tbl1<q>")]
4510 )
4511
;; Two source registers.
4512
;; Table lookup with a two-register (OImode) table.
4514 (define_insn "aarch64_tbl2v16qi"
4515 [(set (match_operand:V16QI 0 "register_operand" "=w")
4516 (unspec:V16QI [(match_operand:OI 1 "register_operand" "w")
4517 (match_operand:V16QI 2 "register_operand" "w")]
4518 UNSPEC_TBL))]
4519 "TARGET_SIMD"
4520 "tbl\\t%0.16b, {%S1.16b - %T1.16b}, %2.16b"
4521 [(set_attr "type" "neon_tbl2_q")]
4522 )
4523
;; Concatenate two V16QI registers into an OImode register pair
;; (used to build tbl2 tables).  Kept as one insn until after reload,
;; then split into the individual register copies by
;; aarch64_split_combinev16qi.
4524 (define_insn_and_split "aarch64_combinev16qi"
4525 [(set (match_operand:OI 0 "register_operand" "=w")
4526 (unspec:OI [(match_operand:V16QI 1 "register_operand" "w")
4527 (match_operand:V16QI 2 "register_operand" "w")]
4528 UNSPEC_CONCAT))]
4529 "TARGET_SIMD"
4530 "#"
4531 "&& reload_completed"
4532 [(const_int 0)]
4533 {
4534 aarch64_split_combinev16qi (operands);
4535 DONE;
4536 }
4537 [(set_attr "type" "multiple")]
4538 )
4539
;; Structured permutes: trn1/trn2, zip1/zip2, uzp1/uzp2, selected by
;; the PERMUTE iterator's perm_insn/perm_hilo attributes.
4540 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
4541 [(set (match_operand:VALL 0 "register_operand" "=w")
4542 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4543 (match_operand:VALL 2 "register_operand" "w")]
4544 PERMUTE))]
4545 "TARGET_SIMD"
4546 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
4547 [(set_attr "type" "neon_permute<q>")]
4548 )
4549
;; Note immediate (third) operand is lane index not byte index.
;; The EXT instruction wants a byte offset, so operand 3 is scaled by
;; the element size at output time.
4551 (define_insn "aarch64_ext<mode>"
4552 [(set (match_operand:VALL 0 "register_operand" "=w")
4553 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")
4554 (match_operand:VALL 2 "register_operand" "w")
4555 (match_operand:SI 3 "immediate_operand" "i")]
4556 UNSPEC_EXT))]
4557 "TARGET_SIMD"
4558 {
4559 operands[3] = GEN_INT (INTVAL (operands[3])
4560 * GET_MODE_SIZE (GET_MODE_INNER (<MODE>mode)));
4561 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
4562 }
4563 [(set_attr "type" "neon_ext<q>")]
4564 )
4565
;; Element reversal within containers: rev16/rev32/rev64, selected by
;; the REVERSE iterator's rev_op attribute.
4566 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
4567 [(set (match_operand:VALL 0 "register_operand" "=w")
4568 (unspec:VALL [(match_operand:VALL 1 "register_operand" "w")]
4569 REVERSE))]
4570 "TARGET_SIMD"
4571 "rev<REVERSE:rev_op>\\t%0.<Vtype>, %1.<Vtype>"
4572 [(set_attr "type" "neon_rev<q>")]
4573 )
4574
;; D-register structure stores.  As with the _dreg loads, the VD
;; variants use interleaving st2/st3/st4 and the DX (single-element)
;; variants degrade to multi-register st1.
4575 (define_insn "aarch64_st2<mode>_dreg"
4576 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4577 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4578 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4579 UNSPEC_ST2))]
4580 "TARGET_SIMD"
4581 "st2\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
4582 [(set_attr "type" "neon_store2_2reg")]
4583 )
4584
4585 (define_insn "aarch64_st2<mode>_dreg"
4586 [(set (match_operand:TI 0 "aarch64_simd_struct_operand" "=Utv")
4587 (unspec:TI [(match_operand:OI 1 "register_operand" "w")
4588 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4589 UNSPEC_ST2))]
4590 "TARGET_SIMD"
4591 "st1\\t{%S1.1d - %T1.1d}, %0"
4592 [(set_attr "type" "neon_store1_2reg")]
4593 )
4594
4595 (define_insn "aarch64_st3<mode>_dreg"
4596 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4597 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4598 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4599 UNSPEC_ST3))]
4600 "TARGET_SIMD"
4601 "st3\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
4602 [(set_attr "type" "neon_store3_3reg")]
4603 )
4604
4605 (define_insn "aarch64_st3<mode>_dreg"
4606 [(set (match_operand:EI 0 "aarch64_simd_struct_operand" "=Utv")
4607 (unspec:EI [(match_operand:CI 1 "register_operand" "w")
4608 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4609 UNSPEC_ST3))]
4610 "TARGET_SIMD"
4611 "st1\\t{%S1.1d - %U1.1d}, %0"
4612 [(set_attr "type" "neon_store1_3reg")]
4613 )
4614
4615 (define_insn "aarch64_st4<mode>_dreg"
4616 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4617 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4618 (unspec:VD [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4619 UNSPEC_ST4))]
4620 "TARGET_SIMD"
4621 "st4\\t{%S1.<Vtype> - %V1.<Vtype>}, %0"
4622 [(set_attr "type" "neon_store4_4reg")]
4623 )
4624
4625 (define_insn "aarch64_st4<mode>_dreg"
4626 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
4627 (unspec:OI [(match_operand:XI 1 "register_operand" "w")
4628 (unspec:DX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4629 UNSPEC_ST4))]
4630 "TARGET_SIMD"
4631 "st1\\t{%S1.1d - %V1.1d}, %0"
4632 [(set_attr "type" "neon_store1_4reg")]
4633 )
4634
;; Builtin expanders for st2/st3/st4: wrap the pointer (operand 0)
;; in a MEM of the aggregate mode and dispatch to the _dreg insns
;; (D-reg case) or the standard vec_store_lanes patterns (Q-reg
;; case).
4635 (define_expand "aarch64_st<VSTRUCT:nregs><VDC:mode>"
4636 [(match_operand:DI 0 "register_operand" "r")
4637 (match_operand:VSTRUCT 1 "register_operand" "w")
4638 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4639 "TARGET_SIMD"
4640 {
4641 machine_mode mode = <VSTRUCT:VSTRUCT_DREG>mode;
4642 rtx mem = gen_rtx_MEM (mode, operands[0]);
4643
4644 emit_insn (gen_aarch64_st<VSTRUCT:nregs><VDC:mode>_dreg (mem, operands[1]));
4645 DONE;
4646 })
4647
4648 (define_expand "aarch64_st<VSTRUCT:nregs><VQ:mode>"
4649 [(match_operand:DI 0 "register_operand" "r")
4650 (match_operand:VSTRUCT 1 "register_operand" "w")
4651 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4652 "TARGET_SIMD"
4653 {
4654 machine_mode mode = <VSTRUCT:MODE>mode;
4655 rtx mem = gen_rtx_MEM (mode, operands[0]);
4656
4657 emit_insn (gen_vec_store_lanes<VSTRUCT:mode><VQ:mode> (mem, operands[1]));
4658 DONE;
4659 })
4660
;; Builtin expanders for single-lane structure stores.  The lane
;; index is remapped with ENDIAN_LANE_N so big-endian architectural
;; lane numbering matches the GCC element ordering.
4661 (define_expand "aarch64_st2_lane<VQ:mode>"
4662 [(match_operand:DI 0 "register_operand" "r")
4663 (match_operand:OI 1 "register_operand" "w")
4664 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4665 (match_operand:SI 2 "immediate_operand")]
4666 "TARGET_SIMD"
4667 {
4668 machine_mode mode = <V_TWO_ELEM>mode;
4669 rtx mem = gen_rtx_MEM (mode, operands[0]);
4670 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4671
4672 emit_insn (gen_vec_store_lanesoi_lane<VQ:mode> (mem,
4673 operands[1],
4674 operands[2]));
4675 DONE;
4676 })
4677
4678 (define_expand "aarch64_st3_lane<VQ:mode>"
4679 [(match_operand:DI 0 "register_operand" "r")
4680 (match_operand:CI 1 "register_operand" "w")
4681 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4682 (match_operand:SI 2 "immediate_operand")]
4683 "TARGET_SIMD"
4684 {
4685 machine_mode mode = <V_THREE_ELEM>mode;
4686 rtx mem = gen_rtx_MEM (mode, operands[0]);
4687 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4688
4689 emit_insn (gen_vec_store_lanesci_lane<VQ:mode> (mem,
4690 operands[1],
4691 operands[2]));
4692 DONE;
4693 })
4694
4695 (define_expand "aarch64_st4_lane<VQ:mode>"
4696 [(match_operand:DI 0 "register_operand" "r")
4697 (match_operand:XI 1 "register_operand" "w")
4698 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4699 (match_operand:SI 2 "immediate_operand")]
4700 "TARGET_SIMD"
4701 {
4702 machine_mode mode = <V_FOUR_ELEM>mode;
4703 rtx mem = gen_rtx_MEM (mode, operands[0]);
4704 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])));
4705
4706 emit_insn (gen_vec_store_lanesxi_lane<VQ:mode> (mem,
4707 operands[1],
4708 operands[2]));
4709 DONE;
4710 })
4711
;; Builtin expander for st1: a plain move on little-endian; on
;; big-endian the aarch64_be_st1 unspec is used instead.
4712 (define_expand "aarch64_st1<VALL:mode>"
4713 [(match_operand:DI 0 "register_operand")
4714 (match_operand:VALL 1 "register_operand")]
4715 "TARGET_SIMD"
4716 {
4717 machine_mode mode = <VALL:MODE>mode;
4718 rtx mem = gen_rtx_MEM (mode, operands[0]);
4719
4720 if (BYTES_BIG_ENDIAN)
4721 emit_insn (gen_aarch64_be_st1<VALL:mode> (mem, operands[1]));
4722 else
4723 emit_move_insn (mem, operands[1]);
4724 DONE;
4725 })
4726
4727 ;; Expander for builtins to insert vector registers into large
4728 ;; opaque integer modes.
4729
4730 ;; Q-register list. We don't need a D-reg inserter as we zero
4731 ;; extend them in arm_neon.h and insert the resulting Q-regs.
4732
;; Insert Q-register operand 2 as vector number <part> of struct
;; value operand 1, writing the result to operand 0.  Each vector
;; occupies a 16-byte slot, hence the byte offset part * 16.
4733 (define_expand "aarch64_set_qreg<VSTRUCT:mode><VQ:mode>"
4734 [(match_operand:VSTRUCT 0 "register_operand" "+w")
4735 (match_operand:VSTRUCT 1 "register_operand" "0")
4736 (match_operand:VQ 2 "register_operand" "w")
4737 (match_operand:SI 3 "immediate_operand" "i")]
4738 "TARGET_SIMD"
4739 {
4740 int part = INTVAL (operands[3]);
4741 int offset = part * 16;
4742
4743 emit_move_insn (operands[0], operands[1]);
4744 emit_move_insn (gen_rtx_SUBREG (<VQ:MODE>mode, operands[0], offset),
4745 operands[2]);
4746 DONE;
4747 })
4748
4749 ;; Standard pattern name vec_init<mode>.
4750
;; Standard vec_init expander: build vector operand 0 from the parallel
;; of element values in operand 1.  All the work is done by
;; aarch64_expand_vector_init in aarch64.c.
(define_expand "vec_init<mode>"
  [(match_operand:VALL 0 "register_operand" "")
   (match_operand 1 "" "")]
  "TARGET_SIMD"
{
  aarch64_expand_vector_init (operands[0], operands[1]);
  DONE;
})
4759
;; Load a scalar from memory and replicate it into every lane of the
;; destination vector (LD1R).  Matched when combine forms a
;; vec_duplicate of a memory operand.
(define_insn "*aarch64_simd_ld1r<mode>"
  [(set (match_operand:VALL 0 "register_operand" "=w")
        (vec_duplicate:VALL
          (match_operand:<VEL> 1 "aarch64_simd_struct_operand" "Utv")))]
  "TARGET_SIMD"
  "ld1r\\t{%0.<Vtype>}, %1"
  [(set_attr "type" "neon_load1_all_lanes")]
)
4768
;; Vector floating-point reciprocal estimate (FRECPE).
(define_insn "aarch64_frecpe<mode>"
  [(set (match_operand:VDQF 0 "register_operand" "=w")
        (unspec:VDQF [(match_operand:VDQF 1 "register_operand" "w")]
                     UNSPEC_FRECPE))]
  "TARGET_SIMD"
  "frecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")]
)
4777
;; Scalar floating-point reciprocal operations; the FRECP iterator
;; supplies the instruction suffix (e.g. estimate/exponent variants).
(define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>"
  [(set (match_operand:GPF 0 "register_operand" "=w")
        (unspec:GPF [(match_operand:GPF 1 "register_operand" "w")]
                    FRECP))]
  "TARGET_SIMD"
  "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1"
  [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF:Vetype><GPF:q>")]
)
4786
;; Floating-point reciprocal step (FRECPS), scalar and vector forms
;; via the VALLF iterator.
(define_insn "aarch64_frecps<mode>"
  [(set (match_operand:VALLF 0 "register_operand" "=w")
        (unspec:VALLF [(match_operand:VALLF 1 "register_operand" "w")
                       (match_operand:VALLF 2 "register_operand" "w")]
                      UNSPEC_FRECPS))]
  "TARGET_SIMD"
  "frecps\\t%<v>0<Vmtype>, %<v>1<Vmtype>, %<v>2<Vmtype>"
  [(set_attr "type" "neon_fp_recps_<Vetype><q>")]
)
4796
;; Unsigned reciprocal estimate (URECPE) on vectors of SI elements.
(define_insn "aarch64_urecpe<mode>"
  [(set (match_operand:VDQ_SI 0 "register_operand" "=w")
        (unspec:VDQ_SI [(match_operand:VDQ_SI 1 "register_operand" "w")]
                       UNSPEC_URECPE))]
  "TARGET_SIMD"
  "urecpe\\t%0.<Vtype>, %1.<Vtype>"
  [(set_attr "type" "neon_fp_recpe_<Vetype><q>")])
4804
4805 ;; Standard pattern name vec_extract<mode>.
4806
;; Standard vec_extract expander: extract the element at immediate
;; index operand 2 from vector operand 1 into scalar operand 0.
;; Delegates to the aarch64_get_lane pattern.
(define_expand "vec_extract<mode>"
  [(match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "")
   (match_operand:VALL 1 "register_operand" "")
   (match_operand:SI 2 "immediate_operand" "")]
  "TARGET_SIMD"
{
  emit_insn
    (gen_aarch64_get_lane<mode> (operands[0], operands[1], operands[2]));
  DONE;
})
4817
4818 ;; aes
4819
;; AES single-round instructions (AESE/AESD via the CRYPTO_AES
;; iterator).  Operand 1 is tied to the destination ("0" constraint),
;; matching the instruction's read-modify-write of its first register.
(define_insn "aarch64_crypto_aes<aes_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
                       (match_operand:V16QI 2 "register_operand" "w")]
                      CRYPTO_AES))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aes_op>\\t%0.16b, %2.16b"
  [(set_attr "type" "crypto_aese")]
)
4829
;; AES mix-columns instructions (AESMC/AESIMC via the CRYPTO_AESMC
;; iterator).
(define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
  [(set (match_operand:V16QI 0 "register_operand" "=w")
        (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "w")]
                      CRYPTO_AESMC))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "aes<aesmc_op>\\t%0.16b, %1.16b"
  [(set_attr "type" "crypto_aesmc")]
)
4838
4839 ;; sha1
4840
;; SHA1 fixed-rotate (SHA1H) on a single 32-bit value held in a SIMD
;; register.
(define_insn "aarch64_crypto_sha1hsi"
  [(set (match_operand:SI 0 "register_operand" "=w")
        (unspec:SI [(match_operand:SI 1 "register_operand" "w")]
                   UNSPEC_SHA1H))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1h\\t%s0, %s1"
  [(set_attr "type" "crypto_sha1_fast")]
)
4850
;; SHA1 schedule update 1 (SHA1SU1).  Operand 1 is tied to the
;; destination; the instruction updates it in place using operand 2.
(define_insn "aarch64_crypto_sha1su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA1SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su1\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha1_fast")]
)
4860
;; SHA1 hash-update instructions (SHA1C/SHA1M/SHA1P via the CRYPTO_SHA1
;; iterator).  Operand 1 (tied to the destination) is the hash state,
;; operand 2 a 32-bit scalar, operand 3 the schedule vector.
(define_insn "aarch64_crypto_sha1<sha1_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1<sha1_op>\\t%q0, %s2, %3.4s"
  [(set_attr "type" "crypto_sha1_slow")]
)
4871
;; SHA1 schedule update 0 (SHA1SU0).  Operand 1 is tied to the
;; destination and updated using operands 2 and 3.
(define_insn "aarch64_crypto_sha1su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA1SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha1su0\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha1_xor")]
)
4882
4883 ;; sha256
4884
;; SHA256 hash-update instructions (SHA256H/SHA256H2 via the
;; CRYPTO_SHA256 iterator).  Operand 1 is tied to the destination.
(define_insn "aarch64_crypto_sha256h<sha256_op>v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     CRYPTO_SHA256))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
4895
;; SHA256 schedule update 0 (SHA256SU0).  Operand 1 is tied to the
;; destination and updated in place using operand 2.
(define_insn "aarch64_crypto_sha256su0v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")]
                     UNSPEC_SHA256SU0))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su0\\t%0.4s, %2.4s"
  [(set_attr "type" "crypto_sha256_fast")]
)
4905
;; SHA256 schedule update 1 (SHA256SU1).  Operand 1 is tied to the
;; destination and updated using operands 2 and 3.
(define_insn "aarch64_crypto_sha256su1v4si"
  [(set (match_operand:V4SI 0 "register_operand" "=w")
        (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
                      (match_operand:V4SI 2 "register_operand" "w")
                      (match_operand:V4SI 3 "register_operand" "w")]
                     UNSPEC_SHA256SU1))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "sha256su1\\t%0.4s, %2.4s, %3.4s"
  [(set_attr "type" "crypto_sha256_slow")]
)
4916
4917 ;; pmull
4918
;; Polynomial multiply long (PMULL) of the low 64-bit polynomial
;; operands, producing a 128-bit (TI mode) result.
(define_insn "aarch64_crypto_pmulldi"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:DI 1 "register_operand" "w")
                    (match_operand:DI 2 "register_operand" "w")]
                   UNSPEC_PMULL))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull\\t%0.1q, %1.1d, %2.1d"
  [(set_attr "type" "neon_mul_d_long")]
)
4928
;; Polynomial multiply long, upper halves (PMULL2): multiplies the
;; high 64-bit elements of the V2DI operands into a 128-bit result.
(define_insn "aarch64_crypto_pmullv2di"
  [(set (match_operand:TI 0 "register_operand" "=w")
        (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
                    (match_operand:V2DI 2 "register_operand" "w")]
                   UNSPEC_PMULL2))]
  "TARGET_SIMD && TARGET_CRYPTO"
  "pmull2\\t%0.1q, %1.2d, %2.2d"
  [(set_attr "type" "neon_mul_d_long")]
)