1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Possible values are the VQH mnemonics vadd/vmin/vmax; "vadd" is the
;; declared default.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r, ?Us,*r")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, Usi,r,*r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
61 default: return output_move_double (operands, true, NULL);
64 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
65 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
66 neon_load1_2reg, neon_store1_2reg, multiple")
67 (set_attr "length" "4,4,4,4,4,4,8,8,8")
68 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*,*")
69 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*,*")
70 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*,*")])
72 (define_insn "*neon_mov<mode>"
73 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
74 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
75 (match_operand:VQXMOV 1 "general_operand"
76 " w,w, Dn,Uni, w, r, r, Usi, r"))]
78 && (register_operand (operands[0], <MODE>mode)
79 || register_operand (operands[1], <MODE>mode))"
81 if (which_alternative == 2)
84 static char templ[40];
86 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
87 &operands[1], &width);
89 gcc_assert (is_valid != 0);
92 return "vmov.f32\t%q0, %1 @ <mode>";
94 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
99 switch (which_alternative)
101 case 0: return "vmov\t%q0, %q1 @ <mode>";
102 case 1: case 3: return output_move_neon (operands);
103 case 2: gcc_unreachable ();
104 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
105 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
106 default: return output_move_quad (operands);
109 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
110 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
111 mov_reg,neon_load1_4reg,neon_store1_4reg")
112 (set_attr "length" "4,8,4,8,8,8,16,8,16")
113 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
114 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
115 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
117 /* We define these mov expanders to match the standard mov$a optab to prevent
118 the mid-end from trying to do a subreg for these modes which is the most
119 inefficient way to expand the move. Also big-endian subregs aren't
120 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
121 Without these RTL generation patterns the mid-end would attempt to take a
122 sub-reg and may ICE if it can't. */
124 (define_expand "movti"
125 [(set (match_operand:TI 0 "nonimmediate_operand" "")
126 (match_operand:TI 1 "general_operand" ""))]
129 if (can_create_pseudo_p ())
131 if (!REG_P (operands[0]))
132 operands[1] = force_reg (TImode, operands[1]);
136 (define_expand "mov<mode>"
137 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
138 (match_operand:VSTRUCT 1 "general_operand" ""))]
141 if (can_create_pseudo_p ())
143 if (!REG_P (operands[0]))
144 operands[1] = force_reg (<MODE>mode, operands[1]);
148 (define_expand "mov<mode>"
149 [(set (match_operand:VH 0 "s_register_operand")
150 (match_operand:VH 1 "s_register_operand"))]
153 if (can_create_pseudo_p ())
155 if (!REG_P (operands[0]))
156 operands[1] = force_reg (<MODE>mode, operands[1]);
160 (define_insn "*neon_mov<mode>"
161 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
162 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
164 && (register_operand (operands[0], <MODE>mode)
165 || register_operand (operands[1], <MODE>mode))"
167 switch (which_alternative)
170 case 1: case 2: return output_move_neon (operands);
171 default: gcc_unreachable ();
174 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
175 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
178 [(set (match_operand:EI 0 "s_register_operand" "")
179 (match_operand:EI 1 "s_register_operand" ""))]
180 "TARGET_NEON && reload_completed"
181 [(set (match_dup 0) (match_dup 1))
182 (set (match_dup 2) (match_dup 3))]
184 int rdest = REGNO (operands[0]);
185 int rsrc = REGNO (operands[1]);
188 dest[0] = gen_rtx_REG (TImode, rdest);
189 src[0] = gen_rtx_REG (TImode, rsrc);
190 dest[1] = gen_rtx_REG (DImode, rdest + 4);
191 src[1] = gen_rtx_REG (DImode, rsrc + 4);
193 neon_disambiguate_copy (operands, dest, src, 2);
197 [(set (match_operand:OI 0 "s_register_operand" "")
198 (match_operand:OI 1 "s_register_operand" ""))]
199 "TARGET_NEON && reload_completed"
200 [(set (match_dup 0) (match_dup 1))
201 (set (match_dup 2) (match_dup 3))]
203 int rdest = REGNO (operands[0]);
204 int rsrc = REGNO (operands[1]);
207 dest[0] = gen_rtx_REG (TImode, rdest);
208 src[0] = gen_rtx_REG (TImode, rsrc);
209 dest[1] = gen_rtx_REG (TImode, rdest + 4);
210 src[1] = gen_rtx_REG (TImode, rsrc + 4);
212 neon_disambiguate_copy (operands, dest, src, 2);
216 [(set (match_operand:CI 0 "s_register_operand" "")
217 (match_operand:CI 1 "s_register_operand" ""))]
218 "TARGET_NEON && reload_completed"
219 [(set (match_dup 0) (match_dup 1))
220 (set (match_dup 2) (match_dup 3))
221 (set (match_dup 4) (match_dup 5))]
223 int rdest = REGNO (operands[0]);
224 int rsrc = REGNO (operands[1]);
227 dest[0] = gen_rtx_REG (TImode, rdest);
228 src[0] = gen_rtx_REG (TImode, rsrc);
229 dest[1] = gen_rtx_REG (TImode, rdest + 4);
230 src[1] = gen_rtx_REG (TImode, rsrc + 4);
231 dest[2] = gen_rtx_REG (TImode, rdest + 8);
232 src[2] = gen_rtx_REG (TImode, rsrc + 8);
234 neon_disambiguate_copy (operands, dest, src, 3);
238 [(set (match_operand:XI 0 "s_register_operand" "")
239 (match_operand:XI 1 "s_register_operand" ""))]
240 "TARGET_NEON && reload_completed"
241 [(set (match_dup 0) (match_dup 1))
242 (set (match_dup 2) (match_dup 3))
243 (set (match_dup 4) (match_dup 5))
244 (set (match_dup 6) (match_dup 7))]
246 int rdest = REGNO (operands[0]);
247 int rsrc = REGNO (operands[1]);
250 dest[0] = gen_rtx_REG (TImode, rdest);
251 src[0] = gen_rtx_REG (TImode, rsrc);
252 dest[1] = gen_rtx_REG (TImode, rdest + 4);
253 src[1] = gen_rtx_REG (TImode, rsrc + 4);
254 dest[2] = gen_rtx_REG (TImode, rdest + 8);
255 src[2] = gen_rtx_REG (TImode, rsrc + 8);
256 dest[3] = gen_rtx_REG (TImode, rdest + 12);
257 src[3] = gen_rtx_REG (TImode, rsrc + 12);
259 neon_disambiguate_copy (operands, dest, src, 4);
262 (define_expand "movmisalign<mode>"
263 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
264 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
265 UNSPEC_MISALIGNED_ACCESS))]
266 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
269 /* This pattern is not permitted to fail during expansion: if both arguments
270 are non-registers (e.g. memory := constant, which can be created by the
271 auto-vectorizer), force operand 1 into a register. */
272 if (!s_register_operand (operands[0], <MODE>mode)
273 && !s_register_operand (operands[1], <MODE>mode))
274 operands[1] = force_reg (<MODE>mode, operands[1]);
276 if (s_register_operand (operands[0], <MODE>mode))
277 adjust_mem = operands[1];
279 adjust_mem = operands[0];
281 /* Legitimize address. */
282 if (!neon_vector_mem_operand (adjust_mem, 2, true))
283 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a doubleword NEON vector (VDX modes): a single
;; element-sized vst1 through a "permissive" struct memory operand (Um
;; constraint), wrapped in UNSPEC_MISALIGNED_ACCESS so ordinary move
;; patterns cannot match it.  Only enabled for little-endian targets
;; with unaligned access support.
287 (define_insn "*movmisalign<mode>_neon_store"
288 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
289 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
290 UNSPEC_MISALIGNED_ACCESS))]
291 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
292 "vst1.<V_sz_elem>\t{%P1}, %A0"
293 [(set_attr "type" "neon_store1_1reg<q>")])
295 (define_insn "*movmisalign<mode>_neon_load"
296 [(set (match_operand:VDX 0 "s_register_operand" "=w")
297 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
299 UNSPEC_MISALIGNED_ACCESS))]
300 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
301 "vld1.<V_sz_elem>\t{%P0}, %A1"
302 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a quadword NEON vector (VQX modes): as the VDX
;; variant above but printing the full q register (%q1) in the vst1.
;; UNSPEC_MISALIGNED_ACCESS keeps this distinct from the normal move
;; patterns; restricted to little-endian with unaligned access enabled.
304 (define_insn "*movmisalign<mode>_neon_store"
305 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
306 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
307 UNSPEC_MISALIGNED_ACCESS))]
308 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
309 "vst1.<V_sz_elem>\t{%q1}, %A0"
310 [(set_attr "type" "neon_store1_1reg<q>")])
312 (define_insn "*movmisalign<mode>_neon_load"
313 [(set (match_operand:VQX 0 "s_register_operand" "=w")
314 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
316 UNSPEC_MISALIGNED_ACCESS))]
317 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
318 "vld1.<V_sz_elem>\t{%q0}, %A1"
319 [(set_attr "type" "neon_load1_1reg<q>")])
321 (define_insn "vec_set<mode>_internal"
322 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
324 (vec_duplicate:VD_LANE
325 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
326 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
327 (match_operand:SI 2 "immediate_operand" "i,i")))]
330 int elt = ffs ((int) INTVAL (operands[2])) - 1;
331 if (BYTES_BIG_ENDIAN)
332 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
333 operands[2] = GEN_INT (elt);
335 if (which_alternative == 0)
336 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
338 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
340 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
342 (define_insn "vec_set<mode>_internal"
343 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
346 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
347 (match_operand:VQ2 3 "s_register_operand" "0,0")
348 (match_operand:SI 2 "immediate_operand" "i,i")))]
351 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
352 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
353 int elt = elem % half_elts;
354 int hi = (elem / half_elts) * 2;
355 int regno = REGNO (operands[0]);
357 if (BYTES_BIG_ENDIAN)
358 elt = half_elts - 1 - elt;
360 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
361 operands[2] = GEN_INT (elt);
363 if (which_alternative == 0)
364 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
366 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
368 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
371 (define_insn "vec_setv2di_internal"
372 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
375 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
376 (match_operand:V2DI 3 "s_register_operand" "0,0")
377 (match_operand:SI 2 "immediate_operand" "i,i")))]
380 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
381 int regno = REGNO (operands[0]) + 2 * elem;
383 operands[0] = gen_rtx_REG (DImode, regno);
385 if (which_alternative == 0)
386 return "vld1.64\t%P0, %A1";
388 return "vmov\t%P0, %Q1, %R1";
390 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
393 (define_expand "vec_set<mode>"
394 [(match_operand:VDQ 0 "s_register_operand" "")
395 (match_operand:<V_elem> 1 "s_register_operand" "")
396 (match_operand:SI 2 "immediate_operand" "")]
399 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
400 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
401 GEN_INT (elem), operands[0]));
405 (define_insn "vec_extract<mode><V_elem_l>"
406 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
408 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
409 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
412 if (BYTES_BIG_ENDIAN)
414 int elt = INTVAL (operands[2]);
415 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
416 operands[2] = GEN_INT (elt);
419 if (which_alternative == 0)
420 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
422 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
424 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
427 (define_insn "vec_extract<mode><V_elem_l>"
428 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
430 (match_operand:VQ2 1 "s_register_operand" "w,w")
431 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
434 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
435 int elt = INTVAL (operands[2]) % half_elts;
436 int hi = (INTVAL (operands[2]) / half_elts) * 2;
437 int regno = REGNO (operands[1]);
439 if (BYTES_BIG_ENDIAN)
440 elt = half_elts - 1 - elt;
442 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
443 operands[2] = GEN_INT (elt);
445 if (which_alternative == 0)
446 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
448 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
450 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
453 (define_insn "vec_extractv2didi"
454 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
456 (match_operand:V2DI 1 "s_register_operand" "w,w")
457 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
460 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
462 operands[1] = gen_rtx_REG (DImode, regno);
464 if (which_alternative == 0)
465 return "vst1.64\t{%P1}, %A0 @ v2di";
467 return "vmov\t%Q0, %R0, %P1 @ v2di";
469 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
472 (define_expand "vec_init<mode><V_elem_l>"
473 [(match_operand:VDQ 0 "s_register_operand" "")
474 (match_operand 1 "" "")]
477 neon_expand_vector_init (operands[0], operands[1]);
481 ;; Doubleword and quadword arithmetic.
483 ;; NOTE: some other instructions also support 64-bit integer
484 ;; element size, which we could potentially use for "long long" operations.
486 (define_insn "*add<mode>3_neon"
487 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
488 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
489 (match_operand:VDQ 2 "s_register_operand" "w")))]
490 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
491 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
493 (if_then_else (match_test "<Is_float_mode>")
494 (const_string "neon_fp_addsub_s<q>")
495 (const_string "neon_add<q>")))]
498 ;; As with SFmode, full support for HFmode vector arithmetic is only available
499 ;; when flag-unsafe-math-optimizations is enabled.
501 (define_insn "add<mode>3"
503 (match_operand:VH 0 "s_register_operand" "=w")
505 (match_operand:VH 1 "s_register_operand" "w")
506 (match_operand:VH 2 "s_register_operand" "w")))]
507 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
508 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
510 (if_then_else (match_test "<Is_float_mode>")
511 (const_string "neon_fp_addsub_s<q>")
512 (const_string "neon_add<q>")))]
515 (define_insn "add<mode>3_fp16"
517 (match_operand:VH 0 "s_register_operand" "=w")
519 (match_operand:VH 1 "s_register_operand" "w")
520 (match_operand:VH 2 "s_register_operand" "w")))]
521 "TARGET_NEON_FP16INST"
522 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
524 (if_then_else (match_test "<Is_float_mode>")
525 (const_string "neon_fp_addsub_s<q>")
526 (const_string "neon_add<q>")))]
529 (define_insn "adddi3_neon"
530 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
531 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
532 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
533 (clobber (reg:CC CC_REGNUM))]
536 switch (which_alternative)
538 case 0: /* fall through */
539 case 3: return "vadd.i64\t%P0, %P1, %P2";
545 default: gcc_unreachable ();
548 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
549 multiple,multiple,multiple")
550 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
551 (set_attr "length" "*,8,8,*,8,8,8")
552 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
555 (define_insn "*sub<mode>3_neon"
556 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
557 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
558 (match_operand:VDQ 2 "s_register_operand" "w")))]
559 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
560 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
562 (if_then_else (match_test "<Is_float_mode>")
563 (const_string "neon_fp_addsub_s<q>")
564 (const_string "neon_sub<q>")))]
567 (define_insn "sub<mode>3"
569 (match_operand:VH 0 "s_register_operand" "=w")
571 (match_operand:VH 1 "s_register_operand" "w")
572 (match_operand:VH 2 "s_register_operand" "w")))]
573 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
574 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
575 [(set_attr "type" "neon_sub<q>")]
578 (define_insn "sub<mode>3_fp16"
580 (match_operand:VH 0 "s_register_operand" "=w")
582 (match_operand:VH 1 "s_register_operand" "w")
583 (match_operand:VH 2 "s_register_operand" "w")))]
584 "TARGET_NEON_FP16INST"
585 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
586 [(set_attr "type" "neon_sub<q>")]
589 (define_insn "subdi3_neon"
590 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
591 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
592 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
593 (clobber (reg:CC CC_REGNUM))]
596 switch (which_alternative)
598 case 0: /* fall through */
599 case 4: return "vsub.i64\t%P0, %P1, %P2";
600 case 1: /* fall through */
601 case 2: /* fall through */
602 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
603 default: gcc_unreachable ();
606 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
607 (set_attr "conds" "*,clob,clob,clob,*")
608 (set_attr "length" "*,8,8,8,*")
609 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
612 (define_insn "*mul<mode>3_neon"
613 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
614 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
615 (match_operand:VDQW 2 "s_register_operand" "w")))]
616 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
617 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 (if_then_else (match_test "<Is_float_mode>")
620 (const_string "neon_fp_mul_s<q>")
621 (const_string "neon_mul_<V_elem_ch><q>")))]
624 /* Perform division using multiply-by-reciprocal.
625 Reciprocal is calculated using Newton-Raphson method.
626 Enabled with -funsafe-math-optimizations -freciprocal-math
627 and disabled for -Os since it increases code size. */
629 (define_expand "div<mode>3"
630 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
631 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w")
632 (match_operand:VCVTF 2 "s_register_operand" "w")))]
633 "TARGET_NEON && !optimize_size
634 && flag_reciprocal_math"
636 rtx rec = gen_reg_rtx (<MODE>mode);
637 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
639 /* Reciprocal estimate. */
640 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
642 /* Perform 2 iterations of newton-raphson method. */
643 for (int i = 0; i < 2; i++)
645 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
646 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
649 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
650 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
656 (define_insn "mul<mode>3add<mode>_neon"
657 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
658 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
659 (match_operand:VDQW 3 "s_register_operand" "w"))
660 (match_operand:VDQW 1 "s_register_operand" "0")))]
661 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
662 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
664 (if_then_else (match_test "<Is_float_mode>")
665 (const_string "neon_fp_mla_s<q>")
666 (const_string "neon_mla_<V_elem_ch><q>")))]
669 (define_insn "mul<mode>3add<mode>_neon"
670 [(set (match_operand:VH 0 "s_register_operand" "=w")
671 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
672 (match_operand:VH 3 "s_register_operand" "w"))
673 (match_operand:VH 1 "s_register_operand" "0")))]
674 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
675 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
676 [(set_attr "type" "neon_fp_mla_s<q>")]
679 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
680 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
681 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
682 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
683 (match_operand:VDQW 3 "s_register_operand" "w"))))]
684 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
685 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
687 (if_then_else (match_test "<Is_float_mode>")
688 (const_string "neon_fp_mla_s<q>")
689 (const_string "neon_mla_<V_elem_ch><q>")))]
692 ;; Fused multiply-accumulate
693 ;; We define each insn twice here:
694 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
695 ;; to be able to use when converting to FMA.
696 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
697 (define_insn "fma<VCVTF:mode>4"
698 [(set (match_operand:VCVTF 0 "register_operand" "=w")
699 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
700 (match_operand:VCVTF 2 "register_operand" "w")
701 (match_operand:VCVTF 3 "register_operand" "0")))]
702 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
703 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
704 [(set_attr "type" "neon_fp_mla_s<q>")]
707 (define_insn "fma<VCVTF:mode>4_intrinsic"
708 [(set (match_operand:VCVTF 0 "register_operand" "=w")
709 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
710 (match_operand:VCVTF 2 "register_operand" "w")
711 (match_operand:VCVTF 3 "register_operand" "0")))]
712 "TARGET_NEON && TARGET_FMA"
713 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
714 [(set_attr "type" "neon_fp_mla_s<q>")]
717 (define_insn "fma<VH:mode>4"
718 [(set (match_operand:VH 0 "register_operand" "=w")
720 (match_operand:VH 1 "register_operand" "w")
721 (match_operand:VH 2 "register_operand" "w")
722 (match_operand:VH 3 "register_operand" "0")))]
723 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
724 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
725 [(set_attr "type" "neon_fp_mla_s<q>")]
728 (define_insn "fma<VH:mode>4_intrinsic"
729 [(set (match_operand:VH 0 "register_operand" "=w")
731 (match_operand:VH 1 "register_operand" "w")
732 (match_operand:VH 2 "register_operand" "w")
733 (match_operand:VH 3 "register_operand" "0")))]
734 "TARGET_NEON_FP16INST"
735 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
736 [(set_attr "type" "neon_fp_mla_s<q>")]
739 (define_insn "*fmsub<VCVTF:mode>4"
740 [(set (match_operand:VCVTF 0 "register_operand" "=w")
741 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
742 (match_operand:VCVTF 2 "register_operand" "w")
743 (match_operand:VCVTF 3 "register_operand" "0")))]
744 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
745 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
746 [(set_attr "type" "neon_fp_mla_s<q>")]
749 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
750 [(set (match_operand:VCVTF 0 "register_operand" "=w")
752 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
753 (match_operand:VCVTF 2 "register_operand" "w")
754 (match_operand:VCVTF 3 "register_operand" "0")))]
755 "TARGET_NEON && TARGET_FMA"
756 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
757 [(set_attr "type" "neon_fp_mla_s<q>")]
760 (define_insn "fmsub<VH:mode>4_intrinsic"
761 [(set (match_operand:VH 0 "register_operand" "=w")
763 (neg:VH (match_operand:VH 1 "register_operand" "w"))
764 (match_operand:VH 2 "register_operand" "w")
765 (match_operand:VH 3 "register_operand" "0")))]
766 "TARGET_NEON_FP16INST"
767 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
768 [(set_attr "type" "neon_fp_mla_s<q>")]
771 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
772 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
773 (unspec:VCVTF [(match_operand:VCVTF 1
774 "s_register_operand" "w")]
776 "TARGET_NEON && TARGET_VFP5"
777 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
778 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
781 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
782 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
783 (FIXUORS:<V_cmp_result> (unspec:VCVTF
784 [(match_operand:VCVTF 1 "register_operand" "w")]
786 "TARGET_NEON && TARGET_VFP5"
787 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
788 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
789 (set_attr "predicable" "no")]
792 (define_insn "ior<mode>3"
793 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
794 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
795 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
798 switch (which_alternative)
800 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
801 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
802 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
803 default: gcc_unreachable ();
806 [(set_attr "type" "neon_logic<q>")]
809 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
810 ;; vorr. We support the pseudo-instruction vand instead, because that
811 ;; corresponds to the canonical form the middle-end expects to use for
812 ;; immediate bitwise-ANDs.
814 (define_insn "and<mode>3"
815 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
816 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
817 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
820 switch (which_alternative)
822 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
823 case 1: return neon_output_logic_immediate ("vand", &operands[2],
824 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
825 default: gcc_unreachable ();
828 [(set_attr "type" "neon_logic<q>")]
831 (define_insn "orn<mode>3_neon"
832 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
833 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
834 (match_operand:VDQ 1 "s_register_operand" "w")))]
836 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
837 [(set_attr "type" "neon_logic<q>")]
840 ;; TODO: investigate whether we should disable
841 ;; this and bicdi3_neon for the A8 in line with the other
843 (define_insn_and_split "orndi3_neon"
844 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
845 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
846 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
854 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
855 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
856 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
861 operands[3] = gen_highpart (SImode, operands[0]);
862 operands[0] = gen_lowpart (SImode, operands[0]);
863 operands[4] = gen_highpart (SImode, operands[2]);
864 operands[2] = gen_lowpart (SImode, operands[2]);
865 operands[5] = gen_highpart (SImode, operands[1]);
866 operands[1] = gen_lowpart (SImode, operands[1]);
870 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
871 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
875 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
876 (set_attr "length" "*,16,8,8")
877 (set_attr "arch" "any,a,t2,t2")]
880 (define_insn "bic<mode>3_neon"
881 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
882 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
883 (match_operand:VDQ 1 "s_register_operand" "w")))]
885 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
886 [(set_attr "type" "neon_logic<q>")]
889 ;; Compare to *anddi_notdi_di.
890 (define_insn "bicdi3_neon"
891 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
892 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
893 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
899 [(set_attr "type" "neon_logic,multiple,multiple")
900 (set_attr "length" "*,8,8")]
903 (define_insn "xor<mode>3"
904 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
905 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
906 (match_operand:VDQ 2 "s_register_operand" "w")))]
908 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
909 [(set_attr "type" "neon_logic<q>")]
912 (define_insn "one_cmpl<mode>2"
913 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
914 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
916 "vmvn\t%<V_reg>0, %<V_reg>1"
917 [(set_attr "type" "neon_move<q>")]
920 (define_insn "abs<mode>2"
921 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
922 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
924 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
926 (if_then_else (match_test "<Is_float_mode>")
927 (const_string "neon_fp_abs_s<q>")
928 (const_string "neon_abs<q>")))]
931 (define_insn "neg<mode>2"
932 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
933 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
935 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
937 (if_then_else (match_test "<Is_float_mode>")
938 (const_string "neon_fp_neg_s<q>")
939 (const_string "neon_neg<q>")))]
942 (define_insn "negdi2_neon"
943 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
944 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
945 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
946 (clobber (reg:CC CC_REGNUM))]
949 [(set_attr "length" "8")
950 (set_attr "type" "multiple")]
953 ; Split negdi2_neon for vfp registers
955 [(set (match_operand:DI 0 "s_register_operand" "")
956 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
957 (clobber (match_scratch:DI 2 ""))
958 (clobber (reg:CC CC_REGNUM))]
959 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
960 [(set (match_dup 2) (const_int 0))
961 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
962 (clobber (reg:CC CC_REGNUM))])]
964 if (!REG_P (operands[2]))
965 operands[2] = operands[0];
969 ; Split negdi2_neon for core registers
971 [(set (match_operand:DI 0 "s_register_operand" "")
972 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
973 (clobber (match_scratch:DI 2 ""))
974 (clobber (reg:CC CC_REGNUM))]
975 "TARGET_32BIT && reload_completed
976 && arm_general_register_operand (operands[0], DImode)"
977 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
978 (clobber (reg:CC CC_REGNUM))])]
982 (define_insn "<absneg_str><mode>2"
983 [(set (match_operand:VH 0 "s_register_operand" "=w")
984 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
985 "TARGET_NEON_FP16INST"
986 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
987 [(set_attr "type" "neon_abs<q>")]
990 (define_expand "neon_v<absneg_str><mode>"
992 (match_operand:VH 0 "s_register_operand")
993 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
994 "TARGET_NEON_FP16INST"
996 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
;; FP16 vector rounding operations (vrndn/vrnda/... selected by the
;; fp16_rnd iterator via an unspec).
1000 (define_insn "neon_v<fp16_rnd_str><mode>"
1001 [(set (match_operand:VH 0 "s_register_operand" "=w")
1003 [(match_operand:VH 1 "s_register_operand" "w")]
1005 "TARGET_NEON_FP16INST"
1006 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
1007 [(set_attr "type" "neon_fp_round_s<q>")]
;; FP16 reciprocal square-root estimate.
1010 (define_insn "neon_vrsqrte<mode>"
1011 [(set (match_operand:VH 0 "s_register_operand" "=w")
1013 [(match_operand:VH 1 "s_register_operand" "w")]
1015 "TARGET_NEON_FP16INST"
1016 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
1017 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Element-wise unsigned minimum (vmin with unsigned element suffix).
1020 (define_insn "*umin<mode>3_neon"
1021 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1022 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1023 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1025 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1026 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise unsigned maximum.
1029 (define_insn "*umax<mode>3_neon"
1030 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1031 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1032 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1034 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1035 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise signed minimum; VDQW also covers the float modes,
;; hence the type attribute switches on <Is_float_mode>.
1038 (define_insn "*smin<mode>3_neon"
1039 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1040 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1041 (match_operand:VDQW 2 "s_register_operand" "w")))]
1043 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1045 (if_then_else (match_test "<Is_float_mode>")
1046 (const_string "neon_fp_minmax_s<q>")
1047 (const_string "neon_minmax<q>")))]
;; Element-wise signed maximum (integer and float modes).
1050 (define_insn "*smax<mode>3_neon"
1051 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1052 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1053 (match_operand:VDQW 2 "s_register_operand" "w")))]
1055 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1057 (if_then_else (match_test "<Is_float_mode>")
1058 (const_string "neon_fp_minmax_s<q>")
1059 (const_string "neon_minmax<q>")))]
1062 ; TODO: V2DI shifts are current disabled because there are bugs in the
1063 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift.  Alternative 0 shifts by a vector of per-element
;; counts (vshl by register); alternative 1 handles an immediate shift
;; via neon_output_shift_immediate.
1066 (define_insn "vashl<mode>3"
1067 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1068 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1069 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1072 switch (which_alternative)
1074 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1075 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1077 VALID_NEON_QREG_MODE (<MODE>mode),
1079 default: gcc_unreachable ();
1082 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by immediate (vshr with signed elements).
1085 (define_insn "vashr<mode>3_imm"
1086 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1087 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1088 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1091 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1092 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1095 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by immediate (vshr with unsigned elements).
1098 (define_insn "vlshr<mode>3_imm"
1099 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1100 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1101 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1104 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1105 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1108 [(set_attr "type" "neon_shift_imm<q>")]
1111 ; Used for implementing logical shift-right, which is a left-shift by a negative
1112 ; amount, with signed operands. This is essentially the same as ashl<mode>3
1113 ; above, but using an unspec in case GCC tries anything tricky with negative
;; (...shift amounts; the unspec keeps the optimizers hands-off.)
1116 (define_insn "ashl<mode>3_signed"
1117 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1118 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1119 (match_operand:VDQI 2 "s_register_operand" "w")]
1120 UNSPEC_ASHIFT_SIGNED))]
1122 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1123 [(set_attr "type" "neon_shift_reg<q>")]
1126 ; Used for implementing logical shift-right, which is a left-shift by a negative
1127 ; amount, with unsigned operands.
1129 (define_insn "ashl<mode>3_unsigned"
1130 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1131 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1132 (match_operand:VDQI 2 "s_register_operand" "w")]
1133 UNSPEC_ASHIFT_UNSIGNED))]
1135 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1136 [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right-shift expander.  A register shift count is negated
;; and fed to the "left shift by negative amount" pattern above;
;; an immediate count goes straight to vashr<mode>3_imm.
1139 (define_expand "vashr<mode>3"
1140 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1141 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1142 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1145 if (s_register_operand (operands[2], <MODE>mode))
1147 rtx neg = gen_reg_rtx (<MODE>mode);
1148 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1149 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1152 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Logical right-shift expander; same strategy with the unsigned
;; left-shift pattern.
1156 (define_expand "vlshr<mode>3"
1157 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1158 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1159 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1162 if (s_register_operand (operands[2], <MODE>mode))
1164 rtx neg = gen_reg_rtx (<MODE>mode);
1165 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1166 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1169 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1175 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1176 ;; leaving the upper half uninitialized. This is OK since the shift
1177 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1178 ;; data flow analysis however, we pretend the full register is set
;; (using an unspec).  Alternatives: load from memory, or move from a
;; core register into lane 0.
1180 (define_insn "neon_load_count"
1181 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1182 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1183 UNSPEC_LOAD_COUNT))]
1186 vld1.32\t{%P0[0]}, %A1
1187 vmov.32\t%P0[0], %1"
1188 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; 64-bit left shift on a NEON register with no flag clobber.
;; Immediate counts must already be in [0,63]; register counts use the
;; variable-shift form (vshl by a 64-bit count register).
1191 (define_insn "ashldi3_neon_noclobber"
1192 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1193 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1194 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1195 "TARGET_NEON && reload_completed
1196 && (!CONST_INT_P (operands[2])
1197 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1199 vshl.u64\t%P0, %P1, %2
1200 vshl.u64\t%P0, %P1, %P2"
1201 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; 64-bit left shift, split after reload.  VFP/NEON destinations:
;; an out-of-range immediate is clamped (<1 becomes a plain move,
;; >63 becomes 63), a register count is loaded via neon_load_count,
;; then the clobber-free pattern above is emitted.  Core-register
;; destinations go through arm_emit_coreregs_64bit_shift.
1204 (define_insn_and_split "ashldi3_neon"
1205 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1206 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1207 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1208 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1209 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1210 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1211 (clobber (reg:CC_C CC_REGNUM))]
1214 "TARGET_NEON && reload_completed"
1218 if (IS_VFP_REGNUM (REGNO (operands[0])))
1220 if (CONST_INT_P (operands[2]))
1222 if (INTVAL (operands[2]) < 1)
1224 emit_insn (gen_movdi (operands[0], operands[1]));
1227 else if (INTVAL (operands[2]) > 63)
1228 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1232 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1233 operands[2] = operands[5];
1236 /* Ditch the unnecessary clobbers. */
1237 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1242 /* The shift expanders support either full overlap or no overlap. */
1243 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1244 || REGNO (operands[0]) == REGNO (operands[1]));
1246 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1247 operands[2], operands[3], operands[4]);
1251 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1252 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1253 (set_attr "type" "multiple")]
1256 ; The shift amount needs to be negated for right-shifts
;; Variable 64-bit arithmetic right shift, expressed as a signed
;; NEON left shift (vshl.s64) by an already-negated count.
1257 (define_insn "signed_shift_di3_neon"
1258 [(set (match_operand:DI 0 "s_register_operand" "=w")
1259 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1260 (match_operand:DI 2 "s_register_operand" " w")]
1261 UNSPEC_ASHIFT_SIGNED))]
1262 "TARGET_NEON && reload_completed"
1263 "vshl.s64\t%P0, %P1, %P2"
1264 [(set_attr "type" "neon_shift_reg")]
1267 ; The shift amount needs to be negated for right-shifts
;; Variable 64-bit logical right shift via vshl.u64 with a negated
;; count.
1268 (define_insn "unsigned_shift_di3_neon"
1269 [(set (match_operand:DI 0 "s_register_operand" "=w")
1270 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1271 (match_operand:DI 2 "s_register_operand" " w")]
1272 UNSPEC_ASHIFT_UNSIGNED))]
1273 "TARGET_NEON && reload_completed"
1274 "vshl.u64\t%P0, %P1, %P2"
1275 [(set_attr "type" "neon_shift_reg")]
;; 64-bit arithmetic right shift by immediate, no flag clobber.
;; Counts 1..64 are valid directly (vshr supports a shift of 64).
1278 (define_insn "ashrdi3_neon_imm_noclobber"
1279 [(set (match_operand:DI 0 "s_register_operand" "=w")
1280 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1281 (match_operand:DI 2 "const_int_operand" " i")))]
1282 "TARGET_NEON && reload_completed
1283 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1284 "vshr.s64\t%P0, %P1, %2"
1285 [(set_attr "type" "neon_shift_imm")]
;; 64-bit logical right shift by immediate, no flag clobber.
1288 (define_insn "lshrdi3_neon_imm_noclobber"
1289 [(set (match_operand:DI 0 "s_register_operand" "=w")
1290 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1291 (match_operand:DI 2 "const_int_operand" " i")))]
1292 "TARGET_NEON && reload_completed
1293 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1294 "vshr.u64\t%P0, %P1, %2"
1295 [(set_attr "type" "neon_shift_imm")]
;; 64-bit right shifts (arithmetic or logical via the RSHIFTS
;; iterator), split after reload.  VFP/NEON destinations: immediates
;; are clamped (<1 becomes a move, >64 becomes 64) and handled by the
;; imm_noclobber patterns; register counts are negated and fed to the
;; signed/unsigned variable-shift patterns above.  Core registers use
;; arm_emit_coreregs_64bit_shift (which clobbers CC for ASHIFTRT by
;; register).
1300 (define_insn_and_split "<shift>di3_neon"
1301 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1302 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1303 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1304 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1305 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1306 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1307 (clobber (reg:CC CC_REGNUM))]
1310 "TARGET_NEON && reload_completed"
1314 if (IS_VFP_REGNUM (REGNO (operands[0])))
1316 if (CONST_INT_P (operands[2]))
1318 if (INTVAL (operands[2]) < 1)
1320 emit_insn (gen_movdi (operands[0], operands[1]));
1323 else if (INTVAL (operands[2]) > 64)
1324 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1326 /* Ditch the unnecessary clobbers. */
1327 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1333 /* We must use a negative left-shift. */
1334 emit_insn (gen_negsi2 (operands[3], operands[2]));
1335 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1336 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1342 /* The shift expanders support either full overlap or no overlap. */
1343 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1344 || REGNO (operands[0]) == REGNO (operands[1]));
1346 /* This clobbers CC (ASHIFTRT by register only). */
1347 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1348 operands[2], operands[3], operands[4]);
1353 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1354 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1355 (set_attr "type" "multiple")]
1358 ;; Widening operations
;; Widening signed sum for 128-bit inputs: split the input into low
;; and high halves (arm_simd_vect_par_cnst_half) and accumulate each
;; with vaddw, low half first.
1360 (define_expand "widen_ssum<mode>3"
1361 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1362 (plus:<V_double_width>
1363 (sign_extend:<V_double_width>
1364 (match_operand:VQI 1 "s_register_operand" ""))
1365 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1368 machine_mode mode = GET_MODE (operands[1]);
1371 p1 = arm_simd_vect_par_cnst_half (mode, false);
1372 p2 = arm_simd_vect_par_cnst_half (mode, true);
1374 if (operands[0] != operands[2])
1375 emit_move_insn (operands[0], operands[2]);
1377 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1381 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Accumulate the sign-extended LOW half of operand 1; on big-endian
;; the "low" lanes live in the other D register, hence the %e/%f swap.
1389 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1390 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1391 (plus:<V_double_width>
1392 (sign_extend:<V_double_width>
1393 (vec_select:<V_HALF>
1394 (match_operand:VQI 1 "s_register_operand" "%w")
1395 (match_operand:VQI 2 "vect_par_constant_low" "")))
1396 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1399 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1400 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1402 [(set_attr "type" "neon_add_widen")])
;; Accumulate the sign-extended HIGH half of operand 1.
1404 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1405 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1406 (plus:<V_double_width>
1407 (sign_extend:<V_double_width>
1408 (vec_select:<V_HALF>
1409 (match_operand:VQI 1 "s_register_operand" "%w")
1410 (match_operand:VQI 2 "vect_par_constant_high" "")))
1411 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1414 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1415 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1417 [(set_attr "type" "neon_add_widen")])
;; Direct form for 64-bit inputs: a single vaddw.
1419 (define_insn "widen_ssum<mode>3"
1420 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1422 (sign_extend:<V_widen>
1423 (match_operand:VW 1 "s_register_operand" "%w"))
1424 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1426 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1427 [(set_attr "type" "neon_add_widen")]
;; Widening unsigned sum: identical structure to widen_ssum above but
;; with zero_extend / the unsigned element suffix.
1430 (define_expand "widen_usum<mode>3"
1431 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1432 (plus:<V_double_width>
1433 (zero_extend:<V_double_width>
1434 (match_operand:VQI 1 "s_register_operand" ""))
1435 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1438 machine_mode mode = GET_MODE (operands[1]);
1441 p1 = arm_simd_vect_par_cnst_half (mode, false);
1442 p2 = arm_simd_vect_par_cnst_half (mode, true);
1444 if (operands[0] != operands[2])
1445 emit_move_insn (operands[0], operands[2]);
1447 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1451 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Accumulate the zero-extended LOW half (lanes swap on big-endian).
1459 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1460 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1461 (plus:<V_double_width>
1462 (zero_extend:<V_double_width>
1463 (vec_select:<V_HALF>
1464 (match_operand:VQI 1 "s_register_operand" "%w")
1465 (match_operand:VQI 2 "vect_par_constant_low" "")))
1466 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1469 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1470 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1472 [(set_attr "type" "neon_add_widen")])
;; Accumulate the zero-extended HIGH half.
1474 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1475 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1476 (plus:<V_double_width>
1477 (zero_extend:<V_double_width>
1478 (vec_select:<V_HALF>
1479 (match_operand:VQI 1 "s_register_operand" "%w")
1480 (match_operand:VQI 2 "vect_par_constant_high" "")))
1481 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1484 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1485 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1487 [(set_attr "type" "neon_add_widen")])
;; Direct form for 64-bit inputs.
1489 (define_insn "widen_usum<mode>3"
1490 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1491 (plus:<V_widen> (zero_extend:<V_widen>
1492 (match_operand:VW 1 "s_register_operand" "%w"))
1493 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1495 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1496 [(set_attr "type" "neon_add_widen")]
1499 ;; Helpers for quad-word reduction operations
1501 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1502 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1503 ; N/2-element vector.
;; %e1/%f1 name the low/high D halves of the Q register input.
1505 (define_insn "quad_halves_<code>v4si"
1506 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1508 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1509 (parallel [(const_int 0) (const_int 1)]))
1510 (vec_select:V2SI (match_dup 1)
1511 (parallel [(const_int 2) (const_int 3)]))))]
1513 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1514 [(set_attr "vqh_mnem" "<VQH_mnem>")
1515 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Float variant; only valid under -funsafe-math-optimizations.
1518 (define_insn "quad_halves_<code>v4sf"
1519 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1521 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1522 (parallel [(const_int 0) (const_int 1)]))
1523 (vec_select:V2SF (match_dup 1)
1524 (parallel [(const_int 2) (const_int 3)]))))]
1525 "TARGET_NEON && flag_unsafe_math_optimizations"
1526 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1527 [(set_attr "vqh_mnem" "<VQH_mnem>")
1528 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; 16-bit element variant.
1531 (define_insn "quad_halves_<code>v8hi"
1532 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1534 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1535 (parallel [(const_int 0) (const_int 1)
1536 (const_int 2) (const_int 3)]))
1537 (vec_select:V4HI (match_dup 1)
1538 (parallel [(const_int 4) (const_int 5)
1539 (const_int 6) (const_int 7)]))))]
1541 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1542 [(set_attr "vqh_mnem" "<VQH_mnem>")
1543 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; 8-bit element variant.
1546 (define_insn "quad_halves_<code>v16qi"
1547 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1549 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1550 (parallel [(const_int 0) (const_int 1)
1551 (const_int 2) (const_int 3)
1552 (const_int 4) (const_int 5)
1553 (const_int 6) (const_int 7)]))
1554 (vec_select:V8QI (match_dup 1)
1555 (parallel [(const_int 8) (const_int 9)
1556 (const_int 10) (const_int 11)
1557 (const_int 12) (const_int 13)
1558 (const_int 14) (const_int 15)]))))]
1560 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1561 [(set_attr "vqh_mnem" "<VQH_mnem>")
1562 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Copy a half-width vector into the HIGH half of a 128-bit register
;; (via a subreg at offset GET_MODE_SIZE of the half mode).
1565 (define_expand "move_hi_quad_<mode>"
1566 [(match_operand:ANY128 0 "s_register_operand" "")
1567 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1570 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1571 GET_MODE_SIZE (<V_HALF>mode)),
;; Copy a half-width vector into the LOW half of a 128-bit register.
1576 (define_expand "move_lo_quad_<mode>"
1577 [(match_operand:ANY128 0 "s_register_operand" "")
1578 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1581 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1587 ;; Reduction operations
;; Scalar sum of a 64-bit vector: pairwise-add (vpadd) until every
;; lane holds the total, then extract lane 0.
1589 (define_expand "reduc_plus_scal_<mode>"
1590 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1591 (match_operand:VD 1 "s_register_operand" "")]
1592 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1594 rtx vec = gen_reg_rtx (<MODE>mode);
1595 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1596 &gen_neon_vpadd_internal<mode>);
1597 /* The same result is actually computed into every element. */
1598 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; 128-bit version: fold the halves together with quad_halves_plus,
;; then recurse into the 64-bit reduction.  Little-endian only.
1602 (define_expand "reduc_plus_scal_<mode>"
1603 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1604 (match_operand:VQ 1 "s_register_operand" "")]
1605 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1606 && !BYTES_BIG_ENDIAN"
1608 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1610 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1611 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; V2DI sum: a single vadd.i64 of the two halves, then extract.
1616 (define_expand "reduc_plus_scal_v2di"
1617 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1618 (match_operand:V2DI 1 "s_register_operand" "")]
1619 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1621 rtx vec = gen_reg_rtx (V2DImode);
1623 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]))ndef
1624 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Adds the two D halves of a V2DI; only lane 0 of the result is
;; meaningful to callers.
1629 (define_insn "arm_reduc_plus_internal_v2di"
1630 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1631 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1633 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1634 "vadd.i64\t%e0, %e1, %f1"
1635 [(set_attr "type" "neon_add_q")]
;; Scalar signed minimum of a 64-bit vector via pairwise vpmin.
1638 (define_expand "reduc_smin_scal_<mode>"
1639 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1640 (match_operand:VD 1 "s_register_operand" "")]
1641 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1643 rtx vec = gen_reg_rtx (<MODE>mode);
1645 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1646 &gen_neon_vpsmin<mode>);
1647 /* The result is computed into every element of the vector. */
1648 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; 128-bit version: halve with quad_halves_smin, then recurse.
1652 (define_expand "reduc_smin_scal_<mode>"
1653 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1654 (match_operand:VQ 1 "s_register_operand" "")]
1655 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1656 && !BYTES_BIG_ENDIAN"
1658 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1660 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1661 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Scalar signed maximum of a 64-bit vector via pairwise vpmax.
1666 (define_expand "reduc_smax_scal_<mode>"
1667 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1668 (match_operand:VD 1 "s_register_operand" "")]
1669 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1671 rtx vec = gen_reg_rtx (<MODE>mode);
1672 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1673 &gen_neon_vpsmax<mode>);
1674 /* The result is computed into every element of the vector. */
1675 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; 128-bit version of the signed maximum reduction.
1679 (define_expand "reduc_smax_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VQ 1 "s_register_operand" "")]
1682 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1683 && !BYTES_BIG_ENDIAN"
1685 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1687 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1688 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Scalar unsigned minimum of a 64-bit integer vector via vpmin.
1693 (define_expand "reduc_umin_scal_<mode>"
1694 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1695 (match_operand:VDI 1 "s_register_operand" "")]
1698 rtx vec = gen_reg_rtx (<MODE>mode);
1699 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1700 &gen_neon_vpumin<mode>);
1701 /* The result is computed into every element of the vector. */
1702 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; 128-bit version: halve with quad_halves_umin, then recurse.
1706 (define_expand "reduc_umin_scal_<mode>"
1707 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1708 (match_operand:VQI 1 "s_register_operand" "")]
1709 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1711 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1713 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1714 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Scalar unsigned maximum of a 64-bit integer vector via vpmax.
1719 (define_expand "reduc_umax_scal_<mode>"
1720 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1721 (match_operand:VDI 1 "s_register_operand" "")]
1724 rtx vec = gen_reg_rtx (<MODE>mode);
1725 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1726 &gen_neon_vpumax<mode>);
1727 /* The result is computed into every element of the vector. */
1728 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; 128-bit version of the unsigned maximum reduction.
1732 (define_expand "reduc_umax_scal_<mode>"
1733 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1734 (match_operand:VQI 1 "s_register_operand" "")]
1735 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1737 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1739 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1740 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add of two 64-bit vectors (vpadd); used as the step
;; function by the reduc_plus expanders above.
1745 (define_insn "neon_vpadd_internal<mode>"
1746 [(set (match_operand:VD 0 "s_register_operand" "=w")
1747 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1748 (match_operand:VD 2 "s_register_operand" "w")]
1751 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1752 ;; Assume this schedules like vadd.
1754 (if_then_else (match_test "<Is_float_mode>")
1755 (const_string "neon_fp_reduc_add_s<q>")
1756 (const_string "neon_reduc_add<q>")))]
;; FP16 pairwise add.
1759 (define_insn "neon_vpaddv4hf"
1761 (match_operand:V4HF 0 "s_register_operand" "=w")
1762 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1763 (match_operand:V4HF 2 "s_register_operand" "w")]
1765 "TARGET_NEON_FP16INST"
1766 "vpadd.f16\t%P0, %P1, %P2"
1767 [(set_attr "type" "neon_reduc_add")]
;; Pairwise signed minimum (vpmin).
1770 (define_insn "neon_vpsmin<mode>"
1771 [(set (match_operand:VD 0 "s_register_operand" "=w")
1772 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1773 (match_operand:VD 2 "s_register_operand" "w")]
1776 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1778 (if_then_else (match_test "<Is_float_mode>")
1779 (const_string "neon_fp_reduc_minmax_s<q>")
1780 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum (vpmax).
1783 (define_insn "neon_vpsmax<mode>"
1784 [(set (match_operand:VD 0 "s_register_operand" "=w")
1785 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1786 (match_operand:VD 2 "s_register_operand" "w")]
1789 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1791 (if_then_else (match_test "<Is_float_mode>")
1792 (const_string "neon_fp_reduc_minmax_s<q>")
1793 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum (integer modes only).
1796 (define_insn "neon_vpumin<mode>"
1797 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1798 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1799 (match_operand:VDI 2 "s_register_operand" "w")]
1802 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1803 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum (integer modes only).
1806 (define_insn "neon_vpumax<mode>"
1807 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1808 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1809 (match_operand:VDI 2 "s_register_operand" "w")]
1812 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1813 [(set_attr "type" "neon_reduc_minmax<q>")]
1816 ;; Saturating arithmetic
1818 ; NOTE: Neon supports many more saturating variants of instructions than the
1819 ; following, but these are all GCC currently understands.
1820 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1821 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (vqadd, signed element).
1824 (define_insn "*ss_add<mode>_neon"
1825 [(set (match_operand:VD 0 "s_register_operand" "=w")
1826 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1827 (match_operand:VD 2 "s_register_operand" "w")))]
1829 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1830 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add.
1833 (define_insn "*us_add<mode>_neon"
1834 [(set (match_operand:VD 0 "s_register_operand" "=w")
1835 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1836 (match_operand:VD 2 "s_register_operand" "w")))]
1838 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1839 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (vqsub).
1842 (define_insn "*ss_sub<mode>_neon"
1843 [(set (match_operand:VD 0 "s_register_operand" "=w")
1844 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1845 (match_operand:VD 2 "s_register_operand" "w")))]
1847 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1848 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract.
1851 (define_insn "*us_sub<mode>_neon"
1852 [(set (match_operand:VD 0 "s_register_operand" "=w")
1853 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1854 (match_operand:VD 2 "s_register_operand" "w")))]
1856 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1857 [(set_attr "type" "neon_qsub<q>")]
1860 ;; Conditional instructions. These are comparisons with conditional moves for
1861 ;; vectors. They perform the assignment:
1863 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1865 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Strategy: build a lane mask with vcge/vcgt/vceq (or the compare-
;; against-zero forms), possibly the INVERSE comparison for codes that
;; must be NaN-safe, then select with vbsl, swapping its two value
;; operands when the inverse comparison was used.
1868 (define_expand "vcond<mode><mode>"
1869 [(set (match_operand:VDQW 0 "s_register_operand" "")
1871 (match_operator 3 "comparison_operator"
1872 [(match_operand:VDQW 4 "s_register_operand" "")
1873 (match_operand:VDQW 5 "nonmemory_operand" "")])
1874 (match_operand:VDQW 1 "s_register_operand" "")
1875 (match_operand:VDQW 2 "s_register_operand" "")))]
1876 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1879 int use_zero_form = 0;
1880 int swap_bsl_operands = 0;
1881 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1882 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1884 rtx (*base_comparison) (rtx, rtx, rtx);
1885 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1887 switch (GET_CODE (operands[3]))
1894 if (operands[5] == CONST0_RTX (<MODE>mode))
1901 if (!REG_P (operands[5]))
1902 operands[5] = force_reg (<MODE>mode, operands[5]);
1905 switch (GET_CODE (operands[3]))
1915 base_comparison = gen_neon_vcge<mode>;
1916 complimentary_comparison = gen_neon_vcgt<mode>;
1924 base_comparison = gen_neon_vcgt<mode>;
1925 complimentary_comparison = gen_neon_vcge<mode>;
1930 base_comparison = gen_neon_vceq<mode>;
1931 complimentary_comparison = gen_neon_vceq<mode>;
1937 switch (GET_CODE (operands[3]))
1944 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1945 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1951 Note that there also exist direct comparison against 0 forms,
1952 so catch those as a special case. */
1956 switch (GET_CODE (operands[3]))
1959 base_comparison = gen_neon_vclt<mode>;
1962 base_comparison = gen_neon_vcle<mode>;
1965 /* Do nothing, other zero form cases already have the correct
1972 emit_insn (base_comparison (mask, operands[4], operands[5]));
1974 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1981 /* Vector compare returns false for lanes which are unordered, so if we use
1982 the inverse of the comparison we actually want to emit, then
1983 swap the operands to BSL, we will end up with the correct result.
1984 Note that a NE NaN and NaN NE b are true for all a, b.
1986 Our transformations are:
1991 a NE b -> !(a EQ b) */
1994 emit_insn (base_comparison (mask, operands[4], operands[5]));
1996 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1998 swap_bsl_operands = 1;
2001 /* We check (a > b || b > a). combining these comparisons give us
2002 true iff !(a != b && a ORDERED b), swapping the operands to BSL
2003 will then give us (a == b || a UNORDERED b) as intended. */
2005 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
2006 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
2007 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2008 swap_bsl_operands = 1;
2011 /* Operands are ORDERED iff (a > b || b >= a).
2012 Swapping the operands to BSL will give the UNORDERED case. */
2013 swap_bsl_operands = 1;
2016 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
2017 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
2018 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
2024 if (swap_bsl_operands)
2025 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2028 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned vcond: build the lane mask with the unsigned comparisons
;; (vcgeu/vcgtu/vceq) -- inverting via swapped operands for LE/LT with
;; a non-zero rhs -- then select with vbsl.
2033 (define_expand "vcondu<mode><mode>"
2034 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2036 (match_operator 3 "arm_comparison_operator"
2037 [(match_operand:VDQIW 4 "s_register_operand" "")
2038 (match_operand:VDQIW 5 "s_register_operand" "")])
2039 (match_operand:VDQIW 1 "s_register_operand" "")
2040 (match_operand:VDQIW 2 "s_register_operand" "")))]
2044 int inverse = 0, immediate_zero = 0;
2046 mask = gen_reg_rtx (<V_cmp_result>mode);
2048 if (operands[5] == CONST0_RTX (<MODE>mode))
2050 else if (!REG_P (operands[5]))
2051 operands[5] = force_reg (<MODE>mode, operands[5]);
2053 switch (GET_CODE (operands[3]))
2056 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2060 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2064 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2069 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2071 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2076 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2078 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2082 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2091 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2094 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2100 ;; Patterns for builtins.
2102 ; good for plain vadd, vaddq.
;; vadd intrinsic expander (single-precision float vector modes, VCVTF).
;; When the mode is integral or -funsafe-math-optimizations is on, emit
;; canonical RTL addition so the optimizers can see it; otherwise fall
;; back to the UNSPEC form, which keeps NEON's flush-to-zero behavior
;; opaque to generic FP optimizations.
2104 (define_expand "neon_vadd<mode>"
2105 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2106 (match_operand:VCVTF 1 "s_register_operand" "w")
2107 (match_operand:VCVTF 2 "s_register_operand" "w")]
2110 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2111 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2113 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; vadd intrinsic expander for the FP16 vector modes (VH); always uses
;; the dedicated fp16 add pattern and requires the FP16INST extension.
2118 (define_expand "neon_vadd<mode>"
2119 [(match_operand:VH 0 "s_register_operand")
2120 (match_operand:VH 1 "s_register_operand")
2121 (match_operand:VH 2 "s_register_operand")]
2122 "TARGET_NEON_FP16INST"
2124 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; vsub intrinsic expander for the FP16 vector modes (VH), mirroring the
;; neon_vadd<mode> expander above.
2128 (define_expand "neon_vsub<mode>"
2129 [(match_operand:VH 0 "s_register_operand")
2130 (match_operand:VH 1 "s_register_operand")
2131 (match_operand:VH 2 "s_register_operand")]
2132 "TARGET_NEON_FP16INST"
2134 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2138 ; Note that NEON operations don't support the full IEEE 754 standard: in
2139 ; particular, denormal values are flushed to zero. This means that GCC cannot
2140 ; use those instructions for autovectorization, etc. unless
2141 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2142 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2143 ; header) must work in either case: if -funsafe-math-optimizations is given,
2144 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2145 ; expand to unspecs (which may potentially limit the extent to which they might
2146 ; be optimized by generic code).
2148 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC float vector add, used when flag_unsafe_math_optimizations is
;; false so the addition is not exposed as canonical RTL.
2150 (define_insn "neon_vadd<mode>_unspec"
2151 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2152 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2153 (match_operand:VCVTF 2 "s_register_operand" "w")]
2156 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2158 (if_then_else (match_test "<Is_float_mode>")
2159 (const_string "neon_fp_addsub_s<q>")
2160 (const_string "neon_add<q>")))]
;; Widening add long: D-register inputs, Q-register (double-width) result.
2163 (define_insn "neon_vaddl<sup><mode>"
2164 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2165 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2166 (match_operand:VDI 2 "s_register_operand" "w")]
2169 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2170 [(set_attr "type" "neon_add_long")]
;; Widening add wide: wide Q-register first input plus narrow D second.
2173 (define_insn "neon_vaddw<sup><mode>"
2174 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2175 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2176 (match_operand:VDI 2 "s_register_operand" "w")]
2179 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2180 [(set_attr "type" "neon_add_widen")]
;; Halving add (vhadd) / rounding halving add (vrhadd), selected by <r>.
2185 (define_insn "neon_v<r>hadd<sup><mode>"
2186 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2187 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2188 (match_operand:VDQIW 2 "s_register_operand" "w")]
2191 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2192 [(set_attr "type" "neon_add_halve_q")]
;; Saturating add (vqadd), signed or unsigned per <sup>.
2195 (define_insn "neon_vqadd<sup><mode>"
2196 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2197 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2198 (match_operand:VDQIX 2 "s_register_operand" "w")]
2201 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2202 [(set_attr "type" "neon_qadd<q>")]
;; Add and narrow, returning the high half of each sum (vaddhn), with an
;; optional rounding variant (vraddhn) selected by <r>.
2205 (define_insn "neon_v<r>addhn<mode>"
2206 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2207 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2208 (match_operand:VN 2 "s_register_operand" "w")]
2211 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2212 [(set_attr "type" "neon_add_halve_narrow_q")]
2215 ;; Polynomial and Float multiplication.
;; UNSPEC multiply for polynomial and float vector modes (VPF); the type
;; attribute distinguishes FP multiplies from element multiplies.
2216 (define_insn "neon_vmul<pf><mode>"
2217 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2218 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2219 (match_operand:VPF 2 "s_register_operand" "w")]
2222 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2224 (if_then_else (match_test "<Is_float_mode>")
2225 (const_string "neon_fp_mul_s<q>")
2226 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Canonical RTL fp16 vector multiply; gated on unsafe-math because NEON
;; fp arithmetic flushes denormals to zero.
2229 (define_insn "mul<mode>3"
2231 (match_operand:VH 0 "s_register_operand" "=w")
2233 (match_operand:VH 1 "s_register_operand" "w")
2234 (match_operand:VH 2 "s_register_operand" "w")))]
2235 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2236 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2237 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Intrinsic fp16 vector multiply; available without unsafe-math since
;; intrinsics must always work.
2240 (define_insn "neon_vmulf<mode>"
2242 (match_operand:VH 0 "s_register_operand" "=w")
2244 (match_operand:VH 1 "s_register_operand" "w")
2245 (match_operand:VH 2 "s_register_operand" "w")))]
2246 "TARGET_NEON_FP16INST"
2247 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2248 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmla intrinsic expander: multiply-accumulate op0 = op1 + op2 * op3.
;; Canonical RTL when safe, UNSPEC pattern otherwise (same policy as
;; neon_vadd above).
2251 (define_expand "neon_vmla<mode>"
2252 [(match_operand:VDQW 0 "s_register_operand" "=w")
2253 (match_operand:VDQW 1 "s_register_operand" "0")
2254 (match_operand:VDQW 2 "s_register_operand" "w")
2255 (match_operand:VDQW 3 "s_register_operand" "w")]
2258 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2259 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2260 operands[2], operands[3]));
2262 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2263 operands[2], operands[3]));
;; vfma intrinsic expander (float modes): fused multiply-add; note the
;; accumulator (operand 1) is passed last to the fma pattern.
2267 (define_expand "neon_vfma<VCVTF:mode>"
2268 [(match_operand:VCVTF 0 "s_register_operand")
2269 (match_operand:VCVTF 1 "s_register_operand")
2270 (match_operand:VCVTF 2 "s_register_operand")
2271 (match_operand:VCVTF 3 "s_register_operand")]
2272 "TARGET_NEON && TARGET_FMA"
2274 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfma intrinsic expander for fp16 vector modes.
2279 (define_expand "neon_vfma<VH:mode>"
2280 [(match_operand:VH 0 "s_register_operand")
2281 (match_operand:VH 1 "s_register_operand")
2282 (match_operand:VH 2 "s_register_operand")
2283 (match_operand:VH 3 "s_register_operand")]
2284 "TARGET_NEON_FP16INST"
2286 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms intrinsic expander (float modes): fused multiply-subtract.
2291 (define_expand "neon_vfms<VCVTF:mode>"
2292 [(match_operand:VCVTF 0 "s_register_operand")
2293 (match_operand:VCVTF 1 "s_register_operand")
2294 (match_operand:VCVTF 2 "s_register_operand")
2295 (match_operand:VCVTF 3 "s_register_operand")]
2296 "TARGET_NEON && TARGET_FMA"
2298 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms intrinsic expander for fp16 vector modes.
2303 (define_expand "neon_vfms<VH:mode>"
2304 [(match_operand:VH 0 "s_register_operand")
2305 (match_operand:VH 1 "s_register_operand")
2306 (match_operand:VH 2 "s_register_operand")
2307 (match_operand:VH 3 "s_register_operand")]
2308 "TARGET_NEON_FP16INST"
2310 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2315 ;; The expand RTL structure here is not important.
2316 ;; We use the gen_* functions anyway.
2317 ;; We just need something to wrap the iterators around.
;; Expander for the FP16 multiply-accumulate-long intrinsics
;; (vfmlal/vfmlsl, low/high half): builds the PARALLEL that selects the
;; requested half of the fp16 inputs and hands off to the matching
;; *_intrinsic insn below.
2319 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2320 [(set (match_operand:VCVTF 0 "s_register_operand")
2322 [(match_operand:VCVTF 1 "s_register_operand")
2324 (match_operand:<VFML> 2 "s_register_operand")
2325 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2328 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2329 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
;; vfmlal low: widening fp16 multiply-accumulate on the low halves of
;; operands 2 and 3, accumulating into float32 operand 1/0.
2337 (define_insn "vfmal_low<mode>_intrinsic"
2338 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2341 (vec_select:<VFMLSEL>
2342 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2343 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2345 (vec_select:<VFMLSEL>
2346 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2347 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2348 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2350 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2351 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmlsl high: widening fp16 multiply-subtract on the high halves.
2354 (define_insn "vfmsl_high<mode>_intrinsic"
2355 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2359 (vec_select:<VFMLSEL>
2360 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2361 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2363 (vec_select:<VFMLSEL>
2364 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2365 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2366 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2368 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2369 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmlal high: widening fp16 multiply-accumulate on the high halves.
2372 (define_insn "vfmal_high<mode>_intrinsic"
2373 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2376 (vec_select:<VFMLSEL>
2377 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2378 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2380 (vec_select:<VFMLSEL>
2381 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2382 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2383 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2385 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2386 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmlsl low: widening fp16 multiply-subtract on the low halves.
2389 (define_insn "vfmsl_low<mode>_intrinsic"
2390 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2394 (vec_select:<VFMLSEL>
2395 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2396 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2398 (vec_select:<VFMLSEL>
2399 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2400 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2401 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2403 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2404 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Expander for the by-lane vfmlal/vfmlsl intrinsics where the lane is
;; taken from a same-width fp16 vector: normalize the lane number for
;; endianness, build the half selector, and emit the *_intrinsic insn.
2407 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2408 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2410 [(match_operand:VCVTF 1 "s_register_operand")
2412 (match_operand:<VFML> 2 "s_register_operand")
2413 (match_operand:<VFML> 3 "s_register_operand"))
2414 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2417 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2418 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2419 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2420 (operands[0], operands[1],
2421 operands[2], operands[3],
;; vfmlal by-lane, low half: multiplies the low half of operand 2 by a
;; single duplicated lane of operand 3.  The output routine folds lane
;; numbers beyond one D register into the high subregister (%<V_hi>3).
2426 (define_insn "vfmal_lane_low<mode>_intrinsic"
2427 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2430 (vec_select:<VFMLSEL>
2431 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2432 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2434 (vec_duplicate:<VFMLSEL>
2436 (match_operand:<VFML> 3 "s_register_operand" "x")
2437 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2438 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2441 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2442 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2444 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2445 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2449 operands[5] = GEN_INT (lane);
2450 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2453 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Expander for the by-lane vfmlal/vfmlsl intrinsics where the lane
;; vector has a *different* width from the multiplicand (the
;; lane/laneq cross-width forms); lane numbering is normalized against
;; the VFMLSEL2 mode.
2456 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2457 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2459 [(match_operand:VCVTF 1 "s_register_operand")
2461 (match_operand:<VFML> 2 "s_register_operand")
2462 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2463 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2467 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2468 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2469 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2470 (operands[0], operands[1], operands[2], operands[3],
2475 ;; Used to implement the intrinsics:
2476 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2477 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2478 ;; Needs a bit of care to get the modes of the different sub-expressions right
2479 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2480 ;; S or D subregister to select the appropriate lane from.
2482 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2483 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2486 (vec_select:<VFMLSEL>
2487 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2488 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2490 (vec_duplicate:<VFMLSEL>
2492 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2493 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2494 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2497 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2498 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2499 int new_lane = lane % elts_per_reg;
2500 int regdiff = lane / elts_per_reg;
2501 operands[5] = GEN_INT (new_lane);
2502 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2503 because we want the print_operand code to print the appropriate
2504 S or D register prefix. */
2505 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2506 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2507 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2509 [(set_attr "type" "neon_fp_mla_s<q>")]
2512 ;; Used to implement the intrinsics:
2513 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2514 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2515 ;; Needs a bit of care to get the modes of the different sub-expressions right
2516 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2517 ;; S or D subregister to select the appropriate lane from.
;; High-half, cross-width by-lane vfmlal: the multiplicand uses the high
;; half of operand 2 (%<V_hi>2) while the lane operand is remapped to the
;; correct half-width subregister of operand 3.
2519 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2520 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2523 (vec_select:<VFMLSEL>
2524 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2525 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2527 (vec_duplicate:<VFMLSEL>
2529 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2530 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2531 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2534 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2535 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2536 int new_lane = lane % elts_per_reg;
2537 int regdiff = lane / elts_per_reg;
2538 operands[5] = GEN_INT (new_lane);
2539 /* We re-create operands[3] in the halved VFMLSEL mode
2540 because we've calculated the correct half-width subreg to extract
2541 the lane from and we want to print *that* subreg instead. */
2542 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2543 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2545 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmlal by-lane, high half (same-width lane vector): lane numbers past
;; the first D register are folded into the high subregister at output
;; time, mirroring vfmal_lane_low<mode>_intrinsic.
2548 (define_insn "vfmal_lane_high<mode>_intrinsic"
2549 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2552 (vec_select:<VFMLSEL>
2553 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2554 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2556 (vec_duplicate:<VFMLSEL>
2558 (match_operand:<VFML> 3 "s_register_operand" "x")
2559 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2560 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2563 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2564 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2566 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2567 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2571 operands[5] = GEN_INT (lane);
2572 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2575 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmlsl by-lane, low half (same-width lane vector); subtracting
;; counterpart of vfmal_lane_low<mode>_intrinsic.
2578 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2579 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2583 (vec_select:<VFMLSEL>
2584 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2585 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2587 (vec_duplicate:<VFMLSEL>
2589 (match_operand:<VFML> 3 "s_register_operand" "x")
2590 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2591 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2594 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2595 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2597 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2598 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2602 operands[5] = GEN_INT (lane);
2603 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2606 [(set_attr "type" "neon_fp_mla_s<q>")]
2609 ;; Used to implement the intrinsics:
2610 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2611 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2612 ;; Needs a bit of care to get the modes of the different sub-expressions right
2613 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2614 ;; S or D subregister to select the appropriate lane from.
;; Subtracting counterpart of vfmal_lane_low<vfmlsel2><mode>_intrinsic:
;; low half of operand 2 times a lane of a different-width operand 3,
;; with both re-created in half-width modes for register printing.
2616 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2617 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2621 (vec_select:<VFMLSEL>
2622 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2623 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2625 (vec_duplicate:<VFMLSEL>
2627 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2628 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2629 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2632 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2633 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2634 int new_lane = lane % elts_per_reg;
2635 int regdiff = lane / elts_per_reg;
2636 operands[5] = GEN_INT (new_lane);
2637 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2638 because we want the print_operand code to print the appropriate
2639 S or D register prefix. */
2640 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2641 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2642 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2644 [(set_attr "type" "neon_fp_mla_s<q>")]
2647 ;; Used to implement the intrinsics:
2648 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2649 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2650 ;; Needs a bit of care to get the modes of the different sub-expressions right
2651 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2652 ;; S or D subregister to select the appropriate lane from.
;; Subtracting counterpart of vfmal_lane_high<vfmlsel2><mode>_intrinsic.
2654 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2655 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2659 (vec_select:<VFMLSEL>
2660 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2661 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2663 (vec_duplicate:<VFMLSEL>
2665 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2666 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2667 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2670 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2671 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2672 int new_lane = lane % elts_per_reg;
2673 int regdiff = lane / elts_per_reg;
2674 operands[5] = GEN_INT (new_lane);
2675 /* We re-create operands[3] in the halved VFMLSEL mode
2676 because we've calculated the correct half-width subreg to extract
2677 the lane from and we want to print *that* subreg instead. */
2678 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2679 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2681 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmlsl by-lane, high half (same-width lane vector); subtracting
;; counterpart of vfmal_lane_high<mode>_intrinsic, with the same
;; fold-into-high-subregister handling for large lane numbers.
2684 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2685 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2689 (vec_select:<VFMLSEL>
2690 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2691 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2693 (vec_duplicate:<VFMLSEL>
2695 (match_operand:<VFML> 3 "s_register_operand" "x")
2696 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2697 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2700 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2701 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2703 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2704 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2708 operands[5] = GEN_INT (lane);
2709 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2712 [(set_attr "type" "neon_fp_mla_s<q>")]
2715 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC multiply-accumulate: op0 (tied to op1) += op2 * op3.
2717 (define_insn "neon_vmla<mode>_unspec"
2718 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2719 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2720 (match_operand:VDQW 2 "s_register_operand" "w")
2721 (match_operand:VDQW 3 "s_register_operand" "w")]
2724 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2726 (if_then_else (match_test "<Is_float_mode>")
2727 (const_string "neon_fp_mla_s<q>")
2728 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-accumulate long: Q-register accumulator plus the
;; product of two D-register operands.
2731 (define_insn "neon_vmlal<sup><mode>"
2732 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2733 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2734 (match_operand:VW 2 "s_register_operand" "w")
2735 (match_operand:VW 3 "s_register_operand" "w")]
2738 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2739 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; vmls intrinsic expander: multiply-subtract; canonical RTL when safe,
;; UNSPEC pattern otherwise (same policy as neon_vmla).
2742 (define_expand "neon_vmls<mode>"
2743 [(match_operand:VDQW 0 "s_register_operand" "=w")
2744 (match_operand:VDQW 1 "s_register_operand" "0")
2745 (match_operand:VDQW 2 "s_register_operand" "w")
2746 (match_operand:VDQW 3 "s_register_operand" "w")]
2749 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2750 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2751 operands[1], operands[2], operands[3]));
2753 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2754 operands[2], operands[3]));
2758 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC multiply-subtract: op0 (tied to op1) -= op2 * op3.
2760 (define_insn "neon_vmls<mode>_unspec"
2761 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2762 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2763 (match_operand:VDQW 2 "s_register_operand" "w")
2764 (match_operand:VDQW 3 "s_register_operand" "w")]
2767 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2769 (if_then_else (match_test "<Is_float_mode>")
2770 (const_string "neon_fp_mla_s<q>")
2771 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-subtract long, analogue of neon_vmlal above.
2774 (define_insn "neon_vmlsl<sup><mode>"
2775 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2776 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2777 (match_operand:VW 2 "s_register_operand" "w")
2778 (match_operand:VW 3 "s_register_operand" "w")]
2781 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2782 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2785 ;; vqdmulh, vqrdmulh
;; Saturating doubling multiply returning high half, with optional
;; rounding selected by <r>.
2786 (define_insn "neon_vq<r>dmulh<mode>"
2787 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2788 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2789 (match_operand:VMDQI 2 "s_register_operand" "w")]
2792 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2793 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2796 ;; vqrdmlah, vqrdmlsh
;; Saturating rounding doubling multiply accumulate/subtract (v8.1-A),
;; accumulator tied to operand 1.
2797 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2798 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2799 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2800 (match_operand:VMDQI 2 "s_register_operand" "w")
2801 (match_operand:VMDQI 3 "s_register_operand" "w")]
2804 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2805 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Saturating doubling multiply-accumulate long: widening product of two
;; D registers added into a Q accumulator.
2808 (define_insn "neon_vqdmlal<mode>"
2809 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2810 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2811 (match_operand:VMDI 2 "s_register_operand" "w")
2812 (match_operand:VMDI 3 "s_register_operand" "w")]
2815 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2816 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Saturating doubling multiply-subtract long, analogue of vqdmlal.
2819 (define_insn "neon_vqdmlsl<mode>"
2820 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2821 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2822 (match_operand:VMDI 2 "s_register_operand" "w")
2823 (match_operand:VMDI 3 "s_register_operand" "w")]
2826 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2827 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Widening multiply long: product of two D registers into a Q register.
2830 (define_insn "neon_vmull<sup><mode>"
2831 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2832 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2833 (match_operand:VW 2 "s_register_operand" "w")]
2836 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2837 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Saturating doubling multiply long.
2840 (define_insn "neon_vqdmull<mode>"
2841 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2842 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2843 (match_operand:VMDI 2 "s_register_operand" "w")]
2846 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2847 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; vsub intrinsic expander (VCVTF modes): canonical RTL subtraction when
;; safe, UNSPEC fallback otherwise (same policy as neon_vadd).
2850 (define_expand "neon_vsub<mode>"
2851 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2852 (match_operand:VCVTF 1 "s_register_operand" "w")
2853 (match_operand:VCVTF 2 "s_register_operand" "w")]
2856 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2857 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2859 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2864 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; UNSPEC float vector subtract, mirror of neon_vadd<mode>_unspec.
2866 (define_insn "neon_vsub<mode>_unspec"
2867 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2868 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2869 (match_operand:VCVTF 2 "s_register_operand" "w")]
2872 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2874 (if_then_else (match_test "<Is_float_mode>")
2875 (const_string "neon_fp_addsub_s<q>")
2876 (const_string "neon_sub<q>")))]
;; Widening subtract long: D-register inputs, Q-register result.
2879 (define_insn "neon_vsubl<sup><mode>"
2880 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2881 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2882 (match_operand:VDI 2 "s_register_operand" "w")]
2885 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2886 [(set_attr "type" "neon_sub_long")]
;; Widening subtract wide: wide Q first input minus narrow D second.
2889 (define_insn "neon_vsubw<sup><mode>"
2890 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2891 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2892 (match_operand:VDI 2 "s_register_operand" "w")]
2895 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2896 [(set_attr "type" "neon_sub_widen")]
;; Saturating subtract (vqsub), signed or unsigned per <sup>.
2899 (define_insn "neon_vqsub<sup><mode>"
2900 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2901 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2902 (match_operand:VDQIX 2 "s_register_operand" "w")]
2905 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2906 [(set_attr "type" "neon_qsub<q>")]
;; Halving subtract (vhsub).
2909 (define_insn "neon_vhsub<sup><mode>"
2910 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2911 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2912 (match_operand:VDQIW 2 "s_register_operand" "w")]
2915 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2916 [(set_attr "type" "neon_sub_halve<q>")]
;; Subtract and narrow, returning the high half of each difference
;; (vsubhn), with an optional rounding variant (vrsubhn) via <r>.
2919 (define_insn "neon_v<r>subhn<mode>"
2920 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2921 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2922 (match_operand:VN 2 "s_register_operand" "w")]
2925 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2926 [(set_attr "type" "neon_sub_halve_narrow_q")]
2929 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2930 ;; without unsafe math optimizations.
;; Vector compare expander over COMPARISONS: routes float comparisons
;; without unsafe-math to the mode-specific *_insn_unspec patterns
;; (defined only for V2SF/V4SF), everything else to the canonical insn.
2931 (define_expand "neon_vc<cmp_op><mode>"
2932 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2934 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2935 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2938 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2940 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2941 && !flag_unsafe_math_optimizations)
2943 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2944 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2945 whereas this expander iterates over the integer modes as well,
2946 but we will never expand to UNSPECs for the integer comparisons. */
2950 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2955 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2964 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Canonical vector compare insn; second alternative compares against
;; immediate zero (#0).  Output is built with sprintf so the element
;; type letter ("f" for float modes) can be chosen at run time.
2971 (define_insn "neon_vc<cmp_op><mode>_insn"
2972 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2974 (COMPARISONS:<V_cmp_result>
2975 (match_operand:VDQW 1 "s_register_operand" "w,w")
2976 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2977 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2978 && !flag_unsafe_math_optimizations)"
2981 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2983 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2984 ? "f" : "<cmp_type>",
2985 which_alternative == 0
2986 ? "%<V_reg>2" : "#0");
2987 output_asm_insn (pattern, operands);
2991 (if_then_else (match_operand 2 "zero_operand")
2992 (const_string "neon_compare_zero<q>")
2993 (const_string "neon_compare<q>")))]
;; UNSPEC float vector compare, used when -funsafe-math-optimizations is
;; off; alternative 1 compares against immediate zero.
2996 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2997 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2998 (unspec:<V_cmp_result>
2999 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
3000 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
3005 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3007 which_alternative == 0
3008 ? "%<V_reg>2" : "#0");
3009 output_asm_insn (pattern, operands);
3012 [(set_attr "type" "neon_fp_compare_s<q>")]
;; FP16 vector compare expander: same safe/unsafe-math routing as the
;; VDQW expander above, but via the fp16insn / fp16insn_unspec patterns.
3015 (define_expand "neon_vc<cmp_op><mode>"
3016 [(match_operand:<V_cmp_result> 0 "s_register_operand")
3019 (match_operand:VH 1 "s_register_operand")
3020 (match_operand:VH 2 "reg_or_zero_operand")))]
3021 "TARGET_NEON_FP16INST"
3023 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
3025 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3026 && !flag_unsafe_math_optimizations)
3028 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
3029 (operands[0], operands[1], operands[2]));
3032 (gen_neon_vc<cmp_op><mode>_fp16insn
3033 (operands[0], operands[1], operands[2]));
;; Canonical FP16 vector compare insn (unsafe-math only for float
;; comparisons); alternative 1 compares against immediate zero.
3037 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3038 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3040 (COMPARISONS:<V_cmp_result>
3041 (match_operand:VH 1 "s_register_operand" "w,w")
3042 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3043 "TARGET_NEON_FP16INST
3044 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3045 && !flag_unsafe_math_optimizations)"
3048 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3050 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3051 ? "f" : "<cmp_type>",
3052 which_alternative == 0
3053 ? "%<V_reg>2" : "#0");
3054 output_asm_insn (pattern, operands);
3058 (if_then_else (match_operand 2 "zero_operand")
3059 (const_string "neon_compare_zero<q>")
3060 (const_string "neon_compare<q>")))])
;; UNSPEC FP16 vector compare for the strict (non-unsafe-math) case.
3062 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3064 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3065 (unspec:<V_cmp_result>
3066 [(match_operand:VH 1 "s_register_operand" "w,w")
3067 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3069 "TARGET_NEON_FP16INST"
3072 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3074 which_alternative == 0
3075 ? "%<V_reg>2" : "#0");
3076 output_asm_insn (pattern, operands);
3079 [(set_attr "type" "neon_fp_compare_s<q>")])
3081 (define_insn "neon_vc<cmp_op>u<mode>"
3082 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3084 (GTUGEU:<V_cmp_result>
3085 (match_operand:VDQIW 1 "s_register_operand" "w")
3086 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3088 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3089 [(set_attr "type" "neon_compare<q>")]
3092 (define_expand "neon_vca<cmp_op><mode>"
3093 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3095 (GTGE:<V_cmp_result>
3096 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3097 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3100 if (flag_unsafe_math_optimizations)
3101 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3104 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
3111 (define_insn "neon_vca<cmp_op><mode>_insn"
3112 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3114 (GTGE:<V_cmp_result>
3115 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3116 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3117 "TARGET_NEON && flag_unsafe_math_optimizations"
3118 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3119 [(set_attr "type" "neon_fp_compare_s<q>")]
3122 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3123 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3124 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3125 (match_operand:VCVTF 2 "s_register_operand" "w")]
3128 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3129 [(set_attr "type" "neon_fp_compare_s<q>")]
3132 (define_expand "neon_vca<cmp_op><mode>"
3134 (match_operand:<V_cmp_result> 0 "s_register_operand")
3136 (GLTE:<V_cmp_result>
3137 (abs:VH (match_operand:VH 1 "s_register_operand"))
3138 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3139 "TARGET_NEON_FP16INST"
3141 if (flag_unsafe_math_optimizations)
3142 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3143 (operands[0], operands[1], operands[2]));
3145 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3146 (operands[0], operands[1], operands[2]));
3150 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3152 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3154 (GLTE:<V_cmp_result>
3155 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3156 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3157 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3158 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3159 [(set_attr "type" "neon_fp_compare_s<q>")]
3162 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3163 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3164 (unspec:<V_cmp_result>
3165 [(match_operand:VH 1 "s_register_operand" "w")
3166 (match_operand:VH 2 "s_register_operand" "w")]
3169 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3170 [(set_attr "type" "neon_fp_compare_s<q>")]
3173 (define_expand "neon_vc<cmp_op>z<mode>"
3175 (match_operand:<V_cmp_result> 0 "s_register_operand")
3176 (COMPARISONS:<V_cmp_result>
3177 (match_operand:VH 1 "s_register_operand")
3179 "TARGET_NEON_FP16INST"
3181 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3182 CONST0_RTX (<MODE>mode)));
3186 (define_insn "neon_vtst<mode>"
3187 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3188 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3189 (match_operand:VDQIW 2 "s_register_operand" "w")]
3192 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3193 [(set_attr "type" "neon_tst<q>")]
3196 (define_insn "neon_vabd<sup><mode>"
3197 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3198 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3199 (match_operand:VDQIW 2 "s_register_operand" "w")]
3202 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3203 [(set_attr "type" "neon_abd<q>")]
3206 (define_insn "neon_vabd<mode>"
3207 [(set (match_operand:VH 0 "s_register_operand" "=w")
3208 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3209 (match_operand:VH 2 "s_register_operand" "w")]
3211 "TARGET_NEON_FP16INST"
3212 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3213 [(set_attr "type" "neon_abd<q>")]
3216 (define_insn "neon_vabdf<mode>"
3217 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3218 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3219 (match_operand:VCVTF 2 "s_register_operand" "w")]
3222 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3223 [(set_attr "type" "neon_fp_abd_s<q>")]
3226 (define_insn "neon_vabdl<sup><mode>"
3227 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3228 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3229 (match_operand:VW 2 "s_register_operand" "w")]
3232 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3233 [(set_attr "type" "neon_abd_long")]
3236 (define_insn "neon_vaba<sup><mode>"
3237 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3238 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3239 (match_operand:VDQIW 3 "s_register_operand" "w")]
3241 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3243 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3244 [(set_attr "type" "neon_arith_acc<q>")]
3247 (define_insn "neon_vabal<sup><mode>"
3248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3249 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3250 (match_operand:VW 3 "s_register_operand" "w")]
3252 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3254 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3255 [(set_attr "type" "neon_arith_acc<q>")]
3258 (define_insn "neon_v<maxmin><sup><mode>"
3259 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3260 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3261 (match_operand:VDQIW 2 "s_register_operand" "w")]
3264 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3265 [(set_attr "type" "neon_minmax<q>")]
3268 (define_insn "neon_v<maxmin>f<mode>"
3269 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3270 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3271 (match_operand:VCVTF 2 "s_register_operand" "w")]
3274 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3275 [(set_attr "type" "neon_fp_minmax_s<q>")]
3278 (define_insn "neon_v<maxmin>f<mode>"
3279 [(set (match_operand:VH 0 "s_register_operand" "=w")
3281 [(match_operand:VH 1 "s_register_operand" "w")
3282 (match_operand:VH 2 "s_register_operand" "w")]
3284 "TARGET_NEON_FP16INST"
3285 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3286 [(set_attr "type" "neon_fp_minmax_s<q>")]
3289 (define_insn "neon_vp<maxmin>fv4hf"
3290 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3292 [(match_operand:V4HF 1 "s_register_operand" "w")
3293 (match_operand:V4HF 2 "s_register_operand" "w")]
3295 "TARGET_NEON_FP16INST"
3296 "vp<maxmin>.f16\t%P0, %P1, %P2"
3297 [(set_attr "type" "neon_reduc_minmax")]
3300 (define_insn "neon_<fmaxmin_op><mode>"
3302 (match_operand:VH 0 "s_register_operand" "=w")
3304 [(match_operand:VH 1 "s_register_operand" "w")
3305 (match_operand:VH 2 "s_register_operand" "w")]
3307 "TARGET_NEON_FP16INST"
3308 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3309 [(set_attr "type" "neon_fp_minmax_s<q>")]
3312 ;; v<maxmin>nm intrinsics.
3313 (define_insn "neon_<fmaxmin_op><mode>"
3314 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3315 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3316 (match_operand:VCVTF 2 "s_register_operand" "w")]
3318 "TARGET_NEON && TARGET_VFP5"
3319 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3320 [(set_attr "type" "neon_fp_minmax_s<q>")]
3323 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
3324 (define_insn "<fmaxmin><mode>3"
3325 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3326 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3327 (match_operand:VCVTF 2 "s_register_operand" "w")]
3329 "TARGET_NEON && TARGET_VFP5"
3330 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3331 [(set_attr "type" "neon_fp_minmax_s<q>")]
3334 (define_expand "neon_vpadd<mode>"
3335 [(match_operand:VD 0 "s_register_operand" "=w")
3336 (match_operand:VD 1 "s_register_operand" "w")
3337 (match_operand:VD 2 "s_register_operand" "w")]
3340 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3345 (define_insn "neon_vpaddl<sup><mode>"
3346 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3347 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3350 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3351 [(set_attr "type" "neon_reduc_add_long")]
3354 (define_insn "neon_vpadal<sup><mode>"
3355 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3356 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3357 (match_operand:VDQIW 2 "s_register_operand" "w")]
3360 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3361 [(set_attr "type" "neon_reduc_add_acc")]
3364 (define_insn "neon_vp<maxmin><sup><mode>"
3365 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3366 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3367 (match_operand:VDI 2 "s_register_operand" "w")]
3370 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3371 [(set_attr "type" "neon_reduc_minmax<q>")]
3374 (define_insn "neon_vp<maxmin>f<mode>"
3375 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3376 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3377 (match_operand:VCVTF 2 "s_register_operand" "w")]
3380 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3381 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3384 (define_insn "neon_vrecps<mode>"
3385 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3386 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3387 (match_operand:VCVTF 2 "s_register_operand" "w")]
3390 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3391 [(set_attr "type" "neon_fp_recps_s<q>")]
3394 (define_insn "neon_vrecps<mode>"
3396 (match_operand:VH 0 "s_register_operand" "=w")
3397 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3398 (match_operand:VH 2 "s_register_operand" "w")]
3400 "TARGET_NEON_FP16INST"
3401 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3402 [(set_attr "type" "neon_fp_recps_s<q>")]
3405 (define_insn "neon_vrsqrts<mode>"
3406 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3407 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3408 (match_operand:VCVTF 2 "s_register_operand" "w")]
3411 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3412 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3415 (define_insn "neon_vrsqrts<mode>"
3417 (match_operand:VH 0 "s_register_operand" "=w")
3418 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3419 (match_operand:VH 2 "s_register_operand" "w")]
3421 "TARGET_NEON_FP16INST"
3422 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3423 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3426 (define_expand "neon_vabs<mode>"
3427 [(match_operand:VDQW 0 "s_register_operand" "")
3428 (match_operand:VDQW 1 "s_register_operand" "")]
3431 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3435 (define_insn "neon_vqabs<mode>"
3436 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3437 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3440 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3441 [(set_attr "type" "neon_qabs<q>")]
3444 (define_insn "neon_bswap<mode>"
3445 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3446 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3448 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3449 [(set_attr "type" "neon_rev<q>")]
3452 (define_expand "neon_vneg<mode>"
3453 [(match_operand:VDQW 0 "s_register_operand" "")
3454 (match_operand:VDQW 1 "s_register_operand" "")]
3457 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3462 ;; The vcadd and vcmla patterns are made UNSPEC for the explicitly due to the
3463 ;; fact that their usage need to guarantee that the source vectors are
3464 ;; contiguous. It would be wrong to describe the operation without being able
3465 ;; to describe the permute that is also required, but even if that is done
3466 ;; the permute would have been created as a LOAD_LANES which means the values
3467 ;; in the registers are in the wrong order.
3468 (define_insn "neon_vcadd<rot><mode>"
3469 [(set (match_operand:VF 0 "register_operand" "=w")
3470 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3471 (match_operand:VF 2 "register_operand" "w")]
3474 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3475 [(set_attr "type" "neon_fcadd")]
3478 (define_insn "neon_vcmla<rot><mode>"
3479 [(set (match_operand:VF 0 "register_operand" "=w")
3480 (plus:VF (match_operand:VF 1 "register_operand" "0")
3481 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3482 (match_operand:VF 3 "register_operand" "w")]
3485 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3486 [(set_attr "type" "neon_fcmla")]
3489 (define_insn "neon_vcmla_lane<rot><mode>"
3490 [(set (match_operand:VF 0 "s_register_operand" "=w")
3491 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3492 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3493 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3494 (match_operand:SI 4 "const_int_operand" "n")]
3498 operands = neon_vcmla_lane_prepare_operands (operands);
3499 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3501 [(set_attr "type" "neon_fcmla")]
3504 (define_insn "neon_vcmla_laneq<rot><mode>"
3505 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3506 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3507 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3508 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3509 (match_operand:SI 4 "const_int_operand" "n")]
3513 operands = neon_vcmla_lane_prepare_operands (operands);
3514 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3516 [(set_attr "type" "neon_fcmla")]
3519 (define_insn "neon_vcmlaq_lane<rot><mode>"
3520 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3521 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3522 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3523 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3524 (match_operand:SI 4 "const_int_operand" "n")]
3528 operands = neon_vcmla_lane_prepare_operands (operands);
3529 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3531 [(set_attr "type" "neon_fcmla")]
3535 ;; These instructions map to the __builtins for the Dot Product operations.
3536 (define_insn "neon_<sup>dot<vsi2qi>"
3537 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3538 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3539 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3540 "register_operand" "w")
3541 (match_operand:<VSI2QI> 3
3542 "register_operand" "w")]
3545 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3546 [(set_attr "type" "neon_dot<q>")]
3549 ;; These instructions map to the __builtins for the Dot Product
3550 ;; indexed operations.
3551 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3552 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3553 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3554 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3555 "register_operand" "w")
3556 (match_operand:V8QI 3 "register_operand" "t")
3557 (match_operand:SI 4 "immediate_operand" "i")]
3562 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3563 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3565 [(set_attr "type" "neon_dot<q>")]
3568 ;; These expands map to the Dot Product optab the vectorizer checks for.
3569 ;; The auto-vectorizer expects a dot product builtin that also does an
3570 ;; accumulation into the provided register.
3571 ;; Given the following pattern
3573 ;; for (i=0; i<len; i++) {
3579 ;; This can be auto-vectorized to
3580 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3582 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3583 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3584 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3587 ;; and so the vectorizer provides r, in which the result has to be accumulated.
3588 (define_expand "<sup>dot_prod<vsi2qi>"
3589 [(set (match_operand:VCVTI 0 "register_operand")
3590 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3592 (match_operand:<VSI2QI> 2
3593 "register_operand")]
3595 (match_operand:VCVTI 3 "register_operand")))]
3599 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3601 emit_insn (gen_rtx_SET (operands[0], operands[3]));
3605 (define_expand "neon_copysignf<mode>"
3606 [(match_operand:VCVTF 0 "register_operand")
3607 (match_operand:VCVTF 1 "register_operand")
3608 (match_operand:VCVTF 2 "register_operand")]
3612 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3613 rtx c = gen_int_mode (0x80000000, SImode);
3615 emit_move_insn (v_bitmask,
3616 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3617 emit_move_insn (operands[0], operands[2]);
3618 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3619 <VCVTF:V_cmp_result>mode, 0);
3620 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3627 (define_insn "neon_vqneg<mode>"
3628 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3629 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3632 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3633 [(set_attr "type" "neon_qneg<q>")]
3636 (define_insn "neon_vcls<mode>"
3637 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3638 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3641 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3642 [(set_attr "type" "neon_cls<q>")]
3645 (define_insn "clz<mode>2"
3646 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3647 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3649 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3650 [(set_attr "type" "neon_cnt<q>")]
3653 (define_expand "neon_vclz<mode>"
3654 [(match_operand:VDQIW 0 "s_register_operand" "")
3655 (match_operand:VDQIW 1 "s_register_operand" "")]
3658 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3662 (define_insn "popcount<mode>2"
3663 [(set (match_operand:VE 0 "s_register_operand" "=w")
3664 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3666 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3667 [(set_attr "type" "neon_cnt<q>")]
3670 (define_expand "neon_vcnt<mode>"
3671 [(match_operand:VE 0 "s_register_operand" "=w")
3672 (match_operand:VE 1 "s_register_operand" "w")]
3675 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3679 (define_insn "neon_vrecpe<mode>"
3680 [(set (match_operand:VH 0 "s_register_operand" "=w")
3681 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3683 "TARGET_NEON_FP16INST"
3684 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3685 [(set_attr "type" "neon_fp_recpe_s<q>")]
3688 (define_insn "neon_vrecpe<mode>"
3689 [(set (match_operand:V32 0 "s_register_operand" "=w")
3690 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3693 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3694 [(set_attr "type" "neon_fp_recpe_s<q>")]
3697 (define_insn "neon_vrsqrte<mode>"
3698 [(set (match_operand:V32 0 "s_register_operand" "=w")
3699 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3702 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3703 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3706 (define_expand "neon_vmvn<mode>"
3707 [(match_operand:VDQIW 0 "s_register_operand" "")
3708 (match_operand:VDQIW 1 "s_register_operand" "")]
3711 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3715 (define_insn "neon_vget_lane<mode>_sext_internal"
3716 [(set (match_operand:SI 0 "s_register_operand" "=r")
3718 (vec_select:<V_elem>
3719 (match_operand:VD 1 "s_register_operand" "w")
3720 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3723 if (BYTES_BIG_ENDIAN)
3725 int elt = INTVAL (operands[2]);
3726 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3727 operands[2] = GEN_INT (elt);
3729 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3731 [(set_attr "type" "neon_to_gp")]
3734 (define_insn "neon_vget_lane<mode>_zext_internal"
3735 [(set (match_operand:SI 0 "s_register_operand" "=r")
3737 (vec_select:<V_elem>
3738 (match_operand:VD 1 "s_register_operand" "w")
3739 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3742 if (BYTES_BIG_ENDIAN)
3744 int elt = INTVAL (operands[2]);
3745 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3746 operands[2] = GEN_INT (elt);
3748 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3750 [(set_attr "type" "neon_to_gp")]
3753 (define_insn "neon_vget_lane<mode>_sext_internal"
3754 [(set (match_operand:SI 0 "s_register_operand" "=r")
3756 (vec_select:<V_elem>
3757 (match_operand:VQ2 1 "s_register_operand" "w")
3758 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3762 int regno = REGNO (operands[1]);
3763 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3764 unsigned int elt = INTVAL (operands[2]);
3765 unsigned int elt_adj = elt % halfelts;
3767 if (BYTES_BIG_ENDIAN)
3768 elt_adj = halfelts - 1 - elt_adj;
3770 ops[0] = operands[0];
3771 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3772 ops[2] = GEN_INT (elt_adj);
3773 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3777 [(set_attr "type" "neon_to_gp_q")]
3780 (define_insn "neon_vget_lane<mode>_zext_internal"
3781 [(set (match_operand:SI 0 "s_register_operand" "=r")
3783 (vec_select:<V_elem>
3784 (match_operand:VQ2 1 "s_register_operand" "w")
3785 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3789 int regno = REGNO (operands[1]);
3790 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3791 unsigned int elt = INTVAL (operands[2]);
3792 unsigned int elt_adj = elt % halfelts;
3794 if (BYTES_BIG_ENDIAN)
3795 elt_adj = halfelts - 1 - elt_adj;
3797 ops[0] = operands[0];
3798 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3799 ops[2] = GEN_INT (elt_adj);
3800 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3804 [(set_attr "type" "neon_to_gp_q")]
3807 (define_expand "neon_vget_lane<mode>"
3808 [(match_operand:<V_ext> 0 "s_register_operand" "")
3809 (match_operand:VDQW 1 "s_register_operand" "")
3810 (match_operand:SI 2 "immediate_operand" "")]
3813 if (BYTES_BIG_ENDIAN)
3815 /* The intrinsics are defined in terms of a model where the
3816 element ordering in memory is vldm order, whereas the generic
3817 RTL is defined in terms of a model where the element ordering
3818 in memory is array order. Convert the lane number to conform
3820 unsigned int elt = INTVAL (operands[2]);
3821 unsigned int reg_nelts
3822 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3823 elt ^= reg_nelts - 1;
3824 operands[2] = GEN_INT (elt);
3827 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3828 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3831 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3837 (define_expand "neon_vget_laneu<mode>"
3838 [(match_operand:<V_ext> 0 "s_register_operand" "")
3839 (match_operand:VDQIW 1 "s_register_operand" "")
3840 (match_operand:SI 2 "immediate_operand" "")]
3843 if (BYTES_BIG_ENDIAN)
3845 /* The intrinsics are defined in terms of a model where the
3846 element ordering in memory is vldm order, whereas the generic
3847 RTL is defined in terms of a model where the element ordering
3848 in memory is array order. Convert the lane number to conform
3850 unsigned int elt = INTVAL (operands[2]);
3851 unsigned int reg_nelts
3852 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3853 elt ^= reg_nelts - 1;
3854 operands[2] = GEN_INT (elt);
3857 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3858 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3861 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3867 (define_expand "neon_vget_lanedi"
3868 [(match_operand:DI 0 "s_register_operand" "=r")
3869 (match_operand:DI 1 "s_register_operand" "w")
3870 (match_operand:SI 2 "immediate_operand" "")]
3873 emit_move_insn (operands[0], operands[1]);
3877 (define_expand "neon_vget_lanev2di"
3878 [(match_operand:DI 0 "s_register_operand" "")
3879 (match_operand:V2DI 1 "s_register_operand" "")
3880 (match_operand:SI 2 "immediate_operand" "")]
3885 if (BYTES_BIG_ENDIAN)
3887 /* The intrinsics are defined in terms of a model where the
3888 element ordering in memory is vldm order, whereas the generic
3889 RTL is defined in terms of a model where the element ordering
3890 in memory is array order. Convert the lane number to conform
3892 unsigned int elt = INTVAL (operands[2]);
3893 unsigned int reg_nelts = 2;
3894 elt ^= reg_nelts - 1;
3895 operands[2] = GEN_INT (elt);
3898 lane = INTVAL (operands[2]);
3899 gcc_assert ((lane ==0) || (lane == 1));
3900 emit_move_insn (operands[0], lane == 0
3901 ? gen_lowpart (DImode, operands[1])
3902 : gen_highpart (DImode, operands[1]));
3906 (define_expand "neon_vset_lane<mode>"
3907 [(match_operand:VDQ 0 "s_register_operand" "=w")
3908 (match_operand:<V_elem> 1 "s_register_operand" "r")
3909 (match_operand:VDQ 2 "s_register_operand" "0")
3910 (match_operand:SI 3 "immediate_operand" "i")]
3913 unsigned int elt = INTVAL (operands[3]);
3915 if (BYTES_BIG_ENDIAN)
3917 unsigned int reg_nelts
3918 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3919 elt ^= reg_nelts - 1;
3922 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3923 GEN_INT (1 << elt), operands[2]));
3927 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
3929 (define_expand "neon_vset_lanedi"
3930 [(match_operand:DI 0 "s_register_operand" "=w")
3931 (match_operand:DI 1 "s_register_operand" "r")
3932 (match_operand:DI 2 "s_register_operand" "0")
3933 (match_operand:SI 3 "immediate_operand" "i")]
3936 emit_move_insn (operands[0], operands[1]);
3940 (define_expand "neon_vcreate<mode>"
3941 [(match_operand:VD_RE 0 "s_register_operand" "")
3942 (match_operand:DI 1 "general_operand" "")]
3945 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3946 emit_move_insn (operands[0], src);
3950 (define_insn "neon_vdup_n<mode>"
3951 [(set (match_operand:VX 0 "s_register_operand" "=w")
3952 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3954 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3955 [(set_attr "type" "neon_from_gp<q>")]
3958 (define_insn "neon_vdup_nv4hf"
3959 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3960 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3963 [(set_attr "type" "neon_from_gp")]
3966 (define_insn "neon_vdup_nv8hf"
3967 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3968 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3971 [(set_attr "type" "neon_from_gp_q")]
3974 (define_insn "neon_vdup_n<mode>"
3975 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3976 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3979 vdup.<V_sz_elem>\t%<V_reg>0, %1
3980 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3981 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3984 (define_expand "neon_vdup_ndi"
3985 [(match_operand:DI 0 "s_register_operand" "=w")
3986 (match_operand:DI 1 "s_register_operand" "r")]
3989 emit_move_insn (operands[0], operands[1]);
3994 (define_insn "neon_vdup_nv2di"
3995 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3996 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3999 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
4000 vmov\t%e0, %P1\;vmov\t%f0, %P1"
4001 [(set_attr "length" "8")
4002 (set_attr "type" "multiple")]
4005 (define_insn "neon_vdup_lane<mode>_internal"
4006 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
4008 (vec_select:<V_elem>
4009 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4010 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4013 if (BYTES_BIG_ENDIAN)
4015 int elt = INTVAL (operands[2]);
4016 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
4017 operands[2] = GEN_INT (elt);
4020 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
4022 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
4024 [(set_attr "type" "neon_dup<q>")]
4027 (define_insn "neon_vdup_lane<mode>_internal"
4028 [(set (match_operand:VH 0 "s_register_operand" "=w")
4030 (vec_select:<V_elem>
4031 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4032 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
4033 "TARGET_NEON && TARGET_FP16"
4035 if (BYTES_BIG_ENDIAN)
4037 int elt = INTVAL (operands[2]);
4038 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
4039 operands[2] = GEN_INT (elt);
4042 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
4044 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
4046 [(set_attr "type" "neon_dup<q>")]
4049 (define_expand "neon_vdup_lane<mode>"
4050 [(match_operand:VDQW 0 "s_register_operand" "=w")
4051 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
4052 (match_operand:SI 2 "immediate_operand" "i")]
4055 if (BYTES_BIG_ENDIAN)
4057 unsigned int elt = INTVAL (operands[2]);
4058 unsigned int reg_nelts
4059 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4060 elt ^= reg_nelts - 1;
4061 operands[2] = GEN_INT (elt);
4063 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4068 (define_expand "neon_vdup_lane<mode>"
4069 [(match_operand:VH 0 "s_register_operand")
4070 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
4071 (match_operand:SI 2 "immediate_operand")]
4072 "TARGET_NEON && TARGET_FP16"
4074 if (BYTES_BIG_ENDIAN)
4076 unsigned int elt = INTVAL (operands[2]);
4077 unsigned int reg_nelts
4078 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
4079 elt ^= reg_nelts - 1;
4080 operands[2] = GEN_INT (elt);
4082 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
4087 ; Scalar index is ignored, since only zero is valid here.
4088 (define_expand "neon_vdup_lanedi"
4089 [(match_operand:DI 0 "s_register_operand" "=w")
4090 (match_operand:DI 1 "s_register_operand" "w")
4091 (match_operand:SI 2 "immediate_operand" "i")]
4094 emit_move_insn (operands[0], operands[1]);
4098 ; Likewise for v2di, as the DImode second operand has only a single element.
4099 (define_expand "neon_vdup_lanev2di"
4100 [(match_operand:V2DI 0 "s_register_operand" "=w")
4101 (match_operand:DI 1 "s_register_operand" "w")
4102 (match_operand:SI 2 "immediate_operand" "i")]
4105 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4109 ; Disabled before reload because we don't want combine doing something silly,
4110 ; but used by the post-reload expansion of neon_vcombine.
4111 (define_insn "*neon_vswp<mode>"
4112 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4113 (match_operand:VDQX 1 "s_register_operand" "+w"))
4114 (set (match_dup 1) (match_dup 0))]
4115 "TARGET_NEON && reload_completed"
4116 "vswp\t%<V_reg>0, %<V_reg>1"
4117 [(set_attr "type" "neon_permute<q>")]
;; Concatenate two 64-bit vectors into one 128-bit vector.  Kept as a single
;; insn until after reload, then split via neon_split_vcombine, which can use
;; register moves (or the vswp pattern above) once hard registers are known.
4120 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
4122 ;; FIXME: A different implementation of this builtin could make it much
4123 ;; more likely that we wouldn't actually need to output anything (we could make
4124 ;; it so that the reg allocator puts things in the right places magically
4125 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
4127 (define_insn_and_split "neon_vcombine<mode>"
4128 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4129 (vec_concat:<V_DOUBLE>
4130 (match_operand:VDX 1 "s_register_operand" "w")
4131 (match_operand:VDX 2 "s_register_operand" "w")))]
4134 "&& reload_completed"
4137 neon_split_vcombine (operands);
4140 [(set_attr "type" "multiple")]
;; Extract the high half of a 128-bit vector: a subreg move at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), i.e. the second D register of the Q pair.
4143 (define_expand "neon_vget_high<mode>"
4144 [(match_operand:<V_HALF> 0 "s_register_operand")
4145 (match_operand:VQX 1 "s_register_operand")]
4148 emit_move_insn (operands[0],
4149 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4150 GET_MODE_SIZE (<V_HALF>mode)));
;; Extract the low half of a 128-bit vector: subreg move at offset 0
;; (offset line is missing from this extract).
4154 (define_expand "neon_vget_low<mode>"
4155 [(match_operand:<V_HALF> 0 "s_register_operand")
4156 (match_operand:VQX 1 "s_register_operand")]
4159 emit_move_insn (operands[0],
4160 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Vector int <-> float conversions using standard RTL float/fix codes.
;; The int->float patterns are gated on !flag_rounding_math — presumably
;; because vcvt always uses a fixed rounding mode regardless of FPSCR
;; (NOTE(review): confirm rationale against arm backend docs).
4165 (define_insn "float<mode><V_cvtto>2"
4166 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4167 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4168 "TARGET_NEON && !flag_rounding_math"
4169 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4170 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned int -> float.
4173 (define_insn "floatuns<mode><V_cvtto>2"
4174 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4175 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4176 "TARGET_NEON && !flag_rounding_math"
4177 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4178 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Float -> signed int, truncating (fix).
4181 (define_insn "fix_trunc<mode><V_cvtto>2"
4182 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4183 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4185 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4186 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Float -> unsigned int, truncating.
4189 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4190 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4191 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4193 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4194 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic (unspec) forms of the vcvt conversions.  <sup> expands to
;; s/u for the signed/unsigned variants.  Float -> int, f32 source:
4197 (define_insn "neon_vcvt<sup><mode>"
4198 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4199 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4202 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4203 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Int -> float, f32 destination:
4206 (define_insn "neon_vcvt<sup><mode>"
4207 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4208 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4211 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4212 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widen four half-precision floats (D register) to four single-precision
;; floats (Q register).
4215 (define_insn "neon_vcvtv4sfv4hf"
4216 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4217 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4219 "TARGET_NEON && TARGET_FP16"
4220 "vcvt.f32.f16\t%q0, %P1"
4221 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrow four single-precision floats to four half-precision floats.
4224 (define_insn "neon_vcvtv4hfv4sf"
4225 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4226 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4228 "TARGET_NEON && TARGET_FP16"
4229 "vcvt.f16.f32\t%P0, %q1"
4230 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Half-precision int <-> float conversions (Armv8.2-A FP16 instructions).
4233 (define_insn "neon_vcvt<sup><mode>"
4235 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4237 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4239 "TARGET_NEON_FP16INST"
4240 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4241 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4244 (define_insn "neon_vcvt<sup><mode>"
4246 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4248 [(match_operand:VH 1 "s_register_operand" "w")]
4250 "TARGET_NEON_FP16INST"
4251 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4252 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Fixed-point conversions (vcvt_n): operand 2 is the number of fraction
;; bits, range-checked at output time with arm_const_bounds (bounds are
;; exclusive at the top: 1..32 for f32, 1..16 for f16).
4255 (define_insn "neon_vcvt<sup>_n<mode>"
4256 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4257 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4258 (match_operand:SI 2 "immediate_operand" "i")]
4262 arm_const_bounds (operands[2], 1, 33);
4263 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4265 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; f16 -> fixed-point int.
4268 (define_insn "neon_vcvt<sup>_n<mode>"
4269 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4271 [(match_operand:VH 1 "s_register_operand" "w")
4272 (match_operand:SI 2 "immediate_operand" "i")]
4274 "TARGET_NEON_FP16INST"
4276 arm_const_bounds (operands[2], 0, 17);
4277 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4279 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Fixed-point int -> f32.
4282 (define_insn "neon_vcvt<sup>_n<mode>"
4283 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4284 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4285 (match_operand:SI 2 "immediate_operand" "i")]
4289 arm_const_bounds (operands[2], 1, 33);
4290 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4292 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Fixed-point int -> f16.
4295 (define_insn "neon_vcvt<sup>_n<mode>"
4296 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4298 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4299 (match_operand:SI 2 "immediate_operand" "i")]
4301 "TARGET_NEON_FP16INST"
4303 arm_const_bounds (operands[2], 0, 17);
4304 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4306 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; f16 -> int with explicit rounding mode; <vcvth_op> selects the rounding
;; suffix (e.g. a/n/m/p — exact iterator values not visible in this extract).
4309 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4311 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4313 [(match_operand:VH 1 "s_register_operand" "w")]
4315 "TARGET_NEON_FP16INST"
4316 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4317 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Narrowing moves: take a Q-register source and produce a D register with
;; elements of half the width.
4320 (define_insn "neon_vmovn<mode>"
4321 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4322 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4325 "vmovn.<V_if_elem>\t%P0, %q1"
4326 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing move (signed or unsigned via <sup>).
4329 (define_insn "neon_vqmovn<sup><mode>"
4330 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4331 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4334 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4335 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating narrowing move, signed source to unsigned result.
4338 (define_insn "neon_vqmovun<mode>"
4339 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4340 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4343 "vqmovun.<V_s_elem>\t%P0, %q1"
4344 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move: D-register source, Q-register result with elements of
;; twice the width, sign- or zero-extended per <sup>.
4347 (define_insn "neon_vmovl<sup><mode>"
4348 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4349 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4352 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4353 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by a single lane of a vector.  The scalar operand uses
;; <scalar_mul_constraint>, which restricts it to the D registers the
;; by-scalar multiply instructions can address.
;; D-register form:
4356 (define_insn "neon_vmul_lane<mode>"
4357 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4358 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4359 (match_operand:VMD 2 "s_register_operand"
4360 "<scalar_mul_constraint>")
4361 (match_operand:SI 3 "immediate_operand" "i")]
4365 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4368 (if_then_else (match_test "<Is_float_mode>")
4369 (const_string "neon_fp_mul_s_scalar<q>")
4370 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Q-register form; the scalar still comes from a D register (<V_HALF>).
4373 (define_insn "neon_vmul_lane<mode>"
4374 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4375 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4376 (match_operand:<V_HALF> 2 "s_register_operand"
4377 "<scalar_mul_constraint>")
4378 (match_operand:SI 3 "immediate_operand" "i")]
4382 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4385 (if_then_else (match_test "<Is_float_mode>")
4386 (const_string "neon_fp_mul_s_scalar<q>")
4387 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Half-precision float form (Armv8.2-A FP16).
4390 (define_insn "neon_vmul_lane<mode>"
4391 [(set (match_operand:VH 0 "s_register_operand" "=w")
4392 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4393 (match_operand:V4HF 2 "s_register_operand"
4394 "<scalar_mul_constraint>")
4395 (match_operand:SI 3 "immediate_operand" "i")]
4397 "TARGET_NEON_FP16INST"
4398 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4399 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Widening multiply by lane: D x D-lane -> Q, signed/unsigned via <sup>.
4402 (define_insn "neon_vmull<sup>_lane<mode>"
4403 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4404 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4405 (match_operand:VMDI 2 "s_register_operand"
4406 "<scalar_mul_constraint>")
4407 (match_operand:SI 3 "immediate_operand" "i")]
4411 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4413 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply by lane.
4416 (define_insn "neon_vqdmull_lane<mode>"
4417 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4418 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4419 (match_operand:VMDI 2 "s_register_operand"
4420 "<scalar_mul_constraint>")
4421 (match_operand:SI 3 "immediate_operand" "i")]
4422 UNSPEC_VQDMULL_LANE))]
4425 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4427 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; Saturating doubling multiply-high by lane; <r> selects the rounding
;; variant (vqdmulh / vqrdmulh).  Q-register form:
4430 (define_insn "neon_vq<r>dmulh_lane<mode>"
4431 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4432 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4433 (match_operand:<V_HALF> 2 "s_register_operand"
4434 "<scalar_mul_constraint>")
4435 (match_operand:SI 3 "immediate_operand" "i")]
4439 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4441 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; D-register form:
4444 (define_insn "neon_vq<r>dmulh_lane<mode>"
4445 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4446 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4447 (match_operand:VMDI 2 "s_register_operand"
4448 "<scalar_mul_constraint>")
4449 (match_operand:SI 3 "immediate_operand" "i")]
4453 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4455 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Rounding saturating doubling multiply-accumulate/subtract by lane
;; (Armv8.1-A).  Operand 1 is tied to the destination ("0") since these are
;; accumulating instructions.
4458 ;; vqrdmlah_lane, vqrdmlsh_lane
4459 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4460 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4461 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4462 (match_operand:VMQI 2 "s_register_operand" "w")
4463 (match_operand:<V_HALF> 3 "s_register_operand"
4464 "<scalar_mul_constraint>")
4465 (match_operand:SI 4 "immediate_operand" "i")]
4470 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4472 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; D-register form:
4475 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4476 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4477 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4478 (match_operand:VMDI 2 "s_register_operand" "w")
4479 (match_operand:VMDI 3 "s_register_operand"
4480 "<scalar_mul_constraint>")
4481 (match_operand:SI 4 "immediate_operand" "i")]
4486 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4488 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate by lane.  Operand 1 is the accumulator, tied to the
;; destination ("0"); operands 2 and 3[lane 4] are the multiplicands.
;; D-register form:
4491 (define_insn "neon_vmla_lane<mode>"
4492 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4493 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4494 (match_operand:VMD 2 "s_register_operand" "w")
4495 (match_operand:VMD 3 "s_register_operand"
4496 "<scalar_mul_constraint>")
4497 (match_operand:SI 4 "immediate_operand" "i")]
4501 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4504 (if_then_else (match_test "<Is_float_mode>")
4505 (const_string "neon_fp_mla_s_scalar<q>")
4506 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Q-register form:
4509 (define_insn "neon_vmla_lane<mode>"
4510 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4511 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4512 (match_operand:VMQ 2 "s_register_operand" "w")
4513 (match_operand:<V_HALF> 3 "s_register_operand"
4514 "<scalar_mul_constraint>")
4515 (match_operand:SI 4 "immediate_operand" "i")]
4519 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4522 (if_then_else (match_test "<Is_float_mode>")
4523 (const_string "neon_fp_mla_s_scalar<q>")
4524 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by lane (Q accumulator, D operands).
4527 (define_insn "neon_vmlal<sup>_lane<mode>"
4528 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4529 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4530 (match_operand:VMDI 2 "s_register_operand" "w")
4531 (match_operand:VMDI 3 "s_register_operand"
4532 "<scalar_mul_constraint>")
4533 (match_operand:SI 4 "immediate_operand" "i")]
4537 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4539 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-accumulate by lane.
4542 (define_insn "neon_vqdmlal_lane<mode>"
4543 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4544 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4545 (match_operand:VMDI 2 "s_register_operand" "w")
4546 (match_operand:VMDI 3 "s_register_operand"
4547 "<scalar_mul_constraint>")
4548 (match_operand:SI 4 "immediate_operand" "i")]
4549 UNSPEC_VQDMLAL_LANE))]
4552 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4554 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by lane — mirror images of the vmla_lane patterns
;; above, with the accumulator again tied to the destination.
;; D-register form:
4557 (define_insn "neon_vmls_lane<mode>"
4558 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4559 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4560 (match_operand:VMD 2 "s_register_operand" "w")
4561 (match_operand:VMD 3 "s_register_operand"
4562 "<scalar_mul_constraint>")
4563 (match_operand:SI 4 "immediate_operand" "i")]
4567 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4570 (if_then_else (match_test "<Is_float_mode>")
4571 (const_string "neon_fp_mla_s_scalar<q>")
4572 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Q-register form:
4575 (define_insn "neon_vmls_lane<mode>"
4576 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4577 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4578 (match_operand:VMQ 2 "s_register_operand" "w")
4579 (match_operand:<V_HALF> 3 "s_register_operand"
4580 "<scalar_mul_constraint>")
4581 (match_operand:SI 4 "immediate_operand" "i")]
4585 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4588 (if_then_else (match_test "<Is_float_mode>")
4589 (const_string "neon_fp_mla_s_scalar<q>")
4590 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by lane.
4593 (define_insn "neon_vmlsl<sup>_lane<mode>"
4594 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4595 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4596 (match_operand:VMDI 2 "s_register_operand" "w")
4597 (match_operand:VMDI 3 "s_register_operand"
4598 "<scalar_mul_constraint>")
4599 (match_operand:SI 4 "immediate_operand" "i")]
4603 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4605 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-subtract by lane.
4608 (define_insn "neon_vqdmlsl_lane<mode>"
4609 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4610 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4611 (match_operand:VMDI 2 "s_register_operand" "w")
4612 (match_operand:VMDI 3 "s_register_operand"
4613 "<scalar_mul_constraint>")
4614 (match_operand:SI 4 "immediate_operand" "i")]
4615 UNSPEC_VQDMLSL_LANE))]
4618 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4620 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; "_n" (multiply-by-scalar) expanders.  Each one loads the scalar into
;; lane 0 of a fresh vector register via vset_lane, then reuses the
;; corresponding "_lane" pattern with lane index 0.
4623 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4624 ; core register into a temp register, then use a scalar taken from that. This
4625 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4626 ; or extracted from another vector. The latter case it's currently better to
4627 ; use the "_lane" variant, and the former case can probably be implemented
4628 ; using vld1_lane, but that hasn't been done yet.
;; D-register multiply by scalar:
4630 (define_expand "neon_vmul_n<mode>"
4631 [(match_operand:VMD 0 "s_register_operand" "")
4632 (match_operand:VMD 1 "s_register_operand" "")
4633 (match_operand:<V_elem> 2 "s_register_operand" "")]
4636 rtx tmp = gen_reg_rtx (<MODE>mode);
4637 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4638 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Q-register multiply by scalar; the temporary lives in the half-width
;; (D-register) mode since the _lane pattern takes its scalar from there.
4643 (define_expand "neon_vmul_n<mode>"
4644 [(match_operand:VMQ 0 "s_register_operand" "")
4645 (match_operand:VMQ 1 "s_register_operand" "")
4646 (match_operand:<V_elem> 2 "s_register_operand" "")]
4649 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4650 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4651 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Half-precision float multiply by scalar (temporary is always V4HF).
4656 (define_expand "neon_vmul_n<mode>"
4657 [(match_operand:VH 0 "s_register_operand")
4658 (match_operand:VH 1 "s_register_operand")
4659 (match_operand:<V_elem> 2 "s_register_operand")]
4660 "TARGET_NEON_FP16INST"
4662 rtx tmp = gen_reg_rtx (V4HFmode);
4663 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4664 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Widening / saturating "_n" multiply expanders — same scheme as vmul_n:
;; broadcast the scalar into lane 0 of a temp, then call the _lane pattern.
;; Signed widening multiply by scalar:
4669 (define_expand "neon_vmulls_n<mode>"
4670 [(match_operand:<V_widen> 0 "s_register_operand" "")
4671 (match_operand:VMDI 1 "s_register_operand" "")
4672 (match_operand:<V_elem> 2 "s_register_operand" "")]
4675 rtx tmp = gen_reg_rtx (<MODE>mode);
4676 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4677 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Unsigned widening multiply by scalar:
4682 (define_expand "neon_vmullu_n<mode>"
4683 [(match_operand:<V_widen> 0 "s_register_operand" "")
4684 (match_operand:VMDI 1 "s_register_operand" "")
4685 (match_operand:<V_elem> 2 "s_register_operand" "")]
4688 rtx tmp = gen_reg_rtx (<MODE>mode);
4689 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4690 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling widening multiply by scalar:
4695 (define_expand "neon_vqdmull_n<mode>"
4696 [(match_operand:<V_widen> 0 "s_register_operand" "")
4697 (match_operand:VMDI 1 "s_register_operand" "")
4698 (match_operand:<V_elem> 2 "s_register_operand" "")]
4701 rtx tmp = gen_reg_rtx (<MODE>mode);
4702 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4703 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling multiply-high by scalar, D-register form:
4708 (define_expand "neon_vqdmulh_n<mode>"
4709 [(match_operand:VMDI 0 "s_register_operand" "")
4710 (match_operand:VMDI 1 "s_register_operand" "")
4711 (match_operand:<V_elem> 2 "s_register_operand" "")]
4714 rtx tmp = gen_reg_rtx (<MODE>mode);
4715 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4716 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Rounding variant, D-register form:
4721 (define_expand "neon_vqrdmulh_n<mode>"
4722 [(match_operand:VMDI 0 "s_register_operand" "")
4723 (match_operand:VMDI 1 "s_register_operand" "")
4724 (match_operand:<V_elem> 2 "s_register_operand" "")]
4727 rtx tmp = gen_reg_rtx (<MODE>mode);
4728 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4729 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Q-register forms: the scalar temp uses the half-width mode.
4734 (define_expand "neon_vqdmulh_n<mode>"
4735 [(match_operand:VMQI 0 "s_register_operand" "")
4736 (match_operand:VMQI 1 "s_register_operand" "")
4737 (match_operand:<V_elem> 2 "s_register_operand" "")]
4740 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4741 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4742 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4747 (define_expand "neon_vqrdmulh_n<mode>"
4748 [(match_operand:VMQI 0 "s_register_operand" "")
4749 (match_operand:VMQI 1 "s_register_operand" "")
4750 (match_operand:<V_elem> 2 "s_register_operand" "")]
4753 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4754 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4755 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Multiply-accumulate by scalar ("_n") expanders.  Operand 1 is the
;; accumulator, operand 2 the vector multiplicand, operand 3 the scalar,
;; broadcast to lane 0 of a temp and fed to the _lane pattern.
;; D-register form:
4760 (define_expand "neon_vmla_n<mode>"
4761 [(match_operand:VMD 0 "s_register_operand" "")
4762 (match_operand:VMD 1 "s_register_operand" "")
4763 (match_operand:VMD 2 "s_register_operand" "")
4764 (match_operand:<V_elem> 3 "s_register_operand" "")]
4767 rtx tmp = gen_reg_rtx (<MODE>mode);
4768 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4769 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Q-register form (scalar temp in half-width mode):
4774 (define_expand "neon_vmla_n<mode>"
4775 [(match_operand:VMQ 0 "s_register_operand" "")
4776 (match_operand:VMQ 1 "s_register_operand" "")
4777 (match_operand:VMQ 2 "s_register_operand" "")
4778 (match_operand:<V_elem> 3 "s_register_operand" "")]
4781 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4782 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4783 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Signed widening multiply-accumulate by scalar:
4788 (define_expand "neon_vmlals_n<mode>"
4789 [(match_operand:<V_widen> 0 "s_register_operand" "")
4790 (match_operand:<V_widen> 1 "s_register_operand" "")
4791 (match_operand:VMDI 2 "s_register_operand" "")
4792 (match_operand:<V_elem> 3 "s_register_operand" "")]
4795 rtx tmp = gen_reg_rtx (<MODE>mode);
4796 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4797 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Unsigned widening multiply-accumulate by scalar:
4802 (define_expand "neon_vmlalu_n<mode>"
4803 [(match_operand:<V_widen> 0 "s_register_operand" "")
4804 (match_operand:<V_widen> 1 "s_register_operand" "")
4805 (match_operand:VMDI 2 "s_register_operand" "")
4806 (match_operand:<V_elem> 3 "s_register_operand" "")]
4809 rtx tmp = gen_reg_rtx (<MODE>mode);
4810 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4811 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-accumulate by scalar:
4816 (define_expand "neon_vqdmlal_n<mode>"
4817 [(match_operand:<V_widen> 0 "s_register_operand" "")
4818 (match_operand:<V_widen> 1 "s_register_operand" "")
4819 (match_operand:VMDI 2 "s_register_operand" "")
4820 (match_operand:<V_elem> 3 "s_register_operand" "")]
4823 rtx tmp = gen_reg_rtx (<MODE>mode);
4824 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4825 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Multiply-subtract by scalar ("_n") expanders — mirror of the vmla_n
;; group above, delegating to the corresponding _lane patterns at lane 0.
;; D-register form:
4830 (define_expand "neon_vmls_n<mode>"
4831 [(match_operand:VMD 0 "s_register_operand" "")
4832 (match_operand:VMD 1 "s_register_operand" "")
4833 (match_operand:VMD 2 "s_register_operand" "")
4834 (match_operand:<V_elem> 3 "s_register_operand" "")]
4837 rtx tmp = gen_reg_rtx (<MODE>mode);
4838 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4839 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Q-register form (scalar temp in half-width mode):
4844 (define_expand "neon_vmls_n<mode>"
4845 [(match_operand:VMQ 0 "s_register_operand" "")
4846 (match_operand:VMQ 1 "s_register_operand" "")
4847 (match_operand:VMQ 2 "s_register_operand" "")
4848 (match_operand:<V_elem> 3 "s_register_operand" "")]
4851 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4852 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4853 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Signed widening multiply-subtract by scalar:
4858 (define_expand "neon_vmlsls_n<mode>"
4859 [(match_operand:<V_widen> 0 "s_register_operand" "")
4860 (match_operand:<V_widen> 1 "s_register_operand" "")
4861 (match_operand:VMDI 2 "s_register_operand" "")
4862 (match_operand:<V_elem> 3 "s_register_operand" "")]
4865 rtx tmp = gen_reg_rtx (<MODE>mode);
4866 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4867 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Unsigned widening multiply-subtract by scalar:
4872 (define_expand "neon_vmlslu_n<mode>"
4873 [(match_operand:<V_widen> 0 "s_register_operand" "")
4874 (match_operand:<V_widen> 1 "s_register_operand" "")
4875 (match_operand:VMDI 2 "s_register_operand" "")
4876 (match_operand:<V_elem> 3 "s_register_operand" "")]
4879 rtx tmp = gen_reg_rtx (<MODE>mode);
4880 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4881 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-subtract by scalar:
4886 (define_expand "neon_vqdmlsl_n<mode>"
4887 [(match_operand:<V_widen> 0 "s_register_operand" "")
4888 (match_operand:<V_widen> 1 "s_register_operand" "")
4889 (match_operand:VMDI 2 "s_register_operand" "")
4890 (match_operand:<V_elem> 3 "s_register_operand" "")]
4893 rtx tmp = gen_reg_rtx (<MODE>mode);
4894 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4895 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; Byte-wise extract from a register pair: result is a window starting at
;; element <3> of the concatenation of operands 1 and 2.  The index is
;; bounds-checked against the element count at output time.
4900 (define_insn "@neon_vext<mode>"
4901 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4902 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4903 (match_operand:VDQX 2 "s_register_operand" "w")
4904 (match_operand:SI 3 "immediate_operand" "i")]
4908 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4909 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4911 [(set_attr "type" "neon_ext<q>")]
;; Element reversal within 64-bit groups:
4914 (define_insn "@neon_vrev64<mode>"
4915 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4916 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4919 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4920 [(set_attr "type" "neon_rev<q>")]
;; Element reversal within 32-bit groups (8- and 16-bit element modes):
4923 (define_insn "@neon_vrev32<mode>"
4924 [(set (match_operand:VX 0 "s_register_operand" "=w")
4925 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4928 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4929 [(set_attr "type" "neon_rev<q>")]
;; Element reversal within 16-bit groups (8-bit element modes only):
4932 (define_insn "@neon_vrev16<mode>"
4933 [(set (match_operand:VE 0 "s_register_operand" "=w")
4934 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4937 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4938 [(set_attr "type" "neon_rev<q>")]
;; Bitwise select.  Three alternatives let the register allocator tie the
;; destination to any of the three inputs and pick vbsl/vbit/vbif to match.
4941 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4942 ; allocation. For an intrinsic of form:
4943 ; rD = vbsl_* (rS, rN, rM)
4944 ; We can use any of:
4945 ; vbsl rS, rN, rM (if D = S)
4946 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4947 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4949 (define_insn "neon_vbsl<mode>_internal"
4950 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4951 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4952 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4953 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4957 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4958 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4959 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4960 [(set_attr "type" "neon_bsl<q>")]
;; Public expander: the selector mask arrives in the comparison-result mode,
;; so lowpart-convert it to the data mode before using the internal insn.
4963 (define_expand "neon_vbsl<mode>"
4964 [(set (match_operand:VDQX 0 "s_register_operand" "")
4965 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4966 (match_operand:VDQX 2 "s_register_operand" "")
4967 (match_operand:VDQX 3 "s_register_operand" "")]
4971 /* We can't alias operands together if they have different modes. */
4972 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shifts by a vector of per-element shift counts (register form);
;; <shift_op>/<sup> select the exact operation and signedness.
4976 (define_insn "neon_v<shift_op><sup><mode>"
4977 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4978 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4979 (match_operand:VDQIX 2 "s_register_operand" "w")]
4982 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4983 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating register-shift variant:
4987 (define_insn "neon_v<shift_op><sup><mode>"
4988 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4989 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4990 (match_operand:VDQIX 2 "s_register_operand" "w")]
4993 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4994 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift by immediate; the shift amount is range-checked (1..element bits).
4998 (define_insn "neon_v<shift_op><sup>_n<mode>"
4999 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5000 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5001 (match_operand:SI 2 "immediate_operand" "i")]
5005 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
5006 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
5008 [(set_attr "type" "neon_shift_imm<q>")]
;; Narrowing right shift by immediate (shift limited to half element width).
5011 ;; vshrn_n, vrshrn_n
5012 (define_insn "neon_v<shift_op>_n<mode>"
5013 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
5014 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
5015 (match_operand:SI 2 "immediate_operand" "i")]
5019 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
5020 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
5022 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing right shift by immediate:
5025 ;; vqshrn_n, vqrshrn_n
5026 (define_insn "neon_v<shift_op><sup>_n<mode>"
5027 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
5028 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
5029 (match_operand:SI 2 "immediate_operand" "i")]
5033 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
5034 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
5036 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating narrowing right shift, signed source to unsigned result:
5039 ;; vqshrun_n, vqrshrun_n
5040 (define_insn "neon_v<shift_op>_n<mode>"
5041 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
5042 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
5043 (match_operand:SI 2 "immediate_operand" "i")]
5047 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
5048 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
5050 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Left shift by immediate; valid amounts are 0..element bits - 1.
5053 (define_insn "neon_vshl_n<mode>"
5054 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5055 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5056 (match_operand:SI 2 "immediate_operand" "i")]
5060 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5061 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
5063 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating left shift by immediate (signed/unsigned via <sup>):
5066 (define_insn "neon_vqshl_<sup>_n<mode>"
5067 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5068 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5069 (match_operand:SI 2 "immediate_operand" "i")]
5073 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5074 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
5076 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating left shift of signed input producing unsigned result:
5079 (define_insn "neon_vqshlu_n<mode>"
5080 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5081 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
5082 (match_operand:SI 2 "immediate_operand" "i")]
5086 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
5087 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
5089 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening left shift by immediate (D -> Q); note the wider bound of
;; 0 < imm <= element size, documented inline.
5092 (define_insn "neon_vshll<sup>_n<mode>"
5093 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
5094 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
5095 (match_operand:SI 2 "immediate_operand" "i")]
5099 /* The boundaries are: 0 < imm <= size. */
5100 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
5101 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
5103 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-and-accumulate / insert family.  Operand 1 is the destination's
;; prior contents (tied with "0"); operand 2 is shifted by immediate 3 and
;; combined into the destination.
;; Shift right and accumulate (vsra/vrsra via <shift_op>):
5107 (define_insn "neon_v<shift_op><sup>_n<mode>"
5108 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5109 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5110 (match_operand:VDQIX 2 "s_register_operand" "w")
5111 (match_operand:SI 3 "immediate_operand" "i")]
5115 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5116 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5118 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift right and insert (bits shifted in come from the destination):
5121 (define_insn "neon_vsri_n<mode>"
5122 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5123 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5124 (match_operand:VDQIX 2 "s_register_operand" "w")
5125 (match_operand:SI 3 "immediate_operand" "i")]
5129 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5130 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5132 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift left and insert; valid shift range differs (0..bits-1):
5135 (define_insn "neon_vsli_n<mode>"
5136 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5137 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5138 (match_operand:VDQIX 2 "s_register_operand" "w")
5139 (match_operand:SI 3 "immediate_operand" "i")]
5143 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5144 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5146 [(set_attr "type" "neon_shift_reg<q>")]
;; VTBL with a one-register table: simple single-template output.
5149 (define_insn "neon_vtbl1v8qi"
5150 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5151 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5152 (match_operand:V8QI 2 "s_register_operand" "w")]
5155 "vtbl.8\t%P0, {%P1}, %P2"
5156 [(set_attr "type" "neon_tbl1")]
;; VTBL with a two-register table held in a TImode operand.  The C body
;; decomposes the TI register into its two constituent D registers
;; (REGNO, REGNO+2 — VFP D regs are numbered in steps of 2) so the register
;; list can be printed explicitly.
5159 (define_insn "neon_vtbl2v8qi"
5160 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5161 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5162 (match_operand:V8QI 2 "s_register_operand" "w")]
5167 int tabbase = REGNO (operands[1]);
5169 ops[0] = operands[0];
5170 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5171 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5172 ops[3] = operands[2];
5173 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5177 [(set_attr "type" "neon_tbl2")]
;; VTBL with a three-register table (EImode); same decomposition scheme,
;; D regs at tabbase, +2, +4.
5180 (define_insn "neon_vtbl3v8qi"
5181 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5182 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5183 (match_operand:V8QI 2 "s_register_operand" "w")]
5188 int tabbase = REGNO (operands[1]);
5190 ops[0] = operands[0];
5191 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5192 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5193 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5194 ops[4] = operands[2];
5195 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5199 [(set_attr "type" "neon_tbl3")]
;; VTBL with a four-register table (OImode); D regs at tabbase .. +6.
5202 (define_insn "neon_vtbl4v8qi"
5203 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5204 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5205 (match_operand:V8QI 2 "s_register_operand" "w")]
5210 int tabbase = REGNO (operands[1]);
5212 ops[0] = operands[0];
5213 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5214 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5215 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5216 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5217 ops[5] = operands[2];
5218 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5222 [(set_attr "type" "neon_tbl4")]
5225 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; After reload, a V16QI table lookup is split into two V8QI vtbl2
;; operations: one for the low half of the result/index, one for the high
;; half.  The destination is early-clobbered ("=&w") so it cannot alias the
;; table while the first half is written.
5226 (define_insn_and_split "neon_vtbl1v16qi"
5227 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5228 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5229 (match_operand:V16QI 2 "s_register_operand" "w")]
5233 "&& reload_completed"
5236 rtx op0, op1, op2, part0, part2;
;; The whole 16-byte table is viewed as one TImode register pair for vtbl2.
5240 op1 = gen_lowpart (TImode, operands[1]);
5243 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5244 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5245 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5246 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5248 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5249 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5250 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5251 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5254 [(set_attr "type" "multiple")]
;; Same splitting scheme for a two-V16QI (OImode) table.
5257 (define_insn_and_split "neon_vtbl2v16qi"
5258 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5259 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5260 (match_operand:V16QI 2 "s_register_operand" "w")]
5264 "&& reload_completed"
5267 rtx op0, op1, op2, part0, part2;
5274 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5275 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5276 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5277 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5279 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5280 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5281 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5282 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5285 [(set_attr "type" "multiple")]
5288 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5289 ;; handle quad-word input modes, producing octa-word output modes. But
5290 ;; that requires us to add support for octa-word vector modes in moves.
5291 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI values into one OImode value; actual move emission is
;; delegated to neon_split_vcombine after reload.
5292 (define_insn_and_split "neon_vcombinev16qi"
5293 [(set (match_operand:OI 0 "s_register_operand" "=w")
5294 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5295 (match_operand:V16QI 2 "s_register_operand" "w")]
5299 "&& reload_completed"
5302 neon_split_vcombine (operands);
5305 [(set_attr "type" "multiple")]
;; VTBX (table extend): like VTBL but out-of-range indices leave the
;; destination element unchanged, so operand 1 (the fallback value) is tied
;; to the output with constraint "0".
5308 (define_insn "neon_vtbx1v8qi"
5309 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5310 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5311 (match_operand:V8QI 2 "s_register_operand" "w")
5312 (match_operand:V8QI 3 "s_register_operand" "w")]
5315 "vtbx.8\t%P0, {%P2}, %P3"
5316 [(set_attr "type" "neon_tbl1")]
;; Two-register-table VTBX; the TImode table operand is decomposed into its
;; two D registers (REGNO, REGNO+2), mirroring neon_vtbl2v8qi.
5319 (define_insn "neon_vtbx2v8qi"
5320 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5321 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5322 (match_operand:TI 2 "s_register_operand" "w")
5323 (match_operand:V8QI 3 "s_register_operand" "w")]
5328 int tabbase = REGNO (operands[2]);
5330 ops[0] = operands[0];
5331 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5332 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5333 ops[3] = operands[3];
5334 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5338 [(set_attr "type" "neon_tbl2")]
;; Three-register-table VTBX (EImode table; D regs at tabbase, +2, +4).
5341 (define_insn "neon_vtbx3v8qi"
5342 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5343 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5344 (match_operand:EI 2 "s_register_operand" "w")
5345 (match_operand:V8QI 3 "s_register_operand" "w")]
5350 int tabbase = REGNO (operands[2]);
5352 ops[0] = operands[0];
5353 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5354 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5355 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5356 ops[4] = operands[3];
5357 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5361 [(set_attr "type" "neon_tbl3")]
;; Four-register-table VTBX (OImode table; D regs at tabbase .. +6).
5364 (define_insn "neon_vtbx4v8qi"
5365 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5366 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5367 (match_operand:OI 2 "s_register_operand" "w")
5368 (match_operand:V8QI 3 "s_register_operand" "w")]
5373 int tabbase = REGNO (operands[2]);
5375 ops[0] = operands[0];
5376 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5377 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5378 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5379 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5380 ops[5] = operands[3];
5381 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5385 [(set_attr "type" "neon_tbl4")]
;; VTRN/VZIP/VUZP produce two results; each is modelled as a parallel of two
;; sets.  The expander takes four independent operands; the matching insn
;; re-numbers them and ties 1->0 and 3->2 ("0"/"2" constraints) because the
;; hardware instruction overwrites both of its source registers.
5388 (define_expand "@neon_vtrn<mode>_internal"
5390 [(set (match_operand:VDQWH 0 "s_register_operand")
5391 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5392 (match_operand:VDQWH 2 "s_register_operand")]
5394 (set (match_operand:VDQWH 3 "s_register_operand")
5395 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5400 ;; Note: Different operand numbering to handle tied registers correctly.
5401 (define_insn "*neon_vtrn<mode>_insn"
5402 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5403 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5404 (match_operand:VDQWH 3 "s_register_operand" "2")]
5406 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5407 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5410 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5411 [(set_attr "type" "neon_permute<q>")]
;; Zip (interleave) — same two-set / tied-register structure as vtrn above.
5414 (define_expand "@neon_vzip<mode>_internal"
5416 [(set (match_operand:VDQWH 0 "s_register_operand")
5417 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5418 (match_operand:VDQWH 2 "s_register_operand")]
5420 (set (match_operand:VDQWH 3 "s_register_operand")
5421 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5426 ;; Note: Different operand numbering to handle tied registers correctly.
5427 (define_insn "*neon_vzip<mode>_insn"
5428 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5429 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5430 (match_operand:VDQWH 3 "s_register_operand" "2")]
5432 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5433 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5436 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5437 [(set_attr "type" "neon_zip<q>")]
;; Unzip (de-interleave) — same structure again.
5440 (define_expand "@neon_vuzp<mode>_internal"
5442 [(set (match_operand:VDQWH 0 "s_register_operand")
5443 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5444 (match_operand:VDQWH 2 "s_register_operand")]
5446 (set (match_operand:VDQWH 3 "s_register_operand" "")
5447 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5452 ;; Note: Different operand numbering to handle tied registers correctly.
5453 (define_insn "*neon_vuzp<mode>_insn"
5454 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5455 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5456 (match_operand:VDQWH 3 "s_register_operand" "2")]
5458 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5459 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5462 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5463 [(set_attr "type" "neon_zip<q>")]
;; Single-structure loads (VLD1) and their lane/dup variants.
5466 (define_expand "vec_load_lanes<mode><mode>"
5467 [(set (match_operand:VDQX 0 "s_register_operand")
5468 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-register VLD1.
5472 (define_insn "neon_vld1<mode>"
5473 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5474 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5477 "vld1.<V_sz_elem>\t%h0, %A1"
5478 [(set_attr "type" "neon_load1_1reg<q>")]
5481 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5482 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; Load one lane into a double-word vector; op 2 ("0") supplies the lanes
;; that are not written.  NEON_ENDIAN_LANE_N maps the GCC lane number back
;; to the architectural one.
5484 (define_insn "neon_vld1_lane<mode>"
5485 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5486 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5487 (match_operand:VDX 2 "s_register_operand" "0")
5488 (match_operand:SI 3 "immediate_operand" "i")]
5492 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5493 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5494 operands[3] = GEN_INT (lane);
5496 return "vld1.<V_sz_elem>\t%P0, %A1";
5498 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5500 [(set_attr "type" "neon_load1_one_lane<q>")]
5503 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5504 ;; here on big endian targets.
;; Quad-word variant: a lane in the upper half is redirected to the
;; odd-numbered D register by rewriting operands[0] to <V_HALF>mode.
5505 (define_insn "neon_vld1_lane<mode>"
5506 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5507 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5508 (match_operand:VQX 2 "s_register_operand" "0")
5509 (match_operand:SI 3 "immediate_operand" "i")]
5513 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5514 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5515 operands[3] = GEN_INT (lane);
5516 int regno = REGNO (operands[0]);
5517 if (lane >= max / 2)
5521 operands[3] = GEN_INT (lane);
5523 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5525 return "vld1.<V_sz_elem>\t%P0, %A1";
5527 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5529 [(set_attr "type" "neon_load1_one_lanes<q>" ) ] is not here -- kept below
5529 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load-and-duplicate to every lane of a double-word vector.
5532 (define_insn "neon_vld1_dup<mode>"
5533 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5534 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5536 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5537 [(set_attr "type" "neon_load1_all_lanes<q>")]
5540 ;; Special case for DImode. Treat it exactly like a simple load.
5541 (define_expand "neon_vld1_dupdi"
5542 [(set (match_operand:DI 0 "s_register_operand" "")
5543 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Quad-word load-and-duplicate: both D halves (%e0/%f0) get the value.
5549 (define_insn "neon_vld1_dup<mode>"
5550 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5551 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5554 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5556 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI dup: after reload, load the low DI half then copy it to the high
;; half with a register move (two insns, hence length 8).
5559 (define_insn_and_split "neon_vld1_dupv2di"
5560 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5561 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5564 "&& reload_completed"
5567 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5568 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5569 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5572 [(set_attr "length" "8")
5573 (set_attr "type" "neon_load1_all_lanes_q")]
;; Single-structure stores (VST1) and lane variants — mirror of the vld1
;; patterns above.
5576 (define_expand "vec_store_lanes<mode><mode>"
5577 [(set (match_operand:VDQX 0 "neon_struct_operand")
5578 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5582 (define_insn "neon_vst1<mode>"
5583 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5584 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5587 "vst1.<V_sz_elem>\t%h1, %A0"
5588 [(set_attr "type" "neon_store1_1reg<q>")])
5590 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5591 ;; here on big endian targets.
;; Store one lane of a double-word vector.
5592 (define_insn "neon_vst1_lane<mode>"
5593 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5595 [(match_operand:VDX 1 "s_register_operand" "w")
5596 (match_operand:SI 2 "immediate_operand" "i")]
5600 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5601 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5602 operands[2] = GEN_INT (lane);
5604 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5606 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5608 [(set_attr "type" "neon_store1_one_lane<q>")]
5611 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5612 ;; here on big endian targets.
;; Quad-word variant: lanes in the upper half are redirected to the
;; high D register by rewriting operands[1] to <V_HALF>mode.
5613 (define_insn "neon_vst1_lane<mode>"
5614 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5616 [(match_operand:VQX 1 "s_register_operand" "w")
5617 (match_operand:SI 2 "immediate_operand" "i")]
5621 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5622 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5623 int regno = REGNO (operands[1]);
5624 if (lane >= max / 2)
5629 operands[2] = GEN_INT (lane);
5630 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5632 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5634 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5636 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Two-structure loads (VLD2).  The UNSPEC_VSTRUCTDUMMY operand carries only
;; the element mode; it contributes no value.
5639 (define_expand "vec_load_lanesti<mode>"
5640 [(set (match_operand:TI 0 "s_register_operand")
5641 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5642 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Double-word VLD2; 64-bit elements degenerate to a plain VLD1.64.
5646 (define_insn "neon_vld2<mode>"
5647 [(set (match_operand:TI 0 "s_register_operand" "=w")
5648 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5649 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5653 if (<V_sz_elem> == 64)
5654 return "vld1.64\t%h0, %A1";
5656 return "vld2.<V_sz_elem>\t%h0, %A1";
5659 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5660 (const_string "neon_load1_2reg<q>")
5661 (const_string "neon_load2_2reg<q>")))]
;; Quad-word VLD2 (OImode result, four D registers).
5664 (define_expand "vec_load_lanesoi<mode>"
5665 [(set (match_operand:OI 0 "s_register_operand")
5666 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5667 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5671 (define_insn "neon_vld2<mode>"
5672 [(set (match_operand:OI 0 "s_register_operand" "=w")
5673 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5674 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5677 "vld2.<V_sz_elem>\t%h0, %A1"
5678 [(set_attr "type" "neon_load2_2reg_q")])
5680 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5681 ;; here on big endian targets.
;; Lane-wise VLD2, double-word: the two destination D regs are REGNO and
;; REGNO+2 of the TImode result.
5682 (define_insn "neon_vld2_lane<mode>"
5683 [(set (match_operand:TI 0 "s_register_operand" "=w")
5684 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5685 (match_operand:TI 2 "s_register_operand" "0")
5686 (match_operand:SI 3 "immediate_operand" "i")
5687 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5691 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5692 int regno = REGNO (operands[0]);
5694 ops[0] = gen_rtx_REG (DImode, regno);
5695 ops[1] = gen_rtx_REG (DImode, regno + 2);
5696 ops[2] = operands[1];
5697 ops[3] = GEN_INT (lane);
5698 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5701 [(set_attr "type" "neon_load2_one_lane<q>")]
5704 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5705 ;; here on big endian targets.
;; Lane-wise VLD2, quad-word: second register is REGNO+4 (stride of one
;; Q register); upper-half lanes are redirected by the elided branch.
5706 (define_insn "neon_vld2_lane<mode>"
5707 [(set (match_operand:OI 0 "s_register_operand" "=w")
5708 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5709 (match_operand:OI 2 "s_register_operand" "0")
5710 (match_operand:SI 3 "immediate_operand" "i")
5711 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5715 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5716 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5717 int regno = REGNO (operands[0]);
5719 if (lane >= max / 2)
5724 ops[0] = gen_rtx_REG (DImode, regno);
5725 ops[1] = gen_rtx_REG (DImode, regno + 4);
5726 ops[2] = operands[1];
5727 ops[3] = GEN_INT (lane);
5728 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5731 [(set_attr "type" "neon_load2_one_lane<q>")]
;; VLD2 all-lanes dup; single-element (DI) modes degenerate to VLD1.
5734 (define_insn "neon_vld2_dup<mode>"
5735 [(set (match_operand:TI 0 "s_register_operand" "=w")
5736 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5737 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5741 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5742 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5744 return "vld1.<V_sz_elem>\t%h0, %A1";
5747 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5748 (const_string "neon_load2_all_lanes<q>")
5749 (const_string "neon_load1_1reg<q>")))]
;; Two-structure stores (VST2) — mirror of the vld2 patterns.
5752 (define_expand "vec_store_lanesti<mode>"
5753 [(set (match_operand:TI 0 "neon_struct_operand")
5754 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5755 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Double-word VST2; 64-bit elements degenerate to VST1.64.
5759 (define_insn "neon_vst2<mode>"
5760 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5761 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5762 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5766 if (<V_sz_elem> == 64)
5767 return "vst1.64\t%h1, %A0";
5769 return "vst2.<V_sz_elem>\t%h1, %A0";
5772 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5773 (const_string "neon_store1_2reg<q>")
5774 (const_string "neon_store2_one_lane<q>")))]
5777 (define_expand "vec_store_lanesoi<mode>"
5778 [(set (match_operand:OI 0 "neon_struct_operand")
5779 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5780 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Quad-word VST2 (four D registers from an OImode source).
5784 (define_insn "neon_vst2<mode>"
5785 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5786 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5787 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5790 "vst2.<V_sz_elem>\t%h1, %A0"
5791 [(set_attr "type" "neon_store2_4reg<q>")]
5794 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5795 ;; here on big endian targets.
;; Lane-wise VST2, double-word: source D regs at REGNO and REGNO+2.
5796 (define_insn "neon_vst2_lane<mode>"
5797 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5798 (unspec:<V_two_elem>
5799 [(match_operand:TI 1 "s_register_operand" "w")
5800 (match_operand:SI 2 "immediate_operand" "i")
5801 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5805 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5806 int regno = REGNO (operands[1]);
5808 ops[0] = operands[0];
5809 ops[1] = gen_rtx_REG (DImode, regno);
5810 ops[2] = gen_rtx_REG (DImode, regno + 2);
5811 ops[3] = GEN_INT (lane);
5812 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5815 [(set_attr "type" "neon_store2_one_lane<q>")]
5818 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5819 ;; here on big endian targets.
;; Lane-wise VST2, quad-word: register stride 4 (one Q register).
5820 (define_insn "neon_vst2_lane<mode>"
5821 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5822 (unspec:<V_two_elem>
5823 [(match_operand:OI 1 "s_register_operand" "w")
5824 (match_operand:SI 2 "immediate_operand" "i")
5825 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5829 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5830 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5831 int regno = REGNO (operands[1]);
5833 if (lane >= max / 2)
5838 ops[0] = operands[0];
5839 ops[1] = gen_rtx_REG (DImode, regno);
5840 ops[2] = gen_rtx_REG (DImode, regno + 4);
5841 ops[3] = GEN_INT (lane);
5842 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5845 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Three-structure loads (VLD3).
5848 (define_expand "vec_load_lanesei<mode>"
5849 [(set (match_operand:EI 0 "s_register_operand")
5850 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5851 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Double-word VLD3; 64-bit elements degenerate to VLD1.64.
5855 (define_insn "neon_vld3<mode>"
5856 [(set (match_operand:EI 0 "s_register_operand" "=w")
5857 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5858 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5862 if (<V_sz_elem> == 64)
5863 return "vld1.64\t%h0, %A1";
5865 return "vld3.<V_sz_elem>\t%h0, %A1";
5868 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5869 (const_string "neon_load1_3reg<q>")
5870 (const_string "neon_load3_3reg<q>")))]
5873 (define_expand "vec_load_lanesci<mode>"
5874 [(match_operand:CI 0 "s_register_operand")
5875 (match_operand:CI 1 "neon_struct_operand")
5876 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5879 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Quad-word VLD3 is done as two half-loads: the even D registers first
;; (vld3qa), then the odd ones at an EImode offset (vld3qb).
5883 (define_expand "neon_vld3<mode>"
5884 [(match_operand:CI 0 "s_register_operand")
5885 (match_operand:CI 1 "neon_struct_operand")
5886 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5891 mem = adjust_address (operands[1], EImode, 0);
5892 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5893 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5894 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of quad VLD3: even D regs (REGNO, +4, +8) of the CI result.
5898 (define_insn "neon_vld3qa<mode>"
5899 [(set (match_operand:CI 0 "s_register_operand" "=w")
5900 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5901 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5905 int regno = REGNO (operands[0]);
5907 ops[0] = gen_rtx_REG (DImode, regno);
5908 ops[1] = gen_rtx_REG (DImode, regno + 4);
5909 ops[2] = gen_rtx_REG (DImode, regno + 8);
5910 ops[3] = operands[1];
5911 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5914 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half: odd D regs (REGNO+2, +6, +10); op 2 ties the partial result.
5917 (define_insn "neon_vld3qb<mode>"
5918 [(set (match_operand:CI 0 "s_register_operand" "=w")
5919 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5920 (match_operand:CI 2 "s_register_operand" "0")
5921 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5925 int regno = REGNO (operands[0]);
5927 ops[0] = gen_rtx_REG (DImode, regno + 2);
5928 ops[1] = gen_rtx_REG (DImode, regno + 6);
5929 ops[2] = gen_rtx_REG (DImode, regno + 10);
5930 ops[3] = operands[1];
5931 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5934 [(set_attr "type" "neon_load3_3reg<q>")]
5937 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5938 ;; here on big endian targets.
;; Lane-wise VLD3, double-word: D regs at REGNO, +2, +4.
5939 (define_insn "neon_vld3_lane<mode>"
5940 [(set (match_operand:EI 0 "s_register_operand" "=w")
5941 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5942 (match_operand:EI 2 "s_register_operand" "0")
5943 (match_operand:SI 3 "immediate_operand" "i")
5944 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5948 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5949 int regno = REGNO (operands[0]);
5951 ops[0] = gen_rtx_REG (DImode, regno);
5952 ops[1] = gen_rtx_REG (DImode, regno + 2);
5953 ops[2] = gen_rtx_REG (DImode, regno + 4);
5954 ops[3] = operands[1];
5955 ops[4] = GEN_INT (lane);
5956 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5960 [(set_attr "type" "neon_load3_one_lane<q>")]
5963 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5964 ;; here on big endian targets.
;; Lane-wise VLD3, quad-word: register stride 4; upper-half lanes handled
;; by the elided branch after the max/2 test.
5965 (define_insn "neon_vld3_lane<mode>"
5966 [(set (match_operand:CI 0 "s_register_operand" "=w")
5967 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5968 (match_operand:CI 2 "s_register_operand" "0")
5969 (match_operand:SI 3 "immediate_operand" "i")
5970 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5974 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5975 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5976 int regno = REGNO (operands[0]);
5978 if (lane >= max / 2)
5983 ops[0] = gen_rtx_REG (DImode, regno);
5984 ops[1] = gen_rtx_REG (DImode, regno + 4);
5985 ops[2] = gen_rtx_REG (DImode, regno + 8);
5986 ops[3] = operands[1];
5987 ops[4] = GEN_INT (lane);
5988 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5992 [(set_attr "type" "neon_load3_one_lane<q>")]
;; VLD3 all-lanes dup; single-element modes degenerate to VLD1.
5995 (define_insn "neon_vld3_dup<mode>"
5996 [(set (match_operand:EI 0 "s_register_operand" "=w")
5997 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5998 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6002 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6004 int regno = REGNO (operands[0]);
6006 ops[0] = gen_rtx_REG (DImode, regno);
6007 ops[1] = gen_rtx_REG (DImode, regno + 2);
6008 ops[2] = gen_rtx_REG (DImode, regno + 4);
6009 ops[3] = operands[1];
6010 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
6014 return "vld1.<V_sz_elem>\t%h0, %A1";
6017 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6018 (const_string "neon_load3_all_lanes<q>")
6019 (const_string "neon_load1_1reg<q>")))])
;; Three-structure stores (VST3) — mirror of the vld3 patterns.
6021 (define_expand "vec_store_lanesei<mode>"
6022 [(set (match_operand:EI 0 "neon_struct_operand")
6023 (unspec:EI [(match_operand:EI 1 "s_register_operand")
6024 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Double-word VST3; 64-bit elements degenerate to VST1.64.
6028 (define_insn "neon_vst3<mode>"
6029 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6030 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
6031 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6035 if (<V_sz_elem> == 64)
6036 return "vst1.64\t%h1, %A0";
6038 return "vst3.<V_sz_elem>\t%h1, %A0";
6041 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6042 (const_string "neon_store1_3reg<q>")
6043 (const_string "neon_store3_one_lane<q>")))])
6045 (define_expand "vec_store_lanesci<mode>"
6046 [(match_operand:CI 0 "neon_struct_operand")
6047 (match_operand:CI 1 "s_register_operand")
6048 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6051 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Quad-word VST3 as two half-stores (even then odd D registers), like the
;; vld3qa/vld3qb pair.
6055 (define_expand "neon_vst3<mode>"
6056 [(match_operand:CI 0 "neon_struct_operand")
6057 (match_operand:CI 1 "s_register_operand")
6058 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6063 mem = adjust_address (operands[0], EImode, 0);
6064 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
6065 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
6066 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half: even D regs (REGNO, +4, +8).
6070 (define_insn "neon_vst3qa<mode>"
6071 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6072 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6073 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6077 int regno = REGNO (operands[1]);
6079 ops[0] = operands[0];
6080 ops[1] = gen_rtx_REG (DImode, regno);
6081 ops[2] = gen_rtx_REG (DImode, regno + 4);
6082 ops[3] = gen_rtx_REG (DImode, regno + 8);
6083 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6086 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half: odd D regs (REGNO+2, +6, +10).
6089 (define_insn "neon_vst3qb<mode>"
6090 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
6091 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
6092 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6096 int regno = REGNO (operands[1]);
6098 ops[0] = operands[0];
6099 ops[1] = gen_rtx_REG (DImode, regno + 2);
6100 ops[2] = gen_rtx_REG (DImode, regno + 6);
6101 ops[3] = gen_rtx_REG (DImode, regno + 10);
6102 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
6105 [(set_attr "type" "neon_store3_3reg<q>")]
6108 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6109 ;; here on big endian targets.
;; Lane-wise VST3, double-word: source D regs at REGNO, +2, +4.
6110 (define_insn "neon_vst3_lane<mode>"
6111 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6112 (unspec:<V_three_elem>
6113 [(match_operand:EI 1 "s_register_operand" "w")
6114 (match_operand:SI 2 "immediate_operand" "i")
6115 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6119 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6120 int regno = REGNO (operands[1]);
6122 ops[0] = operands[0];
6123 ops[1] = gen_rtx_REG (DImode, regno);
6124 ops[2] = gen_rtx_REG (DImode, regno + 2);
6125 ops[3] = gen_rtx_REG (DImode, regno + 4);
6126 ops[4] = GEN_INT (lane);
6127 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6131 [(set_attr "type" "neon_store3_one_lane<q>")]
6134 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6135 ;; here on big endian targets.
;; Lane-wise VST3, quad-word: register stride 4.
6136 (define_insn "neon_vst3_lane<mode>"
6137 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6138 (unspec:<V_three_elem>
6139 [(match_operand:CI 1 "s_register_operand" "w")
6140 (match_operand:SI 2 "immediate_operand" "i")
6141 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6145 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6146 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6147 int regno = REGNO (operands[1]);
6149 if (lane >= max / 2)
6154 ops[0] = operands[0];
6155 ops[1] = gen_rtx_REG (DImode, regno);
6156 ops[2] = gen_rtx_REG (DImode, regno + 4);
6157 ops[3] = gen_rtx_REG (DImode, regno + 8);
6158 ops[4] = GEN_INT (lane);
6159 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6163 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Four-structure loads (VLD4).
6166 (define_expand "vec_load_lanesoi<mode>"
6167 [(set (match_operand:OI 0 "s_register_operand")
6168 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6169 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Double-word VLD4; 64-bit elements degenerate to VLD1.64.
6173 (define_insn "neon_vld4<mode>"
6174 [(set (match_operand:OI 0 "s_register_operand" "=w")
6175 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6176 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6180 if (<V_sz_elem> == 64)
6181 return "vld1.64\t%h0, %A1";
6183 return "vld4.<V_sz_elem>\t%h0, %A1";
6186 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6187 (const_string "neon_load1_4reg<q>")
6188 (const_string "neon_load4_4reg<q>")))]
6191 (define_expand "vec_load_lanesxi<mode>"
6192 [(match_operand:XI 0 "s_register_operand")
6193 (match_operand:XI 1 "neon_struct_operand")
6194 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6197 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Quad-word VLD4 as two half-loads (even then odd D registers), like the
;; vld3qa/vld3qb pair but with OImode halves.
6201 (define_expand "neon_vld4<mode>"
6202 [(match_operand:XI 0 "s_register_operand")
6203 (match_operand:XI 1 "neon_struct_operand")
6204 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6209 mem = adjust_address (operands[1], OImode, 0);
6210 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6211 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6212 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half: even D regs (REGNO, +4, +8, +12) of the XImode result.
6216 (define_insn "neon_vld4qa<mode>"
6217 [(set (match_operand:XI 0 "s_register_operand" "=w")
6218 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6219 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6223 int regno = REGNO (operands[0]);
6225 ops[0] = gen_rtx_REG (DImode, regno);
6226 ops[1] = gen_rtx_REG (DImode, regno + 4);
6227 ops[2] = gen_rtx_REG (DImode, regno + 8);
6228 ops[3] = gen_rtx_REG (DImode, regno + 12);
6229 ops[4] = operands[1];
6230 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6233 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half: odd D regs (REGNO+2, +6, +10, +14); op 2 ties the partial
;; result loaded by vld4qa.
6236 (define_insn "neon_vld4qb<mode>"
6237 [(set (match_operand:XI 0 "s_register_operand" "=w")
6238 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6239 (match_operand:XI 2 "s_register_operand" "0")
6240 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6244 int regno = REGNO (operands[0]);
6246 ops[0] = gen_rtx_REG (DImode, regno + 2);
6247 ops[1] = gen_rtx_REG (DImode, regno + 6);
6248 ops[2] = gen_rtx_REG (DImode, regno + 10);
6249 ops[3] = gen_rtx_REG (DImode, regno + 14);
6250 ops[4] = operands[1];
6251 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6254 [(set_attr "type" "neon_load4_4reg<q>")]
6257 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6258 ;; here on big endian targets.
;; vld4_lane, D-reg element modes: loads one lane of a 4-element struct
;; into four consecutive even D regs (regno, +2, +4, +6).
6259 (define_insn "neon_vld4_lane<mode>"
6260 [(set (match_operand:OI 0 "s_register_operand" "=w")
6261 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6262 (match_operand:OI 2 "s_register_operand" "0")
6263 (match_operand:SI 3 "immediate_operand" "i")
6264 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6268 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6269 int regno = REGNO (operands[0]);
6271 ops[0] = gen_rtx_REG (DImode, regno);
6272 ops[1] = gen_rtx_REG (DImode, regno + 2);
6273 ops[2] = gen_rtx_REG (DImode, regno + 4);
6274 ops[3] = gen_rtx_REG (DImode, regno + 6);
6275 ops[4] = operands[1];
6276 ops[5] = GEN_INT (lane);
6277 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6281 [(set_attr "type" "neon_load4_one_lane<q>")]
6284 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6285 ;; here on big endian targets.
;; vld4_lane, Q-reg element modes: struct fields are 4 D regs apart
;; (regno, +4, +8, +12).  The body of the lane >= max/2 branch is elided
;; in this sample -- presumably it remaps regno/lane to address the odd
;; D regs for upper-half lanes; confirm against the full source.
6286 (define_insn "neon_vld4_lane<mode>"
6287 [(set (match_operand:XI 0 "s_register_operand" "=w")
6288 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6289 (match_operand:XI 2 "s_register_operand" "0")
6290 (match_operand:SI 3 "immediate_operand" "i")
6291 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6295 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6296 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6297 int regno = REGNO (operands[0]);
6299 if (lane >= max / 2)
6304 ops[0] = gen_rtx_REG (DImode, regno);
6305 ops[1] = gen_rtx_REG (DImode, regno + 4);
6306 ops[2] = gen_rtx_REG (DImode, regno + 8);
6307 ops[3] = gen_rtx_REG (DImode, regno + 12);
6308 ops[4] = operands[1];
6309 ops[5] = GEN_INT (lane);
6310 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6314 [(set_attr "type" "neon_load4_one_lane<q>")]
;; vld4_dup: load one 4-element structure and replicate it to all lanes
;; of the four destination D regs ([] lane syntax).  Single-element
;; modes (NUNITS == 1) degrade to a plain vld1 of the 4-reg list, and
;; the type attribute is chosen to match.
6317 (define_insn "neon_vld4_dup<mode>"
6318 [(set (match_operand:OI 0 "s_register_operand" "=w")
6319 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6320 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6324 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6326 int regno = REGNO (operands[0]);
6328 ops[0] = gen_rtx_REG (DImode, regno);
6329 ops[1] = gen_rtx_REG (DImode, regno + 2);
6330 ops[2] = gen_rtx_REG (DImode, regno + 4);
6331 ops[3] = gen_rtx_REG (DImode, regno + 6);
6332 ops[4] = operands[1];
6333 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6338 return "vld1.<V_sz_elem>\t%h0, %A1";
6341 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6342 (const_string "neon_load4_all_lanes<q>")
6343 (const_string "neon_load1_1reg<q>")))]
;; Store-side counterparts of the vld4 patterns above.
6346 (define_expand "vec_store_lanesoi<mode>"
6347 [(set (match_operand:OI 0 "neon_struct_operand")
6348 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6349 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst4 for D-reg element modes; 64-bit elements use vst1.64 (there is
;; no vst4.64), mirroring neon_vld4 above.
6353 (define_insn "neon_vst4<mode>"
6354 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6355 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6356 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6360 if (<V_sz_elem> == 64)
6361 return "vst1.64\t%h1, %A0";
6363 return "vst4.<V_sz_elem>\t%h1, %A0";
6366 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6367 (const_string "neon_store1_4reg<q>")
6368 (const_string "neon_store4_4reg<q>")))]
;; Quad-reg (XI) store expander; forwards to the neon_vst4 XI expander.
6371 (define_expand "vec_store_lanesxi<mode>"
6372 [(match_operand:XI 0 "neon_struct_operand")
6373 (match_operand:XI 1 "s_register_operand")
6374 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6377 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; XI vst4 is split into two OImode halves (vst4qa / vst4qb), the exact
;; mirror of the neon_vld4 XI expander.
6381 (define_expand "neon_vst4<mode>"
6382 [(match_operand:XI 0 "neon_struct_operand")
6383 (match_operand:XI 1 "s_register_operand")
6384 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6389 mem = adjust_address (operands[0], OImode, 0);
6390 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6391 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6392 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; vst4qa: store the even-numbered D regs (regno, +4, +8, +12) of the XI
;; source to the first OImode chunk of memory.
6396 (define_insn "neon_vst4qa<mode>"
6397 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6398 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6399 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6403 int regno = REGNO (operands[1]);
6405 ops[0] = operands[0];
6406 ops[1] = gen_rtx_REG (DImode, regno);
6407 ops[2] = gen_rtx_REG (DImode, regno + 4);
6408 ops[3] = gen_rtx_REG (DImode, regno + 8);
6409 ops[4] = gen_rtx_REG (DImode, regno + 12);
6410 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6413 [(set_attr "type" "neon_store4_4reg<q>")]
;; vst4qb: store the odd-numbered D regs (regno+2, +6, +10, +14) to the
;; second OImode chunk.
6416 (define_insn "neon_vst4qb<mode>"
6417 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6418 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6419 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6423 int regno = REGNO (operands[1]);
6425 ops[0] = operands[0];
6426 ops[1] = gen_rtx_REG (DImode, regno + 2);
6427 ops[2] = gen_rtx_REG (DImode, regno + 6);
6428 ops[3] = gen_rtx_REG (DImode, regno + 10);
6429 ops[4] = gen_rtx_REG (DImode, regno + 14);
6430 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6433 [(set_attr "type" "neon_store4_4reg<q>")]
6436 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6437 ;; here on big endian targets.
;; vst4_lane, D-reg element modes: stores one lane from four consecutive
;; even D regs (regno, +2, +4, +6) of the OI source.
6438 (define_insn "neon_vst4_lane<mode>"
6439 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6440 (unspec:<V_four_elem>
6441 [(match_operand:OI 1 "s_register_operand" "w")
6442 (match_operand:SI 2 "immediate_operand" "i")
6443 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6447 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6448 int regno = REGNO (operands[1]);
6450 ops[0] = operands[0];
6451 ops[1] = gen_rtx_REG (DImode, regno);
6452 ops[2] = gen_rtx_REG (DImode, regno + 2);
6453 ops[3] = gen_rtx_REG (DImode, regno + 4);
6454 ops[4] = gen_rtx_REG (DImode, regno + 6);
6455 ops[5] = GEN_INT (lane);
6456 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6460 [(set_attr "type" "neon_store4_one_lane<q>")]
6463 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6464 ;; here on big endian targets.
;; vst4_lane, Q-reg element modes: stores one lane from four D regs that
;; are 4 apart (regno, +4, +8, +12).  The body of the lane >= max/2
;; branch is elided in this sample -- presumably it remaps regno/lane to
;; the odd D regs for upper-half lanes; confirm against the full source.
6465 (define_insn "neon_vst4_lane<mode>"
6466 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6467 (unspec:<V_four_elem>
6468 [(match_operand:XI 1 "s_register_operand" "w")
6469 (match_operand:SI 2 "immediate_operand" "i")
6470 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6474 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6475 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6476 int regno = REGNO (operands[1]);
6478 if (lane >= max / 2)
6483 ops[0] = operands[0];
6484 ops[1] = gen_rtx_REG (DImode, regno);
6485 ops[2] = gen_rtx_REG (DImode, regno + 4);
6486 ops[3] = gen_rtx_REG (DImode, regno + 8);
6487 ops[4] = gen_rtx_REG (DImode, regno + 12);
6488 ops[5] = GEN_INT (lane);
6489 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
;; Type fixed from neon_store4_4reg<q>: this pattern emits a single
;; one-lane vst4, so it should use the one-lane scheduling type, matching
;; the D-reg vst4_lane pattern and both vld4_lane patterns above.
6493 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widening unpack: vmovl of the low half (%e1) of a Q-reg source.
;; Little-endian only -- lane order differs on big-endian.
6496 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6497 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6498 (SE:<V_unpack> (vec_select:<V_HALF>
6499 (match_operand:VU 1 "register_operand" "w")
6500 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6501 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6502 "vmovl.<US><V_sz_elem> %q0, %e1"
6503 [(set_attr "type" "neon_shift_imm_long")]
;; As above but for the high half (%f1).
6506 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6507 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6508 (SE:<V_unpack> (vec_select:<V_HALF>
6509 (match_operand:VU 1 "register_operand" "w")
6510 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6511 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6512 "vmovl.<US><V_sz_elem> %q0, %f1"
6513 [(set_attr "type" "neon_shift_imm_long")]
;; Expander: builds the PARALLEL of high-half lane indices
;; (NUNITS/2 .. NUNITS-1) and emits the _hi insn above.
6516 (define_expand "vec_unpack<US>_hi_<mode>"
6517 [(match_operand:<V_unpack> 0 "register_operand" "")
6518 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6519 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6521 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6524 for (i = 0; i < (<V_mode_nunits>/2); i++)
6525 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6527 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6528 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander: low-half lane indices (0 .. NUNITS/2-1), emits the _lo insn.
6535 (define_expand "vec_unpack<US>_lo_<mode>"
6536 [(match_operand:<V_unpack> 0 "register_operand" "")
6537 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6538 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6540 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6543 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6544 RTVEC_ELT (v, i) = GEN_INT (i);
6545 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6546 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of two Q-reg sources (vmull).
;; Little-endian only, like the unpack patterns above.
6553 (define_insn "neon_vec_<US>mult_lo_<mode>"
6554 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6555 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6556 (match_operand:VU 1 "register_operand" "w")
6557 (match_operand:VU 2 "vect_par_constant_low" "")))
6558 (SE:<V_unpack> (vec_select:<V_HALF>
6559 (match_operand:VU 3 "register_operand" "w")
6561 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6562 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6563 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander: builds the low-half lane PARALLEL and emits the insn above.
6566 (define_expand "vec_widen_<US>mult_lo_<mode>"
6567 [(match_operand:<V_unpack> 0 "register_operand" "")
6568 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6569 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6570 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6572 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6575 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6576 RTVEC_ELT (v, i) = GEN_INT (i);
6577 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6579 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; High-half widening multiply (%f1/%f3 operands of vmull).
6587 (define_insn "neon_vec_<US>mult_hi_<mode>"
6588 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6589 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6590 (match_operand:VU 1 "register_operand" "w")
6591 (match_operand:VU 2 "vect_par_constant_high" "")))
6592 (SE:<V_unpack> (vec_select:<V_HALF>
6593 (match_operand:VU 3 "register_operand" "w")
6595 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6596 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6597 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander: high-half lane PARALLEL (NUNITS/2 .. NUNITS-1).
6600 (define_expand "vec_widen_<US>mult_hi_<mode>"
6601 [(match_operand:<V_unpack> 0 "register_operand" "")
6602 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6603 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6604 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6606 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6609 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6610 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6611 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6613 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift-left of a D-reg vector by an immediate (vshll).
6622 (define_insn "neon_vec_<US>shiftl_<mode>"
6623 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6624 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6625 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6628 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6630 [(set_attr "type" "neon_shift_imm_long")]
;; Expander: widen-shift the low half of a Q-reg source -- takes a
;; subreg at offset 0 and emits the half-width vshll insn above.
6633 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6634 [(match_operand:<V_unpack> 0 "register_operand" "")
6635 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6636 (match_operand:SI 2 "immediate_operand" "i")]
6637 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6639 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6640 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; As above for the high half -- subreg at byte offset of the half mode.
6646 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6647 [(match_operand:<V_unpack> 0 "register_operand" "")
6648 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6649 (match_operand:SI 2 "immediate_operand" "i")]
6650 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6652 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6653 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6654 GET_MODE_SIZE (<V_HALF>mode)),
6660 ;; Vectorize for non-neon-quad case
;; Widen a whole D-reg vector to a Q reg (vmovl); the lo/hi expanders
;; below then extract the half they need with vget_low/high.
6661 (define_insn "neon_unpack<US>_<mode>"
6662 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6663 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6665 "vmovl.<US><V_sz_elem> %q0, %P1"
6666 [(set_attr "type" "neon_move")]
;; Unpack low half: widen everything, then take the low Q-reg half.
6669 (define_expand "vec_unpack<US>_lo_<mode>"
6670 [(match_operand:<V_double_width> 0 "register_operand" "")
6671 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6674 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6675 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6676 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Unpack high half: widen everything, then take the high Q-reg half.
6682 (define_expand "vec_unpack<US>_hi_<mode>"
6683 [(match_operand:<V_double_width> 0 "register_operand" "")
6684 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6687 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6688 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6689 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Full widening multiply of two D-reg vectors (vmull).
6695 (define_insn "neon_vec_<US>mult_<mode>"
6696 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6697 (mult:<V_widen> (SE:<V_widen>
6698 (match_operand:VDI 1 "register_operand" "w"))
6700 (match_operand:VDI 2 "register_operand" "w"))))]
6702 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6703 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Widen-mult hi/lo for D-reg modes: multiply everything, then select
;; the high or low half of the Q-reg product.
6706 (define_expand "vec_widen_<US>mult_hi_<mode>"
6707 [(match_operand:<V_double_width> 0 "register_operand" "")
6708 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6709 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6712 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6713 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6714 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6721 (define_expand "vec_widen_<US>mult_lo_<mode>"
6722 [(match_operand:<V_double_width> 0 "register_operand" "")
6723 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6724 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6727 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6728 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6729 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Widen-shiftl hi/lo for D-reg modes, same widen-then-select shape.
6736 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6737 [(match_operand:<V_double_width> 0 "register_operand" "")
6738 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6739 (match_operand:SI 2 "immediate_operand" "i")]
6742 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6743 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6744 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6750 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6751 [(match_operand:<V_double_width> 0 "register_operand" "")
6752 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6753 (match_operand:SI 2 "immediate_operand" "i")]
6756 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6757 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6758 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6764 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6765 ; because the ordering of vector elements in Q registers is different from what
6766 ; the semantics of the instructions require.
;; Narrowing pack: two vmovn's into the low/high D halves of the dest.
;; "=&w" (early-clobber) keeps the dest from overlapping a source, since
;; the first vmovn would otherwise clobber operand 2.
6768 (define_insn "vec_pack_trunc_<mode>"
6769 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6770 (vec_concat:<V_narrow_pack>
6771 (truncate:<V_narrow>
6772 (match_operand:VN 1 "register_operand" "w"))
6773 (truncate:<V_narrow>
6774 (match_operand:VN 2 "register_operand" "w"))))]
6775 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6776 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6777 [(set_attr "type" "multiple")
6778 (set_attr "length" "8")]
6781 ;; For the non-quad case.
;; Single vmovn: narrow one Q reg into one D reg.
6782 (define_insn "neon_vec_pack_trunc_<mode>"
6783 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6784 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6785 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6786 "vmovn.i<V_sz_elem>\t%P0, %q1"
6787 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for D-reg sources: glue the two D-reg inputs into one
;; temporary Q reg (move_lo/hi_quad), then narrow it with the insn above.
6790 (define_expand "vec_pack_trunc_<mode>"
6791 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6792 (match_operand:VSHFT 1 "register_operand" "")
6793 (match_operand:VSHFT 2 "register_operand")]
6794 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6796 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6798 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6799 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6800 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Match abs(a - b) as a single vabd.  Only valid under
;; -funsafe-math-optimizations: vabd is not a bit-exact substitute for
;; the separate subtract + abs for all FP inputs.
6804 (define_insn "neon_vabd<mode>_2"
6805 [(set (match_operand:VF 0 "s_register_operand" "=w")
6806 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6807 (match_operand:VF 2 "s_register_operand" "w"))))]
6808 "TARGET_NEON && flag_unsafe_math_optimizations"
6809 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6810 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Same vabd output for abs() wrapped around an unspec subtraction
;; (unspec code elided in this sample -- confirm against full source).
6813 (define_insn "neon_vabd<mode>_3"
6814 [(set (match_operand:VF 0 "s_register_operand" "=w")
6815 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6816 (match_operand:VF 2 "s_register_operand" "w")]
6818 "TARGET_NEON && flag_unsafe_math_optimizations"
6819 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6820 [(set_attr "type" "neon_fp_abd_s<q>")]
6823 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; Six post-reload splits (the "(define_split" opener lines are elided
;; in this sample): when a DImode sign/zero-extend lands in a VFP/NEON
;; register pair, duplicate the narrow value across the whole register
;; with vdup, then shift right by (64 - source width) -- arithmetic
;; shift for sign-extend, logical shift for zero-extend.
;; SImode sign-extend: vdup to V2SI, then ashiftrt 32.
6826 [(set (match_operand:DI 0 "s_register_operand" "")
6827 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6828 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6829 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6830 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6832 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode sign-extend: vdup to V4HI, then ashiftrt 48.
6836 [(set (match_operand:DI 0 "s_register_operand" "")
6837 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6838 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6839 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6840 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6842 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode sign-extend: vdup to V8QI, then ashiftrt 56.
6846 [(set (match_operand:DI 0 "s_register_operand" "")
6847 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6848 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6849 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6850 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6852 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SImode zero-extend: same shape with lshiftrt 32.
6856 [(set (match_operand:DI 0 "s_register_operand" "")
6857 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6858 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6859 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6860 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6862 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HImode zero-extend: lshiftrt 48.
6866 [(set (match_operand:DI 0 "s_register_operand" "")
6867 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6868 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6869 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6870 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6872 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode zero-extend: lshiftrt 56.
6876 [(set (match_operand:DI 0 "s_register_operand" "")
6877 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6878 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6879 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6880 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6882 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));