1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are "vadd", "vmin" and "vmax"; insns that do not set
;; the attribute explicitly get the constant default "vadd".
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Store form of the misaligned move for the modes in the VDX iterator.
;; Operand 1 (a register, constraint "w") is wrapped in
;; UNSPEC_MISALIGNED_ACCESS and written to operand 0 (predicate
;; neon_permissive_struct_operand, constraint "Um").  The insn is emitted as
;; a single vst1.<V_sz_elem> and is only enabled when TARGET_NEON is set,
;; the target is not big-endian and unaligned_access is set.
;; NOTE(review): VDX presumably covers the 64-bit (D-register) vector modes,
;; which would explain the %P operand modifier -- confirm in iterators.md.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
;; Store form of the misaligned move for the modes in the VQX iterator.
;; Same RTL shape and enabling condition as the VDX store pattern: operand 1
;; (register, constraint "w") is wrapped in UNSPEC_MISALIGNED_ACCESS and
;; written to operand 0 (constraint "Um") with a single vst1.<V_sz_elem>.
;; The only visible difference is that the register is printed with %q
;; instead of %P.
;; NOTE(review): VQX presumably covers the 128-bit (Q-register) vector
;; modes -- confirm in iterators.md.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
508 (define_insn "add<mode>3_fp16"
510 (match_operand:VH 0 "s_register_operand" "=w")
512 (match_operand:VH 1 "s_register_operand" "w")
513 (match_operand:VH 2 "s_register_operand" "w")))]
514 "TARGET_NEON_FP16INST"
515 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
517 (if_then_else (match_test "<Is_float_mode>")
518 (const_string "neon_fp_addsub_s<q>")
519 (const_string "neon_add<q>")))]
522 (define_insn "adddi3_neon"
523 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
524 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
525 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
526 (clobber (reg:CC CC_REGNUM))]
529 switch (which_alternative)
531 case 0: /* fall through */
532 case 3: return "vadd.i64\t%P0, %P1, %P2";
538 default: gcc_unreachable ();
541 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
542 multiple,multiple,multiple")
543 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
544 (set_attr "length" "*,8,8,*,8,8,8")
545 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
548 (define_insn "*sub<mode>3_neon"
549 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
550 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
551 (match_operand:VDQ 2 "s_register_operand" "w")))]
552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
553 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
555 (if_then_else (match_test "<Is_float_mode>")
556 (const_string "neon_fp_addsub_s<q>")
557 (const_string "neon_sub<q>")))]
560 (define_insn "sub<mode>3_fp16"
562 (match_operand:VH 0 "s_register_operand" "=w")
564 (match_operand:VH 1 "s_register_operand" "w")
565 (match_operand:VH 2 "s_register_operand" "w")))]
566 "TARGET_NEON_FP16INST"
567 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
568 [(set_attr "type" "neon_sub<q>")]
571 (define_insn "subdi3_neon"
572 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
573 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
574 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
575 (clobber (reg:CC CC_REGNUM))]
578 switch (which_alternative)
580 case 0: /* fall through */
581 case 4: return "vsub.i64\t%P0, %P1, %P2";
582 case 1: /* fall through */
583 case 2: /* fall through */
584 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
585 default: gcc_unreachable ();
588 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
589 (set_attr "conds" "*,clob,clob,clob,*")
590 (set_attr "length" "*,8,8,8,*")
591 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
594 (define_insn "*mul<mode>3_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
597 (match_operand:VDQW 2 "s_register_operand" "w")))]
598 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
599 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
601 (if_then_else (match_test "<Is_float_mode>")
602 (const_string "neon_fp_mul_s<q>")
603 (const_string "neon_mul_<V_elem_ch><q>")))]
606 (define_insn "mul<mode>3add<mode>_neon"
607 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
608 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
609 (match_operand:VDQW 3 "s_register_operand" "w"))
610 (match_operand:VDQW 1 "s_register_operand" "0")))]
611 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
612 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
614 (if_then_else (match_test "<Is_float_mode>")
615 (const_string "neon_fp_mla_s<q>")
616 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes:
;;   operand 0 = operand 2 * operand 3 + operand 1
;; The accumulator (operand 1) uses the matching constraint "0", so it must
;; live in the destination register; the template therefore prints only
;; operands 0, 2 and 3.  Emitted as vmla.f16.  Requires TARGET_NEON_FP16INST
;; and, when <Is_float_mode> is true, flag_unsafe_math_optimizations as well.
619 (define_insn "mul<mode>3add<mode>_neon"
620 [(set (match_operand:VH 0 "s_register_operand" "=w")
621 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
622 (match_operand:VH 3 "s_register_operand" "w"))
623 (match_operand:VH 1 "s_register_operand" "0")))]
624 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
625 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
626 [(set_attr "type" "neon_fp_mla_s<q>")]
629 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
630 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
631 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
632 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
633 (match_operand:VDQW 3 "s_register_operand" "w"))))]
634 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
635 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
637 (if_then_else (match_test "<Is_float_mode>")
638 (const_string "neon_fp_mla_s<q>")
639 (const_string "neon_mla_<V_elem_ch><q>")))]
642 ;; Fused multiply-accumulate
643 ;; We define each insn twice here:
644 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
645 ;; to be able to use when converting to FMA.
646 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-add for the VCVTF modes:
;;   operand 0 = operand 1 * operand 2 + operand 3
;; The addend (operand 3) uses the matching constraint "0", so it shares the
;; destination register and the template prints only operands 0, 1 and 2.
;; This variant is enabled only when flag_unsafe_math_optimizations is set,
;; in addition to TARGET_NEON && TARGET_FMA.
647 (define_insn "fma<VCVTF:mode>4"
648 [(set (match_operand:VCVTF 0 "register_operand" "=w")
649 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
650 (match_operand:VCVTF 2 "register_operand" "w")
651 (match_operand:VCVTF 3 "register_operand" "0")))]
652 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
653 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic variant of the VCVTF fused multiply-add:
;;   operand 0 = operand 1 * operand 2 + operand 3
;; The RTL and the vfma output template match the "fma<VCVTF:mode>4"
;; pattern; the difference is the condition, which here is only
;; TARGET_NEON && TARGET_FMA (no flag_unsafe_math_optimizations requirement).
;; Operand 3 is tied to the destination by the "0" constraint.
657 (define_insn "fma<VCVTF:mode>4_intrinsic"
658 [(set (match_operand:VCVTF 0 "register_operand" "=w")
659 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
660 (match_operand:VCVTF 2 "register_operand" "w")
661 (match_operand:VCVTF 3 "register_operand" "0")))]
662 "TARGET_NEON && TARGET_FMA"
663 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
664 [(set_attr "type" "neon_fp_mla_s<q>")]
667 ;; There is limited support for unsafe-math optimizations using the NEON FP16
668 ;; arithmetic instructions, so only the intrinsic is currently supported.
669 (define_insn "fma<VH:mode>4_intrinsic"
670 [(set (match_operand:VH 0 "register_operand" "=w")
672 (match_operand:VH 1 "register_operand" "w")
673 (match_operand:VH 2 "register_operand" "w")
674 (match_operand:VH 3 "register_operand" "0")))]
675 "TARGET_NEON_FP16INST"
676 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes, expressed as an fma whose
;; first multiplicand is negated:
;;   operand 0 = (-operand 1) * operand 2 + operand 3
;; Operand 3 is tied to the destination by the "0" constraint.  Emitted as
;; vfms and enabled only under
;; TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations.
680 (define_insn "*fmsub<VCVTF:mode>4"
681 [(set (match_operand:VCVTF 0 "register_operand" "=w")
682 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
683 (match_operand:VCVTF 2 "register_operand" "w")
684 (match_operand:VCVTF 3 "register_operand" "0")))]
685 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
686 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
687 [(set_attr "type" "neon_fp_mla_s<q>")]
690 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
691 [(set (match_operand:VCVTF 0 "register_operand" "=w")
693 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
694 (match_operand:VCVTF 2 "register_operand" "w")
695 (match_operand:VCVTF 3 "register_operand" "0")))]
696 "TARGET_NEON && TARGET_FMA"
697 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
698 [(set_attr "type" "neon_fp_mla_s<q>")]
701 (define_insn "fmsub<VH:mode>4_intrinsic"
702 [(set (match_operand:VH 0 "register_operand" "=w")
704 (neg:VH (match_operand:VH 1 "register_operand" "w"))
705 (match_operand:VH 2 "register_operand" "w")
706 (match_operand:VH 3 "register_operand" "0")))]
707 "TARGET_NEON_FP16INST"
708 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
709 [(set_attr "type" "neon_fp_mla_s<q>")]
712 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
713 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
714 (unspec:VCVTF [(match_operand:VCVTF 1
715 "s_register_operand" "w")]
717 "TARGET_NEON && TARGET_FPU_ARMV8"
718 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
719 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
722 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
723 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
724 (FIXUORS:<V_cmp_result> (unspec:VCVTF
725 [(match_operand:VCVTF 1 "register_operand" "w")]
727 "TARGET_NEON && TARGET_FPU_ARMV8"
728 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
729 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
730 (set_attr "predicable" "no")]
733 (define_insn "ior<mode>3"
734 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
735 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
736 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
739 switch (which_alternative)
741 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
742 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
743 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
744 default: gcc_unreachable ();
747 [(set_attr "type" "neon_logic<q>")]
750 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
751 ;; vorr. We support the pseudo-instruction vand instead, because that
752 ;; corresponds to the canonical form the middle-end expects to use for
753 ;; immediate bitwise-ANDs.
755 (define_insn "and<mode>3"
756 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
757 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
758 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
761 switch (which_alternative)
763 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
764 case 1: return neon_output_logic_immediate ("vand", &operands[2],
765 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
766 default: gcc_unreachable ();
769 [(set_attr "type" "neon_logic<q>")]
772 (define_insn "orn<mode>3_neon"
773 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
774 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
775 (match_operand:VDQ 1 "s_register_operand" "w")))]
777 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
778 [(set_attr "type" "neon_logic<q>")]
781 ;; TODO: investigate whether we should disable
782 ;; this and bicdi3_neon for the A8 in line with the other
784 (define_insn_and_split "orndi3_neon"
785 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
786 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
787 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
795 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
796 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
797 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
802 operands[3] = gen_highpart (SImode, operands[0]);
803 operands[0] = gen_lowpart (SImode, operands[0]);
804 operands[4] = gen_highpart (SImode, operands[2]);
805 operands[2] = gen_lowpart (SImode, operands[2]);
806 operands[5] = gen_highpart (SImode, operands[1]);
807 operands[1] = gen_lowpart (SImode, operands[1]);
811 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
812 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
816 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
817 (set_attr "length" "*,16,8,8")
818 (set_attr "arch" "any,a,t2,t2")]
821 (define_insn "bic<mode>3_neon"
822 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
823 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
824 (match_operand:VDQ 1 "s_register_operand" "w")))]
826 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
827 [(set_attr "type" "neon_logic<q>")]
830 ;; Compare to *anddi_notdi_di.
831 (define_insn "bicdi3_neon"
832 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
833 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
834 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
840 [(set_attr "type" "neon_logic,multiple,multiple")
841 (set_attr "length" "*,8,8")]
844 (define_insn "xor<mode>3"
845 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
846 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
847 (match_operand:VDQ 2 "s_register_operand" "w")))]
849 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
850 [(set_attr "type" "neon_logic<q>")]
853 (define_insn "one_cmpl<mode>2"
854 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
855 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
857 "vmvn\t%<V_reg>0, %<V_reg>1"
858 [(set_attr "type" "neon_move<q>")]
861 (define_insn "abs<mode>2"
862 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
863 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
865 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
867 (if_then_else (match_test "<Is_float_mode>")
868 (const_string "neon_fp_abs_s<q>")
869 (const_string "neon_abs<q>")))]
872 (define_insn "neg<mode>2"
873 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
874 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
876 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
878 (if_then_else (match_test "<Is_float_mode>")
879 (const_string "neon_fp_neg_s<q>")
880 (const_string "neon_neg<q>")))]
883 (define_insn "negdi2_neon"
884 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
885 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
886 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
887 (clobber (reg:CC CC_REGNUM))]
890 [(set_attr "length" "8")
891 (set_attr "type" "multiple")]
894 ; Split negdi2_neon for vfp registers
896 [(set (match_operand:DI 0 "s_register_operand" "")
897 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
898 (clobber (match_scratch:DI 2 ""))
899 (clobber (reg:CC CC_REGNUM))]
900 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
901 [(set (match_dup 2) (const_int 0))
902 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
903 (clobber (reg:CC CC_REGNUM))])]
905 if (!REG_P (operands[2]))
906 operands[2] = operands[0];
910 ; Split negdi2_neon for core registers
912 [(set (match_operand:DI 0 "s_register_operand" "")
913 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
914 (clobber (match_scratch:DI 2 ""))
915 (clobber (reg:CC CC_REGNUM))]
916 "TARGET_32BIT && reload_completed
917 && arm_general_register_operand (operands[0], DImode)"
918 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
919 (clobber (reg:CC CC_REGNUM))])]
;; Unary operation on the VH modes, generated once per code in the ABSNEG
;; code iterator: operand 0 = ABSNEG (operand 1).  The mnemonic is built as
;; "v" followed by <absneg_str>, with element suffix <V_s_elem>.  Enabled
;; under TARGET_NEON_FP16INST.
;; NOTE(review): ABSNEG presumably iterates over abs and neg; the "type"
;; attribute is neon_abs<q> for every code in the iterator -- confirm that
;; this is intended for the non-abs case.
923 (define_insn "<absneg_str><mode>2"
924 [(set (match_operand:VH 0 "s_register_operand" "=w")
925 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
926 "TARGET_NEON_FP16INST"
927 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
928 [(set_attr "type" "neon_abs<q>")]
931 (define_expand "neon_v<absneg_str><mode>"
933 (match_operand:VH 0 "s_register_operand")
934 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
935 "TARGET_NEON_FP16INST"
937 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
941 (define_insn "neon_v<fp16_rnd_str><mode>"
942 [(set (match_operand:VH 0 "s_register_operand" "=w")
944 [(match_operand:VH 1 "s_register_operand" "w")]
946 "TARGET_NEON_FP16INST"
947 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
948 [(set_attr "type" "neon_fp_round_s<q>")]
951 (define_insn "neon_vrsqrte<mode>"
952 [(set (match_operand:VH 0 "s_register_operand" "=w")
954 [(match_operand:VH 1 "s_register_operand" "w")]
956 "TARGET_NEON_FP16INST"
957 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
958 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
961 (define_insn "*umin<mode>3_neon"
962 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
963 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
964 (match_operand:VDQIW 2 "s_register_operand" "w")))]
966 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
967 [(set_attr "type" "neon_minmax<q>")]
970 (define_insn "*umax<mode>3_neon"
971 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
972 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
973 (match_operand:VDQIW 2 "s_register_operand" "w")))]
975 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
976 [(set_attr "type" "neon_minmax<q>")]
979 (define_insn "*smin<mode>3_neon"
980 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
981 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
982 (match_operand:VDQW 2 "s_register_operand" "w")))]
984 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
986 (if_then_else (match_test "<Is_float_mode>")
987 (const_string "neon_fp_minmax_s<q>")
988 (const_string "neon_minmax<q>")))]
991 (define_insn "*smax<mode>3_neon"
992 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
993 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
994 (match_operand:VDQW 2 "s_register_operand" "w")))]
996 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
998 (if_then_else (match_test "<Is_float_mode>")
999 (const_string "neon_fp_minmax_s<q>")
1000 (const_string "neon_minmax<q>")))]
1003 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1004 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift (ashift) on VDQIW modes.  Alternative 0 takes the shift
;; count in a register and emits vshl.<V_s_elem>; alternative 1 takes an
;; immediate (constraint Dn) and lets neon_output_shift_immediate build the
;; vshl assembly text.
1007 (define_insn "vashl<mode>3"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1009 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1010 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1013 switch (which_alternative)
1015 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1016 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1018 VALID_NEON_QREG_MODE (<MODE>mode),
1020 default: gcc_unreachable ();
1023 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift (ashiftrt) of a VDQIW vector by an immediate that
;; satisfies imm_for_neon_rshift_operand.  The assembly is produced by
;; neon_output_shift_immediate with mnemonic vshr and sign character 's'.
1026 (define_insn "vashr<mode>3_imm"
1027 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1028 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1029 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1032 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1033 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1036 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift (lshiftrt) of a VDQIW vector by an immediate that
;; satisfies imm_for_neon_rshift_operand.  The assembly is produced by
;; neon_output_shift_immediate with mnemonic vshr and sign character 'u'.
1039 (define_insn "vlshr<mode>3_imm"
1040 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1041 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1042 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1045 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1046 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1049 [(set_attr "type" "neon_shift_imm<q>")]
1052 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
1053 ; amount, with signed operands. This is essentially the same as vashl<mode>3
1054 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Register-count vshl with the signed element suffix, expressed as
;; UNSPEC_ASHIFT_SIGNED on VDQI modes.  The vashr<mode>3 expander emits it
;; with a negated shift count.
1057 (define_insn "ashl<mode>3_signed"
1058 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1059 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1060 (match_operand:VDQI 2 "s_register_operand" "w")]
1061 UNSPEC_ASHIFT_SIGNED))]
1063 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1064 [(set_attr "type" "neon_shift_reg<q>")]
1067 ; Used for implementing logical shift-right, which is a left-shift by a negative
1068 ; amount, with unsigned operands.
;; Register-count vshl with the unsigned element suffix, expressed as
;; UNSPEC_ASHIFT_UNSIGNED on VDQI modes.  The vlshr<mode>3 expander emits it
;; with a negated shift count.
1070 (define_insn "ashl<mode>3_unsigned"
1071 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1072 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1073 (match_operand:VDQI 2 "s_register_operand" "w")]
1074 UNSPEC_ASHIFT_UNSIGNED))]
1076 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1077 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic right shift on VDQIW modes.  When operand 2
;; is a register, the count is negated into a fresh pseudo and the shift is
;; emitted as ashl<mode>3_signed.  The other visible path emits
;; vashr<mode>3_imm (presumably the immediate case; the branch structure is
;; not fully visible in this extract).
1080 (define_expand "vashr<mode>3"
1081 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1082 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1083 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1086 if (s_register_operand (operands[2], <MODE>mode))
1088 rtx neg = gen_reg_rtx (<MODE>mode);
1089 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1090 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1093 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical right shift on VDQIW modes.  When operand 2 is
;; a register, the count is negated into a fresh pseudo and the shift is
;; emitted as ashl<mode>3_unsigned.  The other visible path emits
;; vlshr<mode>3_imm (presumably the immediate case; the branch structure is
;; not fully visible in this extract).
1097 (define_expand "vlshr<mode>3"
1098 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1099 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1100 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1103 if (s_register_operand (operands[2], <MODE>mode))
1105 rtx neg = gen_reg_rtx (<MODE>mode);
1106 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1107 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1110 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1116 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1117 ;; leaving the upper half uninitialized. This is OK since the shift
1118 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1119 ;; data flow analysis however, we pretend the full register is set
;; Moves an SImode shift count into a DImode NEON register via
;; UNSPEC_LOAD_COUNT.  Alternative 0 reads the count from memory (Um) with
;; vld1.32; alternative 1 copies it from a core register (r) with vmov.32.
;; Both forms write element [0] of the destination.
1121 (define_insn "neon_load_count"
1122 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1123 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1124 UNSPEC_LOAD_COUNT))]
1127 vld1.32\t{%P0[0]}, %A1
1128 vmov.32\t%P0[0], %1"
1129 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift in a NEON register with no clobbers.  Only matches after
;; reload.  A constant count must lie in [0, 63].  Alternative 0 shifts by an
;; immediate, alternative 1 by a count held in a NEON register; both emit
;; vshl.u64.
1132 (define_insn "ashldi3_neon_noclobber"
1133 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1134 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1135 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1136 "TARGET_NEON && reload_completed
1137 && (!CONST_INT_P (operands[2])
1138 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1140 vshl.u64\t%P0, %P1, %2
1141 vshl.u64\t%P0, %P1, %P2"
1142 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift that may live in either NEON (w) or core (r) registers.
;; It is split once TARGET_NEON && reload_completed holds.
;;
;; Summary of the visible split code (some braces and else lines are not
;; visible in this extract, so the exact nesting should be confirmed):
;;  - Destination in a VFP/NEON register: a constant count below 1 becomes a
;;    plain DImode move, a constant count above 63 is replaced by 63, and a
;;    non-constant count is first loaded into scratch operand 5 with
;;    neon_load_count.  The shift itself is emitted as
;;    ashldi3_neon_noclobber.
;;  - Otherwise: source and destination must overlap fully or not at all
;;    (asserted).  A count of exactly 1 uses arm_ashldi3_1bit; any other
;;    count goes through arm_emit_coreregs_64bit_shift with scratch operands
;;    3 and 4.
1145 (define_insn_and_split "ashldi3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1147 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1148 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1149 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1150 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1152 (clobber (reg:CC_C CC_REGNUM))]
1155 "TARGET_NEON && reload_completed"
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1161 if (CONST_INT_P (operands[2]))
1163 if (INTVAL (operands[2]) < 1)
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1168 else if (INTVAL (operands[2]) > 63)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1173 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1174 operands[2] = operands[5];
1177 /* Ditch the unnecessary clobbers. */
1178 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1183 /* The shift expanders support either full overlap or no overlap. */
1184 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1185 || REGNO (operands[0]) == REGNO (operands[1]));
1187 if (operands[2] == CONST1_RTX (SImode))
1188 /* This clobbers CC. */
1189 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1191 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1192 operands[2], operands[3], operands[4]);
1196 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1197 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1198 (set_attr "type" "multiple")]
1201 ; The shift amount needs to be negated for right-shifts
;; DImode register-count shift expressed as UNSPEC_ASHIFT_SIGNED; emits
;; vshl.s64.  Only matches after reload.  The <shift>di3_neon split emits it
;; with a count that has already been negated.
1202 (define_insn "signed_shift_di3_neon"
1203 [(set (match_operand:DI 0 "s_register_operand" "=w")
1204 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1205 (match_operand:DI 2 "s_register_operand" " w")]
1206 UNSPEC_ASHIFT_SIGNED))]
1207 "TARGET_NEON && reload_completed"
1208 "vshl.s64\t%P0, %P1, %P2"
1209 [(set_attr "type" "neon_shift_reg")]
1212 ; The shift amount needs to be negated for right-shifts
;; DImode register-count shift expressed as UNSPEC_ASHIFT_UNSIGNED; emits
;; vshl.u64.  Only matches after reload.  The <shift>di3_neon split emits it
;; with a count that has already been negated.
1213 (define_insn "unsigned_shift_di3_neon"
1214 [(set (match_operand:DI 0 "s_register_operand" "=w")
1215 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1216 (match_operand:DI 2 "s_register_operand" " w")]
1217 UNSPEC_ASHIFT_UNSIGNED))]
1218 "TARGET_NEON && reload_completed"
1219 "vshl.u64\t%P0, %P1, %P2"
1220 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic right shift by a constant in a NEON register, without
;; clobbers.  Only matches after reload and only for counts in the range
;; 1..64 inclusive.  Emits vshr.s64.
1223 (define_insn "ashrdi3_neon_imm_noclobber"
1224 [(set (match_operand:DI 0 "s_register_operand" "=w")
1225 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1226 (match_operand:DI 2 "const_int_operand" " i")))]
1227 "TARGET_NEON && reload_completed
1228 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1229 "vshr.s64\t%P0, %P1, %2"
1230 [(set_attr "type" "neon_shift_imm")]
;; DImode logical right shift by a constant in a NEON register, without
;; clobbers.  Only matches after reload and only for counts in the range
;; 1..64 inclusive.  Emits vshr.u64.
1233 (define_insn "lshrdi3_neon_imm_noclobber"
1234 [(set (match_operand:DI 0 "s_register_operand" "=w")
1235 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1236 (match_operand:DI 2 "const_int_operand" " i")))]
1237 "TARGET_NEON && reload_completed
1238 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1239 "vshr.u64\t%P0, %P1, %2"
1240 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (iterated over RSHIFTS) that may live in either NEON
;; (w) or core (r) registers.  Split once TARGET_NEON && reload_completed
;; holds.
;;
;; Summary of the visible split code (some braces and else lines are not
;; visible in this extract, so the exact nesting should be confirmed):
;;  - Destination in a VFP/NEON register with a constant count: a count
;;    below 1 becomes a plain DImode move, a count above 64 is replaced by
;;    64, and the shift is emitted as <shift>di3_neon_imm_noclobber.
;;  - Destination in a VFP/NEON register with a register count: the count is
;;    negated into scratch operand 3, loaded into scratch operand 5 with
;;    neon_load_count, and <shifttype>_shift_di3_neon is emitted.
;;  - Otherwise: source and destination must overlap fully or not at all
;;    (asserted).  A count of exactly 1 uses arm_<shift>di3_1bit; any other
;;    count goes through arm_emit_coreregs_64bit_shift with scratch operands
;;    3 and 4.
1245 (define_insn_and_split "<shift>di3_neon"
1246 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1247 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1248 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1249 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1250 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1251 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1252 (clobber (reg:CC CC_REGNUM))]
1255 "TARGET_NEON && reload_completed"
1259 if (IS_VFP_REGNUM (REGNO (operands[0])))
1261 if (CONST_INT_P (operands[2]))
1263 if (INTVAL (operands[2]) < 1)
1265 emit_insn (gen_movdi (operands[0], operands[1]));
1268 else if (INTVAL (operands[2]) > 64)
1269 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1271 /* Ditch the unnecessary clobbers. */
1272 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1278 /* We must use a negative left-shift. */
1279 emit_insn (gen_negsi2 (operands[3], operands[2]));
1280 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1281 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1287 /* The shift expanders support either full overlap or no overlap. */
1288 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1289 || REGNO (operands[0]) == REGNO (operands[1]));
1291 if (operands[2] == CONST1_RTX (SImode))
1292 /* This clobbers CC. */
1293 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1295 /* This clobbers CC (ASHIFTRT by register only). */
1296 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1297 operands[2], operands[3], operands[4]);
1302 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1303 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1304 (set_attr "type" "multiple")]
1307 ;; Widening operations
;; Expander for a signed widening sum on VQI modes: operand 1 is
;; sign-extended and added to the <V_double_width> accumulator in operand 2.
;; The accumulator is first copied into operand 0 when the two differ, then
;; the vec_sel_widen_ssum_lo and vec_sel_widen_ssum_hi patterns are emitted
;; on operand 0.  p1 and p2 come from arm_simd_vect_par_cnst_half with the
;; flag false and true respectively.
1309 (define_expand "widen_ssum<mode>3"
1310 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1311 (plus:<V_double_width>
1312 (sign_extend:<V_double_width>
1313 (match_operand:VQI 1 "s_register_operand" ""))
1314 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1317 machine_mode mode = GET_MODE (operands[1]);
1320 p1 = arm_simd_vect_par_cnst_half (mode, false);
1321 p2 = arm_simd_vect_par_cnst_half (mode, true);
1323 if (operands[0] != operands[2])
1324 emit_move_insn (operands[0], operands[2]);
1326 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1330 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Signed widening add of the elements of operand 1 selected by a
;; vect_par_constant_low parallel (operand 2) into the accumulator.  The
;; accumulator (operand 3) is tied to the output by the "0" constraint.
;; Emits vaddw using %e1 on little-endian targets and %f1 on big-endian.
1338 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1339 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1341 (sign_extend:<VW:V_widen>
1343 (match_operand:VQI 1 "s_register_operand" "%w")
1344 (match_operand:VQI 2 "vect_par_constant_low" "")))
1345 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1348 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1349 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1351 [(set_attr "type" "neon_add_widen")])
;; Signed widening add of the elements of operand 1 selected by a
;; vect_par_constant_high parallel (operand 2) into the accumulator.  The
;; accumulator (operand 3) is tied to the output by the "0" constraint.
;; Emits vaddw using %f1 on little-endian targets and %e1 on big-endian.
1353 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1354 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1356 (sign_extend:<VW:V_widen>
1357 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1358 (match_operand:VQI 2 "vect_par_constant_high" "")))
1359 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1362 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1363 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1365 [(set_attr "type" "neon_add_widen")])
;; Signed widening sum for VW modes: operand 1 is sign-extended to <V_widen>
;; and combined with operand 2.  Emits a single vaddw.<V_s_elem> with
;; operand 2 as the wide source and operand 1 as the narrow source.
1367 (define_insn "widen_ssum<mode>3"
1368 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1370 (sign_extend:<V_widen>
1371 (match_operand:VW 1 "s_register_operand" "%w"))
1372 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1374 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1375 [(set_attr "type" "neon_add_widen")]
;; Expander for an unsigned widening sum on VQI modes: operand 1 is
;; zero-extended and added to the <V_double_width> accumulator in operand 2.
;; The accumulator is first copied into operand 0 when the two differ, then
;; the vec_sel_widen_usum_lo and vec_sel_widen_usum_hi patterns are emitted
;; on operand 0.  p1 and p2 come from arm_simd_vect_par_cnst_half with the
;; flag false and true respectively.
1378 (define_expand "widen_usum<mode>3"
1379 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1380 (plus:<V_double_width>
1381 (zero_extend:<V_double_width>
1382 (match_operand:VQI 1 "s_register_operand" ""))
1383 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1386 machine_mode mode = GET_MODE (operands[1]);
1389 p1 = arm_simd_vect_par_cnst_half (mode, false);
1390 p2 = arm_simd_vect_par_cnst_half (mode, true);
1392 if (operands[0] != operands[2])
1393 emit_move_insn (operands[0], operands[2]);
1395 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1399 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Unsigned widening add of the elements of operand 1 selected by a
;; vect_par_constant_low parallel (operand 2) into the accumulator.  The
;; accumulator (operand 3) is tied to the output by the "0" constraint.
;; Emits vaddw using %e1 on little-endian targets and %f1 on big-endian.
1407 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1408 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1410 (zero_extend:<VW:V_widen>
1412 (match_operand:VQI 1 "s_register_operand" "%w")
1413 (match_operand:VQI 2 "vect_par_constant_low" "")))
1414 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1417 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1418 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1420 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening add of the elements of operand 1 selected by a
;; vect_par_constant_high parallel (operand 2) into the accumulator.  The
;; accumulator (operand 3) is tied to the output by the "0" constraint.
;; Emits vaddw using %f1 on little-endian targets and %e1 on big-endian.
1422 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1423 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1425 (zero_extend:<VW:V_widen>
1426 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1427 (match_operand:VQI 2 "vect_par_constant_high" "")))
1428 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1431 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1432 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1434 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening sum for VW modes: operand 1 is zero-extended to
;; <V_widen> and added to operand 2.  Emits a single vaddw.<V_u_elem> with
;; operand 2 as the wide source and operand 1 as the narrow source.
1436 (define_insn "widen_usum<mode>3"
1437 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1438 (plus:<V_widen> (zero_extend:<V_widen>
1439 (match_operand:VW 1 "s_register_operand" "%w"))
1440 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1442 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1443 [(set_attr "type" "neon_add_widen")]
1446 ;; Helpers for quad-word reduction operations
1448 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1449 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1450 ; N/2-element vector.
;; V4SI variant: combines lanes 0-1 of operand 1 with lanes 2-3 of the same
;; operand into a V2SI result.  The mnemonic and sign letter come from
;; <VQH_mnem> and <VQH_sign>; the instruction reads %e1 and %f1.
1452 (define_insn "quad_halves_<code>v4si"
1453 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1455 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1456 (parallel [(const_int 0) (const_int 1)]))
1457 (vec_select:V2SI (match_dup 1)
1458 (parallel [(const_int 2) (const_int 3)]))))]
1460 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1461 [(set_attr "vqh_mnem" "<VQH_mnem>")
1462 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF variant: combines lanes 0-1 of operand 1 with lanes 2-3 of the same
;; operand into a V2SF result using <VQH_mnem>.f32.  Only enabled with
;; TARGET_NEON and flag_unsafe_math_optimizations.
1465 (define_insn "quad_halves_<code>v4sf"
1466 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1468 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1469 (parallel [(const_int 0) (const_int 1)]))
1470 (vec_select:V2SF (match_dup 1)
1471 (parallel [(const_int 2) (const_int 3)]))))]
1472 "TARGET_NEON && flag_unsafe_math_optimizations"
1473 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1474 [(set_attr "vqh_mnem" "<VQH_mnem>")
1475 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: combines lanes 0-3 of operand 1 with lanes 4-7 of the same
;; operand into a V4HI result.  The mnemonic and sign letter come from
;; <VQH_mnem> and <VQH_sign>; the instruction reads %e1 and %f1.
;; Operand 0 is only written by this pattern, so it uses the "=w" output
;; constraint, matching the V4SI and V4SF variants ("+w" would also mark
;; the destination as an input).
1478 (define_insn "quad_halves_<code>v8hi"
1479 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1481 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1482 (parallel [(const_int 0) (const_int 1)
1483 (const_int 2) (const_int 3)]))
1484 (vec_select:V4HI (match_dup 1)
1485 (parallel [(const_int 4) (const_int 5)
1486 (const_int 6) (const_int 7)]))))]
1488 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1489 [(set_attr "vqh_mnem" "<VQH_mnem>")
1490 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: combines lanes 0-7 of operand 1 with lanes 8-15 of the
;; same operand into a V8QI result.  The mnemonic and sign letter come from
;; <VQH_mnem> and <VQH_sign>; the instruction reads %e1 and %f1.
;; Operand 0 is only written by this pattern, so it uses the "=w" output
;; constraint, matching the V4SI and V4SF variants ("+w" would also mark
;; the destination as an input).
1493 (define_insn "quad_halves_<code>v16qi"
1494 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1496 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1497 (parallel [(const_int 0) (const_int 1)
1498 (const_int 2) (const_int 3)
1499 (const_int 4) (const_int 5)
1500 (const_int 6) (const_int 7)]))
1501 (vec_select:V8QI (match_dup 1)
1502 (parallel [(const_int 8) (const_int 9)
1503 (const_int 10) (const_int 11)
1504 (const_int 12) (const_int 13)
1505 (const_int 14) (const_int 15)]))))]
1507 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1508 [(set_attr "vqh_mnem" "<VQH_mnem>")
1509 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander over the ANY128 modes that emits a move whose destination is the
;; <V_HALF>mode subreg of operand 0 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).  The move source is not visible in this
;; extract (presumably operand 1 - confirm against the full file).
1512 (define_expand "move_hi_quad_<mode>"
1513 [(match_operand:ANY128 0 "s_register_operand" "")
1514 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1517 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1518 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander over the ANY128 modes that emits a move whose destination is a
;; <V_HALF>mode subreg of operand 0.  The subreg offset and the move source
;; are not visible in this extract (presumably offset 0 and operand 1 -
;; confirm against the full file).
1523 (define_expand "move_lo_quad_<mode>"
1524 [(match_operand:ANY128 0 "s_register_operand" "")
1525 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1528 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1534 ;; Reduction operations
;; Sum reduction of a VD vector to a scalar.  Reduces operand 1 into a
;; temporary vector with neon_pairwise_reduce, using the
;; neon_vpadd_internal<mode> generator, then extracts element 0 into
;; operand 0.  Float modes additionally require
;; flag_unsafe_math_optimizations.
1536 (define_expand "reduc_plus_scal_<mode>"
1537 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1538 (match_operand:VD 1 "s_register_operand" "")]
1539 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1541 rtx vec = gen_reg_rtx (<MODE>mode);
1542 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1543 &gen_neon_vpadd_internal<mode>);
1544 /* The same result is actually computed into every element. */
1545 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Sum reduction of a VQ vector to a scalar.  First folds the two halves of
;; operand 1 into a <V_HALF>mode temporary with quad_halves_plus<mode>, then
;; hands that temporary to the half-width reduc_plus_scal expander.
;; Disabled on big-endian targets; float modes additionally require
;; flag_unsafe_math_optimizations.
1549 (define_expand "reduc_plus_scal_<mode>"
1550 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1551 (match_operand:VQ 1 "s_register_operand" "")]
1552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1553 && !BYTES_BIG_ENDIAN"
1555 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1557 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1558 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar.  Emits
;; arm_reduc_plus_internal_v2di into a temporary V2DI register and then
;; extracts element 0 into operand 0.  Disabled on big-endian targets.
1563 (define_expand "reduc_plus_scal_v2di"
1564 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1565 (match_operand:V2DI 1 "s_register_operand" "")]
1566 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1568 rtx vec = gen_reg_rtx (V2DImode);
1570 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1571 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; Helper for reduc_plus_scal_v2di: an unspec on V2DI that emits a single
;; vadd.i64 of %e1 and %f1 into %e0.  Disabled on big-endian targets.
1576 (define_insn "arm_reduc_plus_internal_v2di"
1577 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1578 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1581 "vadd.i64\t%e0, %e1, %f1"
1582 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a VD vector to a scalar.  Reduces operand 1
;; into a temporary vector with neon_pairwise_reduce, using the
;; neon_vpsmin<mode> generator, then extracts element 0 into operand 0.
;; Float modes additionally require flag_unsafe_math_optimizations.
1585 (define_expand "reduc_smin_scal_<mode>"
1586 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1587 (match_operand:VD 1 "s_register_operand" "")]
1588 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1590 rtx vec = gen_reg_rtx (<MODE>mode);
1592 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1593 &gen_neon_vpsmin<mode>);
1594 /* The result is computed into every element of the vector. */
1595 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a VQ vector to a scalar.  First folds the two
;; halves of operand 1 into a <V_HALF>mode temporary with
;; quad_halves_smin<mode>, then hands that temporary to the half-width
;; reduc_smin_scal expander.  Disabled on big-endian targets; float modes
;; additionally require flag_unsafe_math_optimizations.
1599 (define_expand "reduc_smin_scal_<mode>"
1600 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1601 (match_operand:VQ 1 "s_register_operand" "")]
1602 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1603 && !BYTES_BIG_ENDIAN"
1605 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1607 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1608 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a VD vector to a scalar.  Reduces operand 1
;; into a temporary vector with neon_pairwise_reduce, using the
;; neon_vpsmax<mode> generator, then extracts element 0 into operand 0.
;; Float modes additionally require flag_unsafe_math_optimizations.
1613 (define_expand "reduc_smax_scal_<mode>"
1614 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1615 (match_operand:VD 1 "s_register_operand" "")]
1616 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1618 rtx vec = gen_reg_rtx (<MODE>mode);
1619 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1620 &gen_neon_vpsmax<mode>);
1621 /* The result is computed into every element of the vector. */
1622 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a VQ vector to a scalar.  First folds the two
;; halves of operand 1 into a <V_HALF>mode temporary with
;; quad_halves_smax<mode>, then hands that temporary to the half-width
;; reduc_smax_scal expander.  Disabled on big-endian targets; float modes
;; additionally require flag_unsafe_math_optimizations.
1626 (define_expand "reduc_smax_scal_<mode>"
1627 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1628 (match_operand:VQ 1 "s_register_operand" "")]
1629 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1630 && !BYTES_BIG_ENDIAN"
1632 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1634 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1635 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a VDI vector to a scalar.  Reduces operand 1
;; into a temporary vector with neon_pairwise_reduce, using the
;; neon_vpumin<mode> generator, then extracts element 0 into operand 0.
1640 (define_expand "reduc_umin_scal_<mode>"
1641 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1642 (match_operand:VDI 1 "s_register_operand" "")]
1645 rtx vec = gen_reg_rtx (<MODE>mode);
1646 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1647 &gen_neon_vpumin<mode>);
1648 /* The result is computed into every element of the vector. */
1649 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a VQI vector to a scalar.  First folds the
;; two halves of operand 1 into a <V_HALF>mode temporary with
;; quad_halves_umin<mode>, then hands that temporary to the half-width
;; reduc_umin_scal expander.  Disabled on big-endian targets.
1653 (define_expand "reduc_umin_scal_<mode>"
1654 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1655 (match_operand:VQI 1 "s_register_operand" "")]
1656 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1658 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1660 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1661 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a VDI vector to a scalar.  Reduces operand 1
;; into a temporary vector with neon_pairwise_reduce, using the
;; neon_vpumax<mode> generator, then extracts element 0 into operand 0.
1666 (define_expand "reduc_umax_scal_<mode>"
1667 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1668 (match_operand:VDI 1 "s_register_operand" "")]
1671 rtx vec = gen_reg_rtx (<MODE>mode);
1672 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1673 &gen_neon_vpumax<mode>);
1674 /* The result is computed into every element of the vector. */
1675 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a VQI vector to a scalar.  First folds the
;; two halves of operand 1 into a <V_HALF>mode temporary with
;; quad_halves_umax<mode>, then hands that temporary to the half-width
;; reduc_umax_scal expander.  Disabled on big-endian targets.
1679 (define_expand "reduc_umax_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VQI 1 "s_register_operand" "")]
1682 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1684 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1686 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1687 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add on VD modes, expressed as an unspec of two register operands.
;; Emits vpadd.<V_if_elem>.  The type attribute is neon_fp_reduc_add_s<q>
;; for float modes and neon_reduc_add<q> otherwise.  Used as the step
;; generator by the VD reduc_plus_scal expander.
1692 (define_insn "neon_vpadd_internal<mode>"
1693 [(set (match_operand:VD 0 "s_register_operand" "=w")
1694 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1695 (match_operand:VD 2 "s_register_operand" "w")]
1698 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1699 ;; Assume this schedules like vadd.
1701 (if_then_else (match_test "<Is_float_mode>")
1702 (const_string "neon_fp_reduc_add_s<q>")
1703 (const_string "neon_reduc_add<q>")))]
;; Pairwise add on V4HF, expressed as an unspec of two register operands.
;; Emits vpadd.f16; only enabled when TARGET_NEON_FP16INST holds.
1706 (define_insn "neon_vpaddv4hf"
1708 (match_operand:V4HF 0 "s_register_operand" "=w")
1709 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1710 (match_operand:V4HF 2 "s_register_operand" "w")]
1712 "TARGET_NEON_FP16INST"
1713 "vpadd.f16\t%P0, %P1, %P2"
1714 [(set_attr "type" "neon_reduc_add")]
;; Pairwise signed minimum on VD modes, expressed as an unspec of two
;; register operands.  Emits vpmin.<V_s_elem>.  The type attribute is
;; neon_fp_reduc_minmax_s<q> for float modes and neon_reduc_minmax<q>
;; otherwise.
1717 (define_insn "neon_vpsmin<mode>"
1718 [(set (match_operand:VD 0 "s_register_operand" "=w")
1719 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1720 (match_operand:VD 2 "s_register_operand" "w")]
1723 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1725 (if_then_else (match_test "<Is_float_mode>")
1726 (const_string "neon_fp_reduc_minmax_s<q>")
1727 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum on VD modes, expressed as an unspec of two
;; register operands.  Emits vpmax.<V_s_elem>.  The type attribute is
;; neon_fp_reduc_minmax_s<q> for float modes and neon_reduc_minmax<q>
;; otherwise.
1730 (define_insn "neon_vpsmax<mode>"
1731 [(set (match_operand:VD 0 "s_register_operand" "=w")
1732 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1733 (match_operand:VD 2 "s_register_operand" "w")]
1736 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1738 (if_then_else (match_test "<Is_float_mode>")
1739 (const_string "neon_fp_reduc_minmax_s<q>")
1740 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum on VDI modes, expressed as an unspec of two
;; register operands.  Emits vpmin.<V_u_elem>.
1743 (define_insn "neon_vpumin<mode>"
1744 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1745 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1746 (match_operand:VDI 2 "s_register_operand" "w")]
1749 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1750 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum on VDI modes, expressed as an unspec of two
;; register operands.  Emits vpmax.<V_u_elem>.
1753 (define_insn "neon_vpumax<mode>"
1754 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1755 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1756 (match_operand:VDI 2 "s_register_operand" "w")]
1759 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1760 [(set_attr "type" "neon_reduc_minmax<q>")]
1763 ;; Saturating arithmetic
1765 ; NOTE: Neon supports many more saturating variants of instructions than the
1766 ; following, but these are all GCC currently understands.
1767 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1768 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (ss_plus) of two VD register operands.  Emits
;; vqadd.<V_s_elem>.
1771 (define_insn "*ss_add<mode>_neon"
1772 [(set (match_operand:VD 0 "s_register_operand" "=w")
1773 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1774 (match_operand:VD 2 "s_register_operand" "w")))]
1776 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1777 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two VD register operands.  Emits
;; vqadd.<V_u_elem>.
1780 (define_insn "*us_add<mode>_neon"
1781 [(set (match_operand:VD 0 "s_register_operand" "=w")
1782 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1783 (match_operand:VD 2 "s_register_operand" "w")))]
1785 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1786 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two VD register operands.  Emits
;; vqsub.<V_s_elem>.
1789 (define_insn "*ss_sub<mode>_neon"
1790 [(set (match_operand:VD 0 "s_register_operand" "=w")
1791 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1792 (match_operand:VD 2 "s_register_operand" "w")))]
1794 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1795 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two VD register operands.
;; Emits vqsub.<V_u_elem>.
1798 (define_insn "*us_sub<mode>_neon"
1799 [(set (match_operand:VD 0 "s_register_operand" "=w")
1800 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1801 (match_operand:VD 2 "s_register_operand" "w")))]
1803 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1804 [(set_attr "type" "neon_qsub<q>")]
1807 ;; Conditional instructions. These are comparisons with conditional moves for
1808 ;; vectors. They perform the assignment:
1810 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1812 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select on VDQW modes.  Operand 3 is the comparison of
;; operands 4 and 5; operands 1 and 2 are the values selected between.  The
;; expander builds a comparison mask in a <V_cmp_result>mode register from
;; neon_vcge/vcgt/vceq (or the vclt/vcle forms used with a zero operand) and
;; finishes with neon_vbsl.  When swap_bsl_operands is set, operand 2 is
;; passed to neon_vbsl ahead of operand 1.  Float modes require
;; flag_unsafe_math_optimizations.
1815 (define_expand "vcond<mode><mode>"
1816 [(set (match_operand:VDQW 0 "s_register_operand" "")
1818 (match_operator 3 "comparison_operator"
1819 [(match_operand:VDQW 4 "s_register_operand" "")
1820 (match_operand:VDQW 5 "nonmemory_operand" "")])
1821 (match_operand:VDQW 1 "s_register_operand" "")
1822 (match_operand:VDQW 2 "s_register_operand" "")))]
1823 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1826 int use_zero_form = 0;
1827 int swap_bsl_operands = 0;
1828 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1829 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1831 rtx (*base_comparison) (rtx, rtx, rtx);
1832 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1834 switch (GET_CODE (operands[3]))
1841 if (operands[5] == CONST0_RTX (<MODE>mode))
1848 if (!REG_P (operands[5]))
1849 operands[5] = force_reg (<MODE>mode, operands[5]);
1852 switch (GET_CODE (operands[3]))
1862 base_comparison = gen_neon_vcge<mode>;
1863 complimentary_comparison = gen_neon_vcgt<mode>;
1871 base_comparison = gen_neon_vcgt<mode>;
1872 complimentary_comparison = gen_neon_vcge<mode>;
1877 base_comparison = gen_neon_vceq<mode>;
1878 complimentary_comparison = gen_neon_vceq<mode>;
1884 switch (GET_CODE (operands[3]))
1891 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1892 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1898 Note that there also exist direct comparison against 0 forms,
1899 so catch those as a special case. */
1903 switch (GET_CODE (operands[3]))
1906 base_comparison = gen_neon_vclt<mode>;
1909 base_comparison = gen_neon_vcle<mode>;
1912 /* Do nothing, other zero form cases already have the correct
1919 emit_insn (base_comparison (mask, operands[4], operands[5]));
1921 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1928 /* Vector compare returns false for lanes which are unordered, so if we use
1929 the inverse of the comparison we actually want to emit, then
1930 swap the operands to BSL, we will end up with the correct result.
1931 Note that a NE NaN and NaN NE b are true for all a, b.
1933 Our transformations are:
1938 a NE b -> !(a EQ b) */
1941 emit_insn (base_comparison (mask, operands[4], operands[5]));
1943 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1945 swap_bsl_operands = 1;
1948 /* We check (a > b || b > a).  Combining these comparisons gives us
1949 true iff (a != b && a ORDERED b); swapping the operands to BSL
1950 will then give us (a == b || a UNORDERED b) as intended. */
1952 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1953 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1954 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1955 swap_bsl_operands = 1;
1958 /* Operands are ORDERED iff (a > b || b >= a).
1959 Swapping the operands to BSL will give the UNORDERED case. */
1960 swap_bsl_operands = 1;
1963 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1964 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1965 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1971 if (swap_bsl_operands)
1972 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1975 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Vector conditional select on VDQIW modes whose comparison mask is built
;; from the unsigned generators neon_vcgeu / neon_vcgtu, from neon_vceq, or
;; from neon_vcle / neon_vclt.  Which generator is used depends on
;; GET_CODE (operands[3]); the case labels and the conditions guarding the
;; vcle/vclt calls are not visible in this extract (they presumably relate
;; to the immediate_zero flag - confirm against the full file).  The result
;; is produced by neon_vbsl, with operands 1 and 2 passed in one order or
;; the other (presumably chosen by the inverse flag).
1980 (define_expand "vcondu<mode><mode>"
1981 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1983 (match_operator 3 "arm_comparison_operator"
1984 [(match_operand:VDQIW 4 "s_register_operand" "")
1985 (match_operand:VDQIW 5 "s_register_operand" "")])
1986 (match_operand:VDQIW 1 "s_register_operand" "")
1987 (match_operand:VDQIW 2 "s_register_operand" "")))]
1991 int inverse = 0, immediate_zero = 0;
1993 mask = gen_reg_rtx (<V_cmp_result>mode);
1995 if (operands[5] == CONST0_RTX (<MODE>mode))
1997 else if (!REG_P (operands[5]))
1998 operands[5] = force_reg (<MODE>mode, operands[5]);
2000 switch (GET_CODE (operands[3]))
2003 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2007 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2011 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2016 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2018 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2023 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2025 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2029 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2038 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2041 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2047 ;; Patterns for builtins.
2049 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on VCVTF modes.  When the mode is not a float
;; mode, or flag_unsafe_math_optimizations is set, it emits the generic
;; add<mode>3 pattern; the other visible path emits neon_vadd<mode>_unspec.
2051 (define_expand "neon_vadd<mode>"
2052 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2053 (match_operand:VCVTF 1 "s_register_operand" "w")
2054 (match_operand:VCVTF 2 "s_register_operand" "w")]
2057 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2058 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2060 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; Builtin expander for vadd on the VH modes.  Forwards its three operands to
;; add<mode>3_fp16.  Only enabled when TARGET_NEON_FP16INST holds.
2065 (define_expand "neon_vadd<mode>"
2066 [(match_operand:VH 0 "s_register_operand")
2067 (match_operand:VH 1 "s_register_operand")
2068 (match_operand:VH 2 "s_register_operand")]
2069 "TARGET_NEON_FP16INST"
2071 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; Builtin expander for vsub on the VH modes.  Forwards its three operands to
;; sub<mode>3_fp16.  Only enabled when TARGET_NEON_FP16INST holds.
2075 (define_expand "neon_vsub<mode>"
2076 [(match_operand:VH 0 "s_register_operand")
2077 (match_operand:VH 1 "s_register_operand")
2078 (match_operand:VH 2 "s_register_operand")]
2079 "TARGET_NEON_FP16INST"
2081 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2085 ; Note that NEON operations don't support the full IEEE 754 standard: in
2086 ; particular, denormal values are flushed to zero. This means that GCC cannot
2087 ; use those instructions for autovectorization, etc. unless
2088 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2089 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2090 ; header) must work in either case: if -funsafe-math-optimizations is given,
2091 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2092 ; expand to unspecs (which may potentially limit the extent to which they might
2093 ; be optimized by generic code).
2095 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd on VCVTF modes, emitted by the neon_vadd<mode>
;; expander.  Emits vadd.<V_if_elem>.  The type attribute is
;; neon_fp_addsub_s<q> for float modes and neon_add<q> otherwise.
2097 (define_insn "neon_vadd<mode>_unspec"
2098 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2099 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2100 (match_operand:VCVTF 2 "s_register_operand" "w")]
2103 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2105 (if_then_else (match_test "<Is_float_mode>")
2106 (const_string "neon_fp_addsub_s<q>")
2107 (const_string "neon_add<q>")))]
;; vaddl: two VDI source operands produce a <V_widen> result.  The
;; destination is printed with %q and the sources with %P.
2110 (define_insn "neon_vaddl<sup><mode>"
2111 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2112 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2113 (match_operand:VDI 2 "s_register_operand" "w")]
2116 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2117 [(set_attr "type" "neon_add_long")]
;; vaddw: operand 1 already has the <V_widen> mode of the result, operand 2 is
;; a VDI vector.  Operands 0 and 1 are printed with %q, operand 2 with %P.
2120 (define_insn "neon_vaddw<sup><mode>"
2121 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2122 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2123 (match_operand:VDI 2 "s_register_operand" "w")]
2126 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2127 [(set_attr "type" "neon_add_widen")]
;; v<r>hadd over VDQIW: result and both sources share the same mode.
;; NOTE(review): the "type" attribute is the fixed string neon_add_halve_q,
;; whereas neon_vhsub<sup><mode> uses neon_sub_halve<q>; confirm whether the
;; <q> substitution was intended here as well.
2132 (define_insn "neon_v<r>hadd<sup><mode>"
2133 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2134 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2135 (match_operand:VDQIW 2 "s_register_operand" "w")]
2138 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2139 [(set_attr "type" "neon_add_halve_q")]
;; vqadd over VDQIX: outputs vqadd.<sup><size> with operand 0 as destination
;; and operands 1 and 2 as sources, all in the same mode.
2142 (define_insn "neon_vqadd<sup><mode>"
2143 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2144 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2145 (match_operand:VDQIX 2 "s_register_operand" "w")]
2148 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2149 [(set_attr "type" "neon_qadd<q>")]
;; v<r>addhn: two VN source operands produce a <V_narrow> result.  The
;; destination is printed with %P and the sources with %q.
2152 (define_insn "neon_v<r>addhn<mode>"
2153 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2154 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2155 (match_operand:VN 2 "s_register_operand" "w")]
2158 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2159 [(set_attr "type" "neon_add_halve_narrow_q")]
2162 ;; Polynomial and Float multiplication.
;; vmul.<pf><size> over the VPF iterator, as an unspec on operands 1 and 2.
;; The "type" attribute is neon_fp_mul_s<q> for float modes and
;; neon_mul_<V_elem_ch><q> otherwise.
2163 (define_insn "neon_vmul<pf><mode>"
2164 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2165 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2166 (match_operand:VPF 2 "s_register_operand" "w")]
2169 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2171 (if_then_else (match_test "<Is_float_mode>")
2172 (const_string "neon_fp_mul_s<q>")
2173 (const_string "neon_mul_<V_elem_ch><q>")))]
;; vmul.f16 over the VH iterator; enabled by TARGET_NEON_FP16INST.
2176 (define_insn "neon_vmulf<mode>"
2178 (match_operand:VH 0 "s_register_operand" "=w")
2180 (match_operand:VH 1 "s_register_operand" "w")
2181 (match_operand:VH 2 "s_register_operand" "w")))]
2182 "TARGET_NEON_FP16INST"
2183 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2184 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Builtin expander for vmla over VDQW (operand 1 carries the "0" constraint).
;; When the mode is not a float mode, or -funsafe-math-optimizations is in
;; effect, it emits mul<mode>3add<mode>_neon; the other emit seen here targets
;; neon_vmla<mode>_unspec.  Both receive operands 0-3 in order.
2187 (define_expand "neon_vmla<mode>"
2188 [(match_operand:VDQW 0 "s_register_operand" "=w")
2189 (match_operand:VDQW 1 "s_register_operand" "0")
2190 (match_operand:VDQW 2 "s_register_operand" "w")
2191 (match_operand:VDQW 3 "s_register_operand" "w")]
2194 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2195 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2196 operands[2], operands[3]));
2198 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2199 operands[2], operands[3]));
;; Builtin expander for vfma over VCVTF; requires TARGET_NEON && TARGET_FMA.
;; Forwards to fma<mode>4_intrinsic, passing operand 0 followed by operands 2
;; and 3 (note the reordering relative to the builtin's operand numbering).
2203 (define_expand "neon_vfma<VCVTF:mode>"
2204 [(match_operand:VCVTF 0 "s_register_operand")
2205 (match_operand:VCVTF 1 "s_register_operand")
2206 (match_operand:VCVTF 2 "s_register_operand")
2207 (match_operand:VCVTF 3 "s_register_operand")]
2208 "TARGET_NEON && TARGET_FMA"
2210 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfma over VH; requires TARGET_NEON_FP16INST.
;; Forwards to fma<mode>4_intrinsic, passing operand 0 followed by operands 2
;; and 3.
2215 (define_expand "neon_vfma<VH:mode>"
2216 [(match_operand:VH 0 "s_register_operand")
2217 (match_operand:VH 1 "s_register_operand")
2218 (match_operand:VH 2 "s_register_operand")
2219 (match_operand:VH 3 "s_register_operand")]
2220 "TARGET_NEON_FP16INST"
2222 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms over VCVTF; requires TARGET_NEON && TARGET_FMA.
;; Forwards to fmsub<mode>4_intrinsic, passing operand 0 followed by operands
;; 2 and 3.
2227 (define_expand "neon_vfms<VCVTF:mode>"
2228 [(match_operand:VCVTF 0 "s_register_operand")
2229 (match_operand:VCVTF 1 "s_register_operand")
2230 (match_operand:VCVTF 2 "s_register_operand")
2231 (match_operand:VCVTF 3 "s_register_operand")]
2232 "TARGET_NEON && TARGET_FMA"
2234 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Builtin expander for vfms over VH; requires TARGET_NEON_FP16INST.
;; Forwards to fmsub<mode>4_intrinsic, passing operand 0 followed by operands
;; 2 and 3.
2239 (define_expand "neon_vfms<VH:mode>"
2240 [(match_operand:VH 0 "s_register_operand")
2241 (match_operand:VH 1 "s_register_operand")
2242 (match_operand:VH 2 "s_register_operand")
2243 (match_operand:VH 3 "s_register_operand")]
2244 "TARGET_NEON_FP16INST"
2246 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2251 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla over VDQW.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 0, 2 and 3 appear in the output
;; template.  "type" is neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2253 (define_insn "neon_vmla<mode>_unspec"
2254 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2255 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2256 (match_operand:VDQW 2 "s_register_operand" "w")
2257 (match_operand:VDQW 3 "s_register_operand" "w")]
2260 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2262 (if_then_else (match_test "<Is_float_mode>")
2263 (const_string "neon_fp_mla_s<q>")
2264 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: <V_widen> accumulator (operand 1, tied to operand 0) combined with
;; two VW sources (operands 2 and 3).  Only operands 0, 2 and 3 are printed.
2267 (define_insn "neon_vmlal<sup><mode>"
2268 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2269 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2270 (match_operand:VW 2 "s_register_operand" "w")
2271 (match_operand:VW 3 "s_register_operand" "w")]
2274 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2275 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Builtin expander for vmls over VDQW (operand 1 carries the "0" constraint).
;; When the mode is not a float mode, or -funsafe-math-optimizations is in
;; effect, it emits mul<mode>3neg<mode>add<mode>_neon; the other emit seen
;; here targets neon_vmls<mode>_unspec.  Both receive operands 0-3 in order.
2278 (define_expand "neon_vmls<mode>"
2279 [(match_operand:VDQW 0 "s_register_operand" "=w")
2280 (match_operand:VDQW 1 "s_register_operand" "0")
2281 (match_operand:VDQW 2 "s_register_operand" "w")
2282 (match_operand:VDQW 3 "s_register_operand" "w")]
2285 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2286 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2287 operands[1], operands[2], operands[3]));
2289 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2290 operands[2], operands[3]));
2294 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls over VDQW.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 0, 2 and 3 appear in the output
;; template.  "type" is neon_fp_mla_s<q> for float modes and
;; neon_mla_<V_elem_ch><q> otherwise.
2296 (define_insn "neon_vmls<mode>_unspec"
2297 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2298 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2299 (match_operand:VDQW 2 "s_register_operand" "w")
2300 (match_operand:VDQW 3 "s_register_operand" "w")]
2303 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2305 (if_then_else (match_test "<Is_float_mode>")
2306 (const_string "neon_fp_mla_s<q>")
2307 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: <V_widen> accumulator (operand 1, tied to operand 0) combined with
;; two VW sources (operands 2 and 3).  Only operands 0, 2 and 3 are printed.
2310 (define_insn "neon_vmlsl<sup><mode>"
2311 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2312 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2313 (match_operand:VW 2 "s_register_operand" "w")
2314 (match_operand:VW 3 "s_register_operand" "w")]
2317 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2318 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2321 ;; vqdmulh, vqrdmulh
;; vq<r>dmulh over VMDQI: result and both sources share the same mode; the
;; element suffix comes from <V_s_elem>.
2322 (define_insn "neon_vq<r>dmulh<mode>"
2323 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2324 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2325 (match_operand:VMDQI 2 "s_register_operand" "w")]
2328 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2329 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2332 ;; vqrdmlah, vqrdmlsh
;; vqrdmlah / vqrdmlsh (selected by <VQRDMLH_AS:neon_rdma_as>) over VMDQI.
;; Operand 1 is the accumulator tied to operand 0; operands 2 and 3 are the
;; printed sources.
;; NOTE(review): result and sources all have mode VMDQI, yet the "type"
;; attribute is the *_long variant also used by neon_vqdmlal<mode>; confirm
;; that this scheduling class is intended.
2333 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2334 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2335 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2336 (match_operand:VMDQI 2 "s_register_operand" "w")
2337 (match_operand:VMDQI 3 "s_register_operand" "w")]
2340 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2341 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: <V_widen> accumulator (operand 1, tied to operand 0) combined with
;; two VMDI sources (operands 2 and 3).
2344 (define_insn "neon_vqdmlal<mode>"
2345 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2346 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2347 (match_operand:VMDI 2 "s_register_operand" "w")
2348 (match_operand:VMDI 3 "s_register_operand" "w")]
2351 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2352 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: <V_widen> accumulator (operand 1, tied to operand 0) combined with
;; two VMDI sources (operands 2 and 3).
2355 (define_insn "neon_vqdmlsl<mode>"
2356 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2357 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2358 (match_operand:VMDI 2 "s_register_operand" "w")
2359 (match_operand:VMDI 3 "s_register_operand" "w")]
2362 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2363 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW source operands produce a <V_widen> result.
2366 (define_insn "neon_vmull<sup><mode>"
2367 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2368 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2369 (match_operand:VW 2 "s_register_operand" "w")]
2372 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2373 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI source operands produce a <V_widen> result.
2376 (define_insn "neon_vqdmull<mode>"
2377 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2378 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2379 (match_operand:VMDI 2 "s_register_operand" "w")]
2382 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2383 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Builtin expander for vsub over the VCVTF mode iterator.  When the mode is
;; not a float mode, or -funsafe-math-optimizations is in effect, it emits the
;; generic sub<mode>3 pattern; the other emit seen here targets
;; neon_vsub<mode>_unspec.
2386 (define_expand "neon_vsub<mode>"
2387 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2388 (match_operand:VCVTF 1 "s_register_operand" "w")
2389 (match_operand:VCVTF 2 "s_register_operand" "w")]
2392 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2393 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2395 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2400 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub over VCVTF.  Outputs vsub.<V_if_elem> with operand 0 as
;; destination and operands 1 and 2 as sources.  The "type" attribute is
;; neon_fp_addsub_s<q> for float modes and neon_sub<q> otherwise.
2402 (define_insn "neon_vsub<mode>_unspec"
2403 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2404 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2405 (match_operand:VCVTF 2 "s_register_operand" "w")]
2408 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2410 (if_then_else (match_test "<Is_float_mode>")
2411 (const_string "neon_fp_addsub_s<q>")
2412 (const_string "neon_sub<q>")))]
;; vsubl: two VDI source operands produce a <V_widen> result.  The
;; destination is printed with %q and the sources with %P.
2415 (define_insn "neon_vsubl<sup><mode>"
2416 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2417 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2418 (match_operand:VDI 2 "s_register_operand" "w")]
2421 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2422 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 already has the <V_widen> mode of the result, operand 2 is
;; a VDI vector.  Operands 0 and 1 are printed with %q, operand 2 with %P.
2425 (define_insn "neon_vsubw<sup><mode>"
2426 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2427 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2428 (match_operand:VDI 2 "s_register_operand" "w")]
2431 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2432 [(set_attr "type" "neon_sub_widen")]
;; vqsub over VDQIX: outputs vqsub.<sup><size> with operand 0 as destination
;; and operands 1 and 2 as sources, all in the same mode.
2435 (define_insn "neon_vqsub<sup><mode>"
2436 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2437 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2438 (match_operand:VDQIX 2 "s_register_operand" "w")]
2441 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2442 [(set_attr "type" "neon_qsub<q>")]
;; vhsub over VDQIW: outputs vhsub.<sup><size> with operand 0 as destination
;; and operands 1 and 2 as sources, all in the same mode.
2445 (define_insn "neon_vhsub<sup><mode>"
2446 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2447 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2448 (match_operand:VDQIW 2 "s_register_operand" "w")]
2451 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2452 [(set_attr "type" "neon_sub_halve<q>")]
;; v<r>subhn: two VN source operands produce a <V_narrow> result.  The
;; destination is printed with %P and the sources with %q.
2455 (define_insn "neon_v<r>subhn<mode>"
2456 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2457 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2458 (match_operand:VN 2 "s_register_operand" "w")]
2461 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2462 [(set_attr "type" "neon_sub_halve_narrow_q")]
2465 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2466 ;; without unsafe math optimizations.
;; Builtin expander for the vc<cmp_op> comparisons over VDQW.  The result has
;; mode <V_cmp_result>; operand 2 may be a register or zero
;; (reg_or_zero_operand).  For vector-float modes without
;; -funsafe-math-optimizations it emits one of the explicitly named v2sf /
;; v4sf *_insn_unspec patterns; the remaining emit targets
;; neon_vc<cmp_op><mode>_insn.
2467 (define_expand "neon_vc<cmp_op><mode>"
2468 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2470 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2471 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2474 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2476 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2477 && !flag_unsafe_math_optimizations)
2479 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2480 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2481 whereas this expander iterates over the integer modes as well,
2482 but we will never expand to UNSPECs for the integer comparisons. */
2486 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2491 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2500 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Comparison insn on a COMPARISONS rtx over VDQW.  Enabled under TARGET_NEON,
;; except for vector-float modes when -funsafe-math-optimizations is off.
;; The asm text is built with sprintf: the element-type letter is "f" for
;; vector-float modes and <cmp_type> otherwise, and the last operand is
;; %<V_reg>2 for alternative 0 or the literal #0 for the Dz alternative.
;; "type" is neon_compare_zero<q> when operand 2 matches zero_operand, else
;; neon_compare<q>.
2507 (define_insn "neon_vc<cmp_op><mode>_insn"
2508 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2510 (COMPARISONS:<V_cmp_result>
2511 (match_operand:VDQW 1 "s_register_operand" "w,w")
2512 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2513 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2514 && !flag_unsafe_math_optimizations)"
2517 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2519 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2520 ? "f" : "<cmp_type>",
2521 which_alternative == 0
2522 ? "%<V_reg>2" : "#0");
2523 output_asm_insn (pattern, operands);
2527 (if_then_else (match_operand 2 "zero_operand")
2528 (const_string "neon_compare_zero<q>")
2529 (const_string "neon_compare<q>")))]
;; Unspec comparison insn over VCVTF with a <V_cmp_result> result.  The asm
;; text is built with sprintf using the fixed "f" element-type letter; the
;; last operand is %<V_reg>2 for alternative 0 or the literal #0 for the Dz
;; alternative.
2532 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2533 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2534 (unspec:<V_cmp_result>
2535 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2536 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2541 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2543 which_alternative == 0
2544 ? "%<V_reg>2" : "#0");
2545 output_asm_insn (pattern, operands);
2548 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Builtin expander for the vc<cmp_op> comparisons over VH; requires
;; TARGET_NEON_FP16INST.  For vector-float modes without
;; -funsafe-math-optimizations it uses neon_vc<cmp_op><mode>_fp16insn_unspec;
;; the other generator seen here is neon_vc<cmp_op><mode>_fp16insn.  Both take
;; operands 0, 1 and 2 unchanged.
2551 (define_expand "neon_vc<cmp_op><mode>"
2552 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2555 (match_operand:VH 1 "s_register_operand")
2556 (match_operand:VH 2 "reg_or_zero_operand")))]
2557 "TARGET_NEON_FP16INST"
2559 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2561 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2562 && !flag_unsafe_math_optimizations)
2564 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2565 (operands[0], operands[1], operands[2]));
2568 (gen_neon_vc<cmp_op><mode>_fp16insn
2569 (operands[0], operands[1], operands[2]));
;; Comparison insn on a COMPARISONS rtx over VH.  Enabled under
;; TARGET_NEON_FP16INST, except for vector-float modes when
;; -funsafe-math-optimizations is off.  The asm text is built with sprintf:
;; the element-type letter is "f" for vector-float modes and <cmp_type>
;; otherwise, and the last operand is %<V_reg>2 for alternative 0 or the
;; literal #0 for the Dz alternative.  "type" is neon_compare_zero<q> when
;; operand 2 matches zero_operand, else neon_compare<q>.
2573 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2574 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2576 (COMPARISONS:<V_cmp_result>
2577 (match_operand:VH 1 "s_register_operand" "w,w")
2578 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2579 "TARGET_NEON_FP16INST
2580 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2581 && !flag_unsafe_math_optimizations)"
2584 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2586 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2587 ? "f" : "<cmp_type>",
2588 which_alternative == 0
2589 ? "%<V_reg>2" : "#0");
2590 output_asm_insn (pattern, operands);
2594 (if_then_else (match_operand 2 "zero_operand")
2595 (const_string "neon_compare_zero<q>")
2596 (const_string "neon_compare<q>")))])
;; Unspec comparison insn over VH with a <V_cmp_result> result; requires
;; TARGET_NEON_FP16INST.  The asm text is built with sprintf using the fixed
;; "f" element-type letter; the last operand is %<V_reg>2 for alternative 0 or
;; the literal #0 for the Dz alternative.
2598 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2600 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2601 (unspec:<V_cmp_result>
2602 [(match_operand:VH 1 "s_register_operand" "w,w")
2603 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2605 "TARGET_NEON_FP16INST"
2608 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2610 which_alternative == 0
2611 ? "%<V_reg>2" : "#0");
2612 output_asm_insn (pattern, operands);
2615 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Comparison insn for the GTUGEU codes over VDQIW, with a <V_cmp_result>
;; result.  Outputs vc<cmp_op>.u<size>; both sources must be registers.
2617 (define_insn "neon_vc<cmp_op>u<mode>"
2618 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2620 (GTUGEU:<V_cmp_result>
2621 (match_operand:VDQIW 1 "s_register_operand" "w")
2622 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2624 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2625 [(set_attr "type" "neon_compare<q>")]
;; Builtin expander for vca<cmp_op> over VCVTF: a GTGE comparison of the
;; absolute values of operands 1 and 2.  With -funsafe-math-optimizations it
;; emits neon_vca<cmp_op><mode>_insn; the other emit seen here targets
;; neon_vca<cmp_op><mode>_insn_unspec.
2628 (define_expand "neon_vca<cmp_op><mode>"
2629 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2631 (GTGE:<V_cmp_result>
2632 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2633 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2636 if (flag_unsafe_math_optimizations)
2637 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2640 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; GTGE comparison of abs (operand 1) with abs (operand 2) over VCVTF.  Only
;; enabled when TARGET_NEON && flag_unsafe_math_optimizations.  Outputs
;; vac<cmp_op>.<V_if_elem>.
2647 (define_insn "neon_vca<cmp_op><mode>_insn"
2648 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2650 (GTGE:<V_cmp_result>
2651 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2652 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2653 "TARGET_NEON && flag_unsafe_math_optimizations"
2654 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2655 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the vac<cmp_op_unsp> comparison over VCVTF, with a
;; <V_cmp_result> result.
2658 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2659 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2660 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2661 (match_operand:VCVTF 2 "s_register_operand" "w")]
2664 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2665 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Builtin expander for vca<cmp_op> over VH: a GLTE comparison of the absolute
;; values of operands 1 and 2; requires TARGET_NEON_FP16INST.  With
;; -funsafe-math-optimizations it emits neon_vca<cmp_op><mode>_fp16insn; the
;; other emit seen here targets neon_vca<cmp_op><mode>_fp16insn_unspec.
2668 (define_expand "neon_vca<cmp_op><mode>"
2670 (match_operand:<V_cmp_result> 0 "s_register_operand")
2672 (GLTE:<V_cmp_result>
2673 (abs:VH (match_operand:VH 1 "s_register_operand"))
2674 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2675 "TARGET_NEON_FP16INST"
2677 if (flag_unsafe_math_optimizations)
2678 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2679 (operands[0], operands[1], operands[2]));
2681 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2682 (operands[0], operands[1], operands[2]));
;; GLTE comparison of abs (operand 1) with abs (operand 2) over VH.  Only
;; enabled when TARGET_NEON_FP16INST && flag_unsafe_math_optimizations.
;; Outputs vac<cmp_op>.<V_if_elem>.
2686 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2688 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2690 (GLTE:<V_cmp_result>
2691 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2692 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2693 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2694 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2695 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the vac<cmp_op_unsp> comparison over VH, with a
;; <V_cmp_result> result.
2698 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2699 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2700 (unspec:<V_cmp_result>
2701 [(match_operand:VH 1 "s_register_operand" "w")
2702 (match_operand:VH 2 "s_register_operand" "w")]
2705 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2706 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-with-zero expander over VH; requires TARGET_NEON_FP16INST.
;; Forwards to neon_vc<cmp_op><mode>, supplying CONST0_RTX (<MODE>mode) as the
;; second comparison operand.
2709 (define_expand "neon_vc<cmp_op>z<mode>"
2711 (match_operand:<V_cmp_result> 0 "s_register_operand")
2712 (COMPARISONS:<V_cmp_result>
2713 (match_operand:VH 1 "s_register_operand")
2715 "TARGET_NEON_FP16INST"
2717 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2718 CONST0_RTX (<MODE>mode)));
;; vtst over VDQIW: outputs vtst.<size> with operand 0 as destination and
;; operands 1 and 2 as sources, all in the same mode.
2722 (define_insn "neon_vtst<mode>"
2723 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2724 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2725 (match_operand:VDQIW 2 "s_register_operand" "w")]
2728 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2729 [(set_attr "type" "neon_tst<q>")]
;; vabd over VDQIW: outputs vabd.<sup><size> with operand 0 as destination and
;; operands 1 and 2 as sources, all in the same mode.
2732 (define_insn "neon_vabd<sup><mode>"
2733 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2734 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2735 (match_operand:VDQIW 2 "s_register_operand" "w")]
2738 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2739 [(set_attr "type" "neon_abd<q>")]
;; vabd over VH; requires TARGET_NEON_FP16INST.
;; NOTE(review): the "type" attribute here is neon_abd<q>, while the VCVTF
;; variant neon_vabdf<mode> uses neon_fp_abd_s<q>; confirm which scheduling
;; class is intended for the VH modes.
2742 (define_insn "neon_vabd<mode>"
2743 [(set (match_operand:VH 0 "s_register_operand" "=w")
2744 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2745 (match_operand:VH 2 "s_register_operand" "w")]
2747 "TARGET_NEON_FP16INST"
2748 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2749 [(set_attr "type" "neon_abd<q>")]
;; vabd over VCVTF: outputs vabd.<V_s_elem> with operand 0 as destination and
;; operands 1 and 2 as sources.
2752 (define_insn "neon_vabdf<mode>"
2753 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2754 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2755 (match_operand:VCVTF 2 "s_register_operand" "w")]
2758 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2759 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW source operands produce a <V_widen> result.
2762 (define_insn "neon_vabdl<sup><mode>"
2763 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2764 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2765 (match_operand:VW 2 "s_register_operand" "w")]
2768 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2769 [(set_attr "type" "neon_abd_long")]
;; vaba over VDQIW, expressed as a plus of an unspec on operands 2 and 3 and
;; the accumulator operand 1.  Operand 1 is tied to the destination
;; (constraint "0"), so only operands 0, 2 and 3 are printed.
2772 (define_insn "neon_vaba<sup><mode>"
2773 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2774 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2775 (match_operand:VDQIW 3 "s_register_operand" "w")]
2777 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2779 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2780 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal, expressed as a plus of a <V_widen> unspec on the VW operands 2 and 3
;; and the <V_widen> accumulator operand 1.  Operand 1 is tied to the
;; destination (constraint "0"), so only operands 0, 2 and 3 are printed.
2783 (define_insn "neon_vabal<sup><mode>"
2784 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2785 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2786 (match_operand:VW 3 "s_register_operand" "w")]
2788 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2790 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2791 [(set_attr "type" "neon_arith_acc<q>")]
;; v<maxmin> over VDQIW: outputs v<maxmin>.<sup><size> with operand 0 as
;; destination and operands 1 and 2 as sources.
2794 (define_insn "neon_v<maxmin><sup><mode>"
2795 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2796 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2797 (match_operand:VDQIW 2 "s_register_operand" "w")]
2800 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2801 [(set_attr "type" "neon_minmax<q>")]
;; v<maxmin> over VCVTF: outputs v<maxmin>.<V_s_elem> with operand 0 as
;; destination and operands 1 and 2 as sources.
2804 (define_insn "neon_v<maxmin>f<mode>"
2805 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2806 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2807 (match_operand:VCVTF 2 "s_register_operand" "w")]
2810 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2811 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; v<maxmin> over VH; requires TARGET_NEON_FP16INST.  Outputs
;; v<maxmin>.<V_s_elem> with operand 0 as destination and operands 1 and 2 as
;; sources.
2814 (define_insn "neon_v<maxmin>f<mode>"
2815 [(set (match_operand:VH 0 "s_register_operand" "=w")
2817 [(match_operand:VH 1 "s_register_operand" "w")
2818 (match_operand:VH 2 "s_register_operand" "w")]
2820 "TARGET_NEON_FP16INST"
2821 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2822 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16, defined for the single mode V4HF; requires
;; TARGET_NEON_FP16INST.  All three operands are printed with %P.
2825 (define_insn "neon_vp<maxmin>fv4hf"
2826 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2828 [(match_operand:V4HF 1 "s_register_operand" "w")
2829 (match_operand:V4HF 2 "s_register_operand" "w")]
2831 "TARGET_NEON_FP16INST"
2832 "vp<maxmin>.f16\t%P0, %P1, %P2"
2833 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> over VH; requires TARGET_NEON_FP16INST.  Outputs
;; <fmaxmin_op>.<V_s_elem> with operand 0 as destination and operands 1 and 2
;; as sources.
2836 (define_insn "neon_<fmaxmin_op><mode>"
2838 (match_operand:VH 0 "s_register_operand" "=w")
2840 [(match_operand:VH 1 "s_register_operand" "w")
2841 (match_operand:VH 2 "s_register_operand" "w")]
2843 "TARGET_NEON_FP16INST"
2844 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2845 [(set_attr "type" "neon_fp_minmax_s<q>")]
2848 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Named pattern <fmaxmin><mode>3 over VCVTF; requires
;; TARGET_NEON && TARGET_FPU_ARMV8.  Outputs <fmaxmin_op>.<V_s_elem>.
2849 (define_insn "<fmaxmin><mode>3"
2850 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2851 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2852 (match_operand:VCVTF 2 "s_register_operand" "w")]
2854 "TARGET_NEON && TARGET_FPU_ARMV8"
2855 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2856 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Builtin expander for vpadd over the VD iterator; forwards to
;; neon_vpadd_internal<mode>.
2859 (define_expand "neon_vpadd<mode>"
2860 [(match_operand:VD 0 "s_register_operand" "=w")
2861 (match_operand:VD 1 "s_register_operand" "w")
2862 (match_operand:VD 2 "s_register_operand" "w")]
2865 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: a single VDQIW source produces a <V_double_width> result.
2870 (define_insn "neon_vpaddl<sup><mode>"
2871 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2872 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2875 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2876 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: <V_double_width> accumulator (operand 1, tied to operand 0)
;; combined with a VDQIW source (operand 2).  Only operands 0 and 2 are
;; printed.
2879 (define_insn "neon_vpadal<sup><mode>"
2880 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2881 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2882 (match_operand:VDQIW 2 "s_register_operand" "w")]
2885 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2886 [(set_attr "type" "neon_reduc_add_acc")]
;; vp<maxmin> over VDI: outputs vp<maxmin>.<sup><size> with operand 0 as
;; destination and operands 1 and 2 as sources.
2889 (define_insn "neon_vp<maxmin><sup><mode>"
2890 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2891 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2892 (match_operand:VDI 2 "s_register_operand" "w")]
2895 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2896 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> over VCVTF: outputs vp<maxmin>.<V_s_elem> with operand 0 as
;; destination and operands 1 and 2 as sources.
2899 (define_insn "neon_vp<maxmin>f<mode>"
2900 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2901 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2902 (match_operand:VCVTF 2 "s_register_operand" "w")]
2905 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2906 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps over VCVTF: outputs vrecps.<V_if_elem> with operand 0 as destination
;; and operands 1 and 2 as sources.
2909 (define_insn "neon_vrecps<mode>"
2910 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2911 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2912 (match_operand:VCVTF 2 "s_register_operand" "w")]
2915 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2916 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps over VH; requires TARGET_NEON_FP16INST.  Outputs
;; vrecps.<V_if_elem> with operand 0 as destination and operands 1 and 2 as
;; sources.
2919 (define_insn "neon_vrecps<mode>"
2921 (match_operand:VH 0 "s_register_operand" "=w")
2922 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2923 (match_operand:VH 2 "s_register_operand" "w")]
2925 "TARGET_NEON_FP16INST"
2926 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2927 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts over VCVTF: outputs vrsqrts.<V_if_elem> with operand 0 as
;; destination and operands 1 and 2 as sources.
2930 (define_insn "neon_vrsqrts<mode>"
2931 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2932 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2933 (match_operand:VCVTF 2 "s_register_operand" "w")]
2936 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2937 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts over VH; requires TARGET_NEON_FP16INST.  Outputs
;; vrsqrts.<V_if_elem> with operand 0 as destination and operands 1 and 2 as
;; sources.
2940 (define_insn "neon_vrsqrts<mode>"
2942 (match_operand:VH 0 "s_register_operand" "=w")
2943 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2944 (match_operand:VH 2 "s_register_operand" "w")]
2946 "TARGET_NEON_FP16INST"
2947 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2948 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Builtin expander for vabs over VDQW; forwards to the generic abs<mode>2
;; pattern with operands 0 and 1.
2951 (define_expand "neon_vabs<mode>"
2952 [(match_operand:VDQW 0 "s_register_operand" "")
2953 (match_operand:VDQW 1 "s_register_operand" "")]
2956 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs over VDQIW: one source operand, outputs vqabs.<V_s_elem>.
2960 (define_insn "neon_vqabs<mode>"
2961 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2962 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2965 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2966 [(set_attr "type" "neon_qabs<q>")]
;; bswap over VDQHSD, implemented as vrev<size>.8, i.e. the element size
;; selects the vrev variant and the data size is fixed at 8 bits.
2969 (define_insn "neon_bswap<mode>"
2970 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2971 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2973 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2974 [(set_attr "type" "neon_rev<q>")]
;; Builtin expander for vneg over VDQW; forwards to the generic neg<mode>2
;; pattern with operands 0 and 1.
2977 (define_expand "neon_vneg<mode>"
2978 [(match_operand:VDQW 0 "s_register_operand" "")
2979 (match_operand:VDQW 1 "s_register_operand" "")]
2982 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; Expander for copysign over VCVTF.  It builds a constant vector in
;; <V_cmp_result> mode whose every element is 0x80000000, loads it into a
;; fresh pseudo, copies operand 2 into operand 0, views the mask as
;; <MODE>mode through simplify_gen_subreg, and then emits neon_vbsl<mode> on
;; operand 0 with that mask.
;; NOTE(review): GEN_INT (0x80000000) is a positive HOST_WIDE_INT value; for
;; a 32-bit element a sign-extended constant (e.g. via gen_int_mode) may be
;; the canonical form -- confirm against the CONST_INT rules.
2986 (define_expand "neon_copysignf<mode>"
2987 [(match_operand:VCVTF 0 "register_operand")
2988 (match_operand:VCVTF 1 "register_operand")
2989 (match_operand:VCVTF 2 "register_operand")]
2993 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2994 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2995 rtvec v = rtvec_alloc (n_elt);
2997 /* Create bitmask for vector select. */
2998 for (i = 0; i < n_elt; ++i)
2999 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
3001 emit_move_insn (v_bitmask,
3002 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
3003 emit_move_insn (operands[0], operands[2]);
3004 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3005 <VCVTF:V_cmp_result>mode, 0);
3006 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg over VDQIW: one source operand, outputs vqneg.<V_s_elem>.
3013 (define_insn "neon_vqneg<mode>"
3014 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3015 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3018 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3019 [(set_attr "type" "neon_qneg<q>")]
;; vcls over VDQIW: one source operand, outputs vcls.<V_s_elem>.
3022 (define_insn "neon_vcls<mode>"
3023 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3024 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3027 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3028 [(set_attr "type" "neon_cls<q>")]
;; Named pattern clz<mode>2 over VDQIW, expressed with the clz rtx code;
;; outputs vclz.<V_if_elem>.
3031 (define_insn "clz<mode>2"
3032 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3033 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3035 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3036 [(set_attr "type" "neon_cnt<q>")]
;; Builtin expander for vclz over VDQIW; forwards to clz<mode>2 with operands
;; 0 and 1.
3039 (define_expand "neon_vclz<mode>"
3040 [(match_operand:VDQIW 0 "s_register_operand" "")
3041 (match_operand:VDQIW 1 "s_register_operand" "")]
3044 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Named pattern popcount<mode>2 over the VE iterator, expressed with the
;; popcount rtx code; outputs vcnt.<V_sz_elem>.
3048 (define_insn "popcount<mode>2"
3049 [(set (match_operand:VE 0 "s_register_operand" "=w")
3050 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3052 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3053 [(set_attr "type" "neon_cnt<q>")]
;; Builtin expander for vcnt over VE; forwards to popcount<mode>2 with
;; operands 0 and 1.
3056 (define_expand "neon_vcnt<mode>"
3057 [(match_operand:VE 0 "s_register_operand" "=w")
3058 (match_operand:VE 1 "s_register_operand" "w")]
3061 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe over VH; requires TARGET_NEON_FP16INST.  Outputs vrecpe.f16 with one
;; source operand.
3065 (define_insn "neon_vrecpe<mode>"
3066 [(set (match_operand:VH 0 "s_register_operand" "=w")
3067 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3069 "TARGET_NEON_FP16INST"
3070 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3071 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrecpe over the V32 iterator: one source operand, outputs
;; vrecpe.<V_u_elem>.
3074 (define_insn "neon_vrecpe<mode>"
3075 [(set (match_operand:V32 0 "s_register_operand" "=w")
3076 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3079 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3080 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte over the V32 iterator: one source operand, outputs
;; vrsqrte.<V_u_elem>.
3083 (define_insn "neon_vrsqrte<mode>"
3084 [(set (match_operand:V32 0 "s_register_operand" "=w")
3085 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3088 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3089 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Builtin expander for vmvn over VDQIW; forwards to the generic
;; one_cmpl<mode>2 pattern with operands 0 and 1.
3092 (define_expand "neon_vmvn<mode>"
3093 [(match_operand:VDQIW 0 "s_register_operand" "")
3094 (match_operand:VDQIW 1 "s_register_operand" "")]
3097 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Lane extract from a VD vector into an SI core register ("=r"), selecting
;; the lane given by the immediate operand 2.  On big-endian targets the lane
;; number is mirrored (nunits - 1 - lane) before printing.  Outputs
;; vmov.s<size>.
3101 (define_insn "neon_vget_lane<mode>_sext_internal"
3102 [(set (match_operand:SI 0 "s_register_operand" "=r")
3104 (vec_select:<V_elem>
3105 (match_operand:VD 1 "s_register_operand" "w")
3106 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3109 if (BYTES_BIG_ENDIAN)
3111 int elt = INTVAL (operands[2]);
3112 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3113 operands[2] = GEN_INT (elt);
3115 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3117 [(set_attr "type" "neon_to_gp")]
;; Lane extract from a VD vector into an SI core register ("=r"), selecting
;; the lane given by the immediate operand 2.  On big-endian targets the lane
;; number is mirrored (nunits - 1 - lane) before printing.  Outputs
;; vmov.u<size>.
3120 (define_insn "neon_vget_lane<mode>_zext_internal"
3121 [(set (match_operand:SI 0 "s_register_operand" "=r")
3123 (vec_select:<V_elem>
3124 (match_operand:VD 1 "s_register_operand" "w")
3125 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3128 if (BYTES_BIG_ENDIAN)
3130 int elt = INTVAL (operands[2]);
3131 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3132 operands[2] = GEN_INT (elt);
3134 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3136 [(set_attr "type" "neon_to_gp")]
;; Lane extract from a VQ2 vector into an SI core register.  The lane number
;; is split against half the element count: elt / halfelts picks the
;; <V_HALF>mode register at regno + 2 * (elt / halfelts), and elt % halfelts
;; is the lane inside it (mirrored within that half on big-endian targets).
;; The vmov.s<size> instruction is printed from the local ops[] array rather
;; than from operands[].
3139 (define_insn "neon_vget_lane<mode>_sext_internal"
3140 [(set (match_operand:SI 0 "s_register_operand" "=r")
3142 (vec_select:<V_elem>
3143 (match_operand:VQ2 1 "s_register_operand" "w")
3144 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3148 int regno = REGNO (operands[1]);
3149 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3150 unsigned int elt = INTVAL (operands[2]);
3151 unsigned int elt_adj = elt % halfelts;
3153 if (BYTES_BIG_ENDIAN)
3154 elt_adj = halfelts - 1 - elt_adj;
3156 ops[0] = operands[0];
3157 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3158 ops[2] = GEN_INT (elt_adj);
3159 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3163 [(set_attr "type" "neon_to_gp_q")]
;; Q-register (VQ2 modes) form of the unsigned lane read.  The lane is looked
;; up in one <V_HALF>mode half of the source: the register used is
;; regno + 2 * (lane / halfelts) and the index within it is lane % halfelts,
;; mirrored inside that half when BYTES_BIG_ENDIAN.  The vmov.u<V_sz_elem>
;; instruction is printed through output_asm_insn on a local operand array.
3166 (define_insn "neon_vget_lane<mode>_zext_internal"
3167 [(set (match_operand:SI 0 "s_register_operand" "=r")
3169 (vec_select:<V_elem>
3170 (match_operand:VQ2 1 "s_register_operand" "w")
3171 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3175 int regno = REGNO (operands[1]);
3176 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3177 unsigned int elt = INTVAL (operands[2]);
3178 unsigned int elt_adj = elt % halfelts;
3180 if (BYTES_BIG_ENDIAN)
3181 elt_adj = halfelts - 1 - elt_adj;
3183 ops[0] = operands[0];
3184 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3185 ops[2] = GEN_INT (elt_adj);
3186 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3190 [(set_attr "type" "neon_to_gp_q")]
;; Expander for the vget_lane builtin over VDQW modes.  On big-endian targets
;; the lane number is XORed with (64 / element-bits - 1) to translate from
;; the intrinsic lane numbering to the generic RTL numbering.  Modes with
;; 32-bit elements are expanded through gen_vec_extract<mode>; the other
;; expansion used is the _sext_internal pattern.
3193 (define_expand "neon_vget_lane<mode>"
3194 [(match_operand:<V_ext> 0 "s_register_operand" "")
3195 (match_operand:VDQW 1 "s_register_operand" "")
3196 (match_operand:SI 2 "immediate_operand" "")]
3199 if (BYTES_BIG_ENDIAN)
3201 /* The intrinsics are defined in terms of a model where the
3202 element ordering in memory is vldm order, whereas the generic
3203 RTL is defined in terms of a model where the element ordering
3204 in memory is array order. Convert the lane number to conform
3206 unsigned int elt = INTVAL (operands[2]);
3207 unsigned int reg_nelts
3208 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3209 elt ^= reg_nelts - 1;
3210 operands[2] = GEN_INT (elt);
3213 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3214 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
3216 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Expander for the unsigned vget_lane builtin over VDQIW modes.  On
;; big-endian targets the lane number is XORed with (64 / element-bits - 1)
;; to translate from the intrinsic lane numbering to the generic RTL
;; numbering.  Modes with 32-bit elements are expanded through
;; gen_vec_extract<mode>; the other expansion used is the _zext_internal
;; pattern.
3222 (define_expand "neon_vget_laneu<mode>"
3223 [(match_operand:<V_ext> 0 "s_register_operand" "")
3224 (match_operand:VDQIW 1 "s_register_operand" "")
3225 (match_operand:SI 2 "immediate_operand" "")]
3228 if (BYTES_BIG_ENDIAN)
3230 /* The intrinsics are defined in terms of a model where the
3231 element ordering in memory is vldm order, whereas the generic
3232 RTL is defined in terms of a model where the element ordering
3233 in memory is array order. Convert the lane number to conform
3235 unsigned int elt = INTVAL (operands[2]);
3236 unsigned int reg_nelts
3237 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3238 elt ^= reg_nelts - 1;
3239 operands[2] = GEN_INT (elt);
3242 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3243 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
3245 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode form of vget_lane.  The expansion is a plain move of operand 1
;; into operand 0; the lane operand (2) is accepted but not read by the move.
3251 (define_expand "neon_vget_lanedi"
3252 [(match_operand:DI 0 "s_register_operand" "=r")
3253 (match_operand:DI 1 "s_register_operand" "w")
3254 (match_operand:SI 2 "immediate_operand" "")]
3257 emit_move_insn (operands[0], operands[1]);
;; V2DI form of vget_lane.  On big-endian targets the lane number is flipped
;; (XOR with 1, since there are two lanes).  The lane is then asserted to be
;; 0 or 1, and the DI destination receives gen_lowpart of the vector for
;; lane 0 or gen_highpart for lane 1.
3261 (define_expand "neon_vget_lanev2di"
3262 [(match_operand:DI 0 "s_register_operand" "")
3263 (match_operand:V2DI 1 "s_register_operand" "")
3264 (match_operand:SI 2 "immediate_operand" "")]
3269 if (BYTES_BIG_ENDIAN)
3271 /* The intrinsics are defined in terms of a model where the
3272 element ordering in memory is vldm order, whereas the generic
3273 RTL is defined in terms of a model where the element ordering
3274 in memory is array order. Convert the lane number to conform
3276 unsigned int elt = INTVAL (operands[2]);
3277 unsigned int reg_nelts = 2;
3278 elt ^= reg_nelts - 1;
3279 operands[2] = GEN_INT (elt);
3282 lane = INTVAL (operands[2]);
3283 gcc_assert ((lane ==0) || (lane == 1));
3284 emit_move_insn (operands[0], lane == 0
3285 ? gen_lowpart (DImode, operands[1])
3286 : gen_highpart (DImode, operands[1]));
;; Expander for the vset_lane builtin over VDQ modes.  Operand 1 is the new
;; element, operand 2 the original vector and operand 3 the lane number.  The
;; lane is XORed with (64 / element-bits - 1) on big-endian targets and then
;; passed to gen_vec_set<mode>_internal as a one-hot mask (1 << lane).
3290 (define_expand "neon_vset_lane<mode>"
3291 [(match_operand:VDQ 0 "s_register_operand" "=w")
3292 (match_operand:<V_elem> 1 "s_register_operand" "r")
3293 (match_operand:VDQ 2 "s_register_operand" "0")
3294 (match_operand:SI 3 "immediate_operand" "i")]
3297 unsigned int elt = INTVAL (operands[3]);
3299 if (BYTES_BIG_ENDIAN)
3301 unsigned int reg_nelts
3302 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3303 elt ^= reg_nelts - 1;
3306 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3307 GEN_INT (1 << elt), operands[2]));
3311 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DImode form of vset_lane: the expansion is a plain move of the new value
;; (operand 1) into the destination (operand 0).
3313 (define_expand "neon_vset_lanedi"
3314 [(match_operand:DI 0 "s_register_operand" "=w")
3315 (match_operand:DI 1 "s_register_operand" "r")
3316 (match_operand:DI 2 "s_register_operand" "0")
3317 (match_operand:SI 3 "immediate_operand" "i")]
3320 emit_move_insn (operands[0], operands[1]);
;; Expander for vcreate over VD_RE modes: the DImode source (operand 1) is
;; viewed as a <MODE>mode value with gen_lowpart and moved into operand 0.
3324 (define_expand "neon_vcreate<mode>"
3325 [(match_operand:VD_RE 0 "s_register_operand" "")
3326 (match_operand:DI 1 "general_operand" "")]
3329 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3330 emit_move_insn (operands[0], src);
;; Broadcast a scalar held in a core register (constraint "r") to every
;; element of a VX-mode vector (vec_duplicate), printed as vdup.<V_sz_elem>.
3334 (define_insn "neon_vdup_n<mode>"
3335 [(set (match_operand:VX 0 "s_register_operand" "=w")
3336 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3338 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3339 [(set_attr "type" "neon_from_gp<q>")]
;; V4HF broadcast: vec_duplicate of an HFmode scalar held in a core register
;; (constraint "r").
3342 (define_insn "neon_vdup_nv4hf"
3343 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3344 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3347 [(set_attr "type" "neon_from_gp")]
;; V8HF broadcast: vec_duplicate of an HFmode scalar held in a core register
;; (constraint "r").
3350 (define_insn "neon_vdup_nv8hf"
3351 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3352 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3355 [(set_attr "type" "neon_from_gp_q")]
;; Broadcast for V32 modes with two alternatives for the scalar source:
;; constraint "r" (printed as %1, type neon_from_gp<q>) or constraint "t"
;; (printed with the %y modifier, type neon_dup<q>).
3358 (define_insn "neon_vdup_n<mode>"
3359 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3360 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3363 vdup.<V_sz_elem>\t%<V_reg>0, %1
3364 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3365 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode form of vdup_n: expands to a plain move of operand 1 into
;; operand 0.
3368 (define_expand "neon_vdup_ndi"
3369 [(match_operand:DI 0 "s_register_operand" "=w")
3370 (match_operand:DI 1 "s_register_operand" "r")]
3373 emit_move_insn (operands[0], operands[1]);
;; V2DI broadcast of a DImode value.  Each alternative prints two vmov
;; instructions (hence length 8), one writing %e0 and one writing %f0 of the
;; destination.  The source is either constraint "r" (printed as the pair
;; %Q1, %R1) or constraint "w" (printed as %P1).
3378 (define_insn "neon_vdup_nv2di"
3379 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3380 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3383 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3384 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3385 [(set_attr "length" "8")
3386 (set_attr "type" "multiple")]
;; Broadcast one lane (operand 2) of the <V_double_vector_mode> source to
;; every element of a VDQW destination, printed as vdup.<V_sz_elem> with a
;; lane-indexed source %P1[%c2].  The lane number is mirrored
;; (nunits - 1 - lane) when BYTES_BIG_ENDIAN.  The two return templates
;; differ only in how the destination is printed: %P0 versus %q0.
3389 (define_insn "neon_vdup_lane<mode>_internal"
3390 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3392 (vec_select:<V_elem>
3393 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3394 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3397 if (BYTES_BIG_ENDIAN)
3399 int elt = INTVAL (operands[2]);
3400 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3401 operands[2] = GEN_INT (elt);
3404 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3406 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3408 [(set_attr "type" "neon_dup<q>")]
;; Half-precision (VH modes) form of the lane broadcast, enabled under
;; TARGET_NEON && TARGET_FP16.  Prints vdup.<V_sz_elem> with a lane-indexed
;; source %P1[%c2]; the lane number is mirrored (nunits - 1 - lane) when
;; BYTES_BIG_ENDIAN.  The two return templates differ only in how the
;; destination is printed: %P0 versus %q0.
3411 (define_insn "neon_vdup_lane<mode>_internal"
3412 [(set (match_operand:VH 0 "s_register_operand" "=w")
3414 (vec_select:<V_elem>
3415 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3416 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3417 "TARGET_NEON && TARGET_FP16"
3419 if (BYTES_BIG_ENDIAN)
3421 int elt = INTVAL (operands[2]);
3422 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3423 operands[2] = GEN_INT (elt);
3426 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3428 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3430 [(set_attr "type" "neon_dup<q>")]
;; Expander for the vdup_lane builtin over VDQW modes.  On big-endian targets
;; the lane number is XORed with (64 / element-bits - 1), computed on
;; <V_double_vector_mode>; the work is then passed on to
;; neon_vdup_lane<mode>_internal.
3433 (define_expand "neon_vdup_lane<mode>"
3434 [(match_operand:VDQW 0 "s_register_operand" "=w")
3435 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3436 (match_operand:SI 2 "immediate_operand" "i")]
3439 if (BYTES_BIG_ENDIAN)
3441 unsigned int elt = INTVAL (operands[2]);
3442 unsigned int reg_nelts
3443 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3444 elt ^= reg_nelts - 1;
3445 operands[2] = GEN_INT (elt);
3447 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; Half-precision (VH modes) expander for vdup_lane, enabled under
;; TARGET_NEON && TARGET_FP16.  On big-endian targets the lane number is
;; XORed with (64 / element-bits - 1), computed on <V_double_vector_mode>;
;; the work is then passed on to neon_vdup_lane<mode>_internal.
3452 (define_expand "neon_vdup_lane<mode>"
3453 [(match_operand:VH 0 "s_register_operand")
3454 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3455 (match_operand:SI 2 "immediate_operand")]
3456 "TARGET_NEON && TARGET_FP16"
3458 if (BYTES_BIG_ENDIAN)
3460 unsigned int elt = INTVAL (operands[2]);
3461 unsigned int reg_nelts
3462 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3463 elt ^= reg_nelts - 1;
3464 operands[2] = GEN_INT (elt);
3466 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3471 ; Scalar index is ignored, since only zero is valid here.
; The expansion is a plain DImode move of operand 1 into operand 0.
3472 (define_expand "neon_vdup_lanedi"
3473 [(match_operand:DI 0 "s_register_operand" "=w")
3474 (match_operand:DI 1 "s_register_operand" "w")
3475 (match_operand:SI 2 "immediate_operand" "i")]
3478 emit_move_insn (operands[0], operands[1]);
3482 ; Likewise for v2di, as the DImode second operand has only a single element.
; The expansion reuses neon_vdup_nv2di, passing only operands 0 and 1.
3483 (define_expand "neon_vdup_lanev2di"
3484 [(match_operand:V2DI 0 "s_register_operand" "=w")
3485 (match_operand:DI 1 "s_register_operand" "w")
3486 (match_operand:SI 2 "immediate_operand" "i")]
3489 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3493 ; Disabled before reload because we don't want combine doing something silly,
3494 ; but used by the post-reload expansion of neon_vcombine.
; Register swap over VDQX modes: two sets in one pattern, operand 0 <- 1 and
; operand 1 <- 0, with both operands read-write ("+w").  Printed as a single
; vswp and only matched once reload_completed is true.
3495 (define_insn "*neon_vswp<mode>"
3496 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3497 (match_operand:VDQX 1 "s_register_operand" "+w"))
3498 (set (match_dup 1) (match_dup 0))]
3499 "TARGET_NEON && reload_completed"
3500 "vswp\t%<V_reg>0, %<V_reg>1"
3501 [(set_attr "type" "neon_permute<q>")]
3504 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3506 ;; FIXME: A different implementation of this builtin could make it much
3507 ;; more likely that we wouldn't actually need to output anything (we could make
3508 ;; it so that the reg allocator puts things in the right places magically
3509 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; vec_concat of two VDX operands (1 and 2) into a <V_DOUBLE> destination.
;; The split condition is "&& reload_completed", at which point the work is
;; delegated to neon_split_vcombine (operands).
3511 (define_insn_and_split "neon_vcombine<mode>"
3512 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3513 (vec_concat:<V_DOUBLE>
3514 (match_operand:VDX 1 "s_register_operand" "w")
3515 (match_operand:VDX 2 "s_register_operand" "w")))]
3518 "&& reload_completed"
3521 neon_split_vcombine (operands);
3524 [(set_attr "type" "multiple")]
;; Expander for vget_high over VQX modes: moves into operand 0 the
;; <V_HALF>mode subreg of operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode), built with simplify_gen_subreg.
3527 (define_expand "neon_vget_high<mode>"
3528 [(match_operand:<V_HALF> 0 "s_register_operand")
3529 (match_operand:VQX 1 "s_register_operand")]
3532 emit_move_insn (operands[0],
3533 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3534 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander for vget_low over VQX modes: moves into operand 0 a
;; <V_HALF>mode subreg of operand 1 built with simplify_gen_subreg.
3538 (define_expand "neon_vget_low<mode>"
3539 [(match_operand:<V_HALF> 0 "s_register_operand")
3540 (match_operand:VQX 1 "s_register_operand")]
3543 emit_move_insn (operands[0],
3544 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed integer vector to floating-point vector (RTL "float") for VCVTI
;; modes, printed as vcvt.f32.s32.  Not available when flag_rounding_math is
;; set.
3549 (define_insn "float<mode><V_cvtto>2"
3550 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3551 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3552 "TARGET_NEON && !flag_rounding_math"
3553 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3554 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer vector to floating-point vector (RTL "unsigned_float")
;; for VCVTI modes, printed as vcvt.f32.u32.  Not available when
;; flag_rounding_math is set.
3557 (define_insn "floatuns<mode><V_cvtto>2"
3558 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3559 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3560 "TARGET_NEON && !flag_rounding_math"
3561 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3562 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Floating-point vector to signed integer vector (RTL "fix") for VCVTF
;; modes, printed as vcvt.s32.f32.
3565 (define_insn "fix_trunc<mode><V_cvtto>2"
3566 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3567 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3569 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3570 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Floating-point vector to unsigned integer vector (RTL "unsigned_fix") for
;; VCVTF modes, printed as vcvt.u32.f32.
3573 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3574 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3575 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3577 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3578 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Builtin (unspec) conversion from a VCVTF floating-point vector to a 32-bit
;; integer vector; the signedness letter in the mnemonic comes from <sup>.
3581 (define_insn "neon_vcvt<sup><mode>"
3582 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3583 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3586 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3587 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Builtin (unspec) conversion from a VCVTI 32-bit integer vector to an f32
;; vector; the signedness letter in the mnemonic comes from <sup>.
3590 (define_insn "neon_vcvt<sup><mode>"
3591 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3592 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3595 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3596 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF -> V4SF, printed as vcvt.f32.f16 with the
;; destination as %q0 and the source as %P1.  Requires
;; TARGET_NEON && TARGET_FP16.
3599 (define_insn "neon_vcvtv4sfv4hf"
3600 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3601 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3603 "TARGET_NEON && TARGET_FP16"
3604 "vcvt.f32.f16\t%q0, %P1"
3605 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF -> V4HF, printed as vcvt.f16.f32 with the
;; destination as %P0 and the source as %q1.  Requires
;; TARGET_NEON && TARGET_FP16.
3608 (define_insn "neon_vcvtv4hfv4sf"
3609 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3610 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3612 "TARGET_NEON && TARGET_FP16"
3613 "vcvt.f16.f32\t%P0, %q1"
3614 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Conversion from a VCVTHI 16-bit integer vector to an f16 vector; the
;; signedness letter in the mnemonic comes from <sup>.  Requires
;; TARGET_NEON_FP16INST.
3617 (define_insn "neon_vcvt<sup><mode>"
3619 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3621 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3623 "TARGET_NEON_FP16INST"
3624 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3625 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Conversion from a VH f16 vector to a 16-bit integer vector; the signedness
;; letter in the mnemonic comes from <sup>.  Requires TARGET_NEON_FP16INST.
3628 (define_insn "neon_vcvt<sup><mode>"
3630 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3632 [(match_operand:VH 1 "s_register_operand" "w")]
3634 "TARGET_NEON_FP16INST"
3635 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3636 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; f32 -> 32-bit integer conversion with an extra immediate operand (2) that
;; is printed as the third assembler operand.  Before printing, the immediate
;; is passed to neon_const_bounds with the arguments 1 and 33 -- presumably
;; a range check; confirm against that helper's definition.
3639 (define_insn "neon_vcvt<sup>_n<mode>"
3640 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3641 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3642 (match_operand:SI 2 "immediate_operand" "i")]
3646 neon_const_bounds (operands[2], 1, 33);
3647 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3649 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; f16 -> 16-bit integer conversion with an extra immediate operand (2) that
;; is printed as the third assembler operand.  Requires TARGET_NEON_FP16INST.
;; Before printing, the immediate is passed to neon_const_bounds with the
;; arguments 0 and 17 -- presumably a range check.
;; NOTE(review): the 32-bit neon_vcvt<sup>_n<mode> patterns pass 1 as the
;; lower argument, whereas this one passes 0; confirm that an immediate of 0
;; is really meant to be accepted here.
3652 (define_insn "neon_vcvt<sup>_n<mode>"
3653 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3655 [(match_operand:VH 1 "s_register_operand" "w")
3656 (match_operand:SI 2 "immediate_operand" "i")]
3658 "TARGET_NEON_FP16INST"
3660 neon_const_bounds (operands[2], 0, 17);
3661 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3663 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; 32-bit integer -> f32 conversion with an extra immediate operand (2) that
;; is printed as the third assembler operand.  Before printing, the immediate
;; is passed to neon_const_bounds with the arguments 1 and 33 -- presumably
;; a range check; confirm against that helper's definition.
3666 (define_insn "neon_vcvt<sup>_n<mode>"
3667 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3668 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3669 (match_operand:SI 2 "immediate_operand" "i")]
3673 neon_const_bounds (operands[2], 1, 33);
3674 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3676 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; 16-bit integer -> f16 conversion with an extra immediate operand (2) that
;; is printed as the third assembler operand.  Requires TARGET_NEON_FP16INST.
;; Before printing, the immediate is passed to neon_const_bounds with the
;; arguments 0 and 17 -- presumably a range check.
;; NOTE(review): the 32-bit neon_vcvt<sup>_n<mode> patterns pass 1 as the
;; lower argument, whereas this one passes 0; confirm that an immediate of 0
;; is really meant to be accepted here.
3679 (define_insn "neon_vcvt<sup>_n<mode>"
3680 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3682 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3683 (match_operand:SI 2 "immediate_operand" "i")]
3685 "TARGET_NEON_FP16INST"
3687 neon_const_bounds (operands[2], 0, 17);
3688 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3690 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; f16 -> 16-bit integer conversions whose mnemonic carries an extra suffix
;; taken from <vcvth_op> (vcvt<vcvth_op>.<sup>16.f16).  Requires
;; TARGET_NEON_FP16INST.
3693 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3695 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3697 [(match_operand:VH 1 "s_register_operand" "w")]
3699 "TARGET_NEON_FP16INST"
3700 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3701 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Narrowing move: a VN-mode source (printed as %q1) produces a <V_narrow>
;; result (printed as %P0) through vmovn.<V_if_elem>.
3704 (define_insn "neon_vmovn<mode>"
3705 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3706 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3709 "vmovn.<V_if_elem>\t%P0, %q1"
3710 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing move: a VN-mode source (%q1) produces a <V_narrow>
;; result (%P0) through vqmovn, with the signedness letter taken from <sup>.
3713 (define_insn "neon_vqmovn<sup><mode>"
3714 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3715 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3718 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3719 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun: a VN-mode source (%q1) produces a <V_narrow> result (%P0); the
;; element type printed in the mnemonic is <V_s_elem>.
3722 (define_insn "neon_vqmovun<mode>"
3723 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3724 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3727 "vqmovun.<V_s_elem>\t%P0, %q1"
3728 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Lengthening move: a VW-mode source (%P1) produces a <V_widen> result (%q0)
;; through vmovl, with the signedness letter taken from <sup>.
3731 (define_insn "neon_vmovl<sup><mode>"
3732 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3733 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3736 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3737 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply a VMD vector (operand 1) by one lane of another VMD vector:
;; operand 2 holds the scalar vector (constraint <scalar_mul_constraint>) and
;; operand 3 the lane number, printed as %P2[%c3].  The scheduling type is
;; chosen by <Is_float_mode> between the fp and integer scalar-multiply types.
3740 (define_insn "neon_vmul_lane<mode>"
3741 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3742 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3743 (match_operand:VMD 2 "s_register_operand"
3744 "<scalar_mul_constraint>")
3745 (match_operand:SI 3 "immediate_operand" "i")]
3749 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3752 (if_then_else (match_test "<Is_float_mode>")
3753 (const_string "neon_fp_mul_s_scalar<q>")
3754 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply a VMQ vector (operand 1, printed as %q1) by one lane of a
;; <V_HALF>-mode vector: operand 2 holds the scalar vector (constraint
;; <scalar_mul_constraint>) and operand 3 the lane number, printed as
;; %P2[%c3].  The scheduling type is chosen by <Is_float_mode>.
3757 (define_insn "neon_vmul_lane<mode>"
3758 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3759 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3760 (match_operand:<V_HALF> 2 "s_register_operand"
3761 "<scalar_mul_constraint>")
3762 (match_operand:SI 3 "immediate_operand" "i")]
3766 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3769 (if_then_else (match_test "<Is_float_mode>")
3770 (const_string "neon_fp_mul_s_scalar<q>")
3771 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Half-precision multiply by lane: a VH vector (operand 1) times lane
;; operand 3 of a V4HF vector (operand 2), printed as vmul.f16.  Requires
;; TARGET_NEON_FP16INST.
3774 (define_insn "neon_vmul_lane<mode>"
3775 [(set (match_operand:VH 0 "s_register_operand" "=w")
3776 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3777 (match_operand:V4HF 2 "s_register_operand"
3778 "<scalar_mul_constraint>")
3779 (match_operand:SI 3 "immediate_operand" "i")]
3781 "TARGET_NEON_FP16INST"
3782 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3783 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Lengthening multiply by lane: a VMDI vector (operand 1) times lane
;; operand 3 of another VMDI vector (operand 2) gives a <V_widen> result
;; (%q0), printed as vmull with the signedness letter taken from <sup>.
3786 (define_insn "neon_vmull<sup>_lane<mode>"
3787 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3788 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3789 (match_operand:VMDI 2 "s_register_operand"
3790 "<scalar_mul_constraint>")
3791 (match_operand:SI 3 "immediate_operand" "i")]
3795 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3797 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by lane (UNSPEC_VQDMULL_LANE): a VMDI vector (operand 1) and lane
;; operand 3 of another VMDI vector (operand 2) give a <V_widen> result
;; (%q0).
3800 (define_insn "neon_vqdmull_lane<mode>"
3801 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3802 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3803 (match_operand:VMDI 2 "s_register_operand"
3804 "<scalar_mul_constraint>")
3805 (match_operand:SI 3 "immediate_operand" "i")]
3806 UNSPEC_VQDMULL_LANE))]
3809 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3811 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vqdmulh / vqrdmulh by lane (selected by <r>) for VMQI modes: operand 1
;; (%q1) combined with lane operand 3 of a <V_HALF>-mode vector (operand 2,
;; printed as %P2[%c3]).
3814 (define_insn "neon_vq<r>dmulh_lane<mode>"
3815 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3816 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3817 (match_operand:<V_HALF> 2 "s_register_operand"
3818 "<scalar_mul_constraint>")
3819 (match_operand:SI 3 "immediate_operand" "i")]
3823 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3825 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vqdmulh / vqrdmulh by lane (selected by <r>) for VMDI modes: operand 1
;; (%P1) combined with lane operand 3 of another VMDI vector (operand 2,
;; printed as %P2[%c3]).
;; NOTE(review): the "type" attribute ends in _scalar_q although every
;; register in the template is printed with %P; confirm that the _q
;; scheduling type is intended for this variant.
3828 (define_insn "neon_vq<r>dmulh_lane<mode>"
3829 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3830 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3831 (match_operand:VMDI 2 "s_register_operand"
3832 "<scalar_mul_constraint>")
3833 (match_operand:SI 3 "immediate_operand" "i")]
3837 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3839 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3842 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form.  Operand 1 is tied to the destination (constraint "0"),
;; operand 2 is the vector multiplicand (%q2), and operand 3 / operand 4 are
;; the <V_HALF>-mode scalar vector and its lane number (%P3[%c4]).
3843 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3844 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3845 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3846 (match_operand:VMQI 2 "s_register_operand" "w")
3847 (match_operand:<V_HALF> 3 "s_register_operand"
3848 "<scalar_mul_constraint>")
3849 (match_operand:SI 4 "immediate_operand" "i")]
3854 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3856 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI form of vqrdmlah_lane / vqrdmlsh_lane.  Operand 1 is tied to the
;; destination (constraint "0"), operand 2 is the vector multiplicand (%P2),
;; and operand 3 / operand 4 are the scalar vector and its lane number
;; (%P3[%c4]).
3859 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3860 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3861 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3862 (match_operand:VMDI 2 "s_register_operand" "w")
3863 (match_operand:VMDI 3 "s_register_operand"
3864 "<scalar_mul_constraint>")
3865 (match_operand:SI 4 "immediate_operand" "i")]
3870 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3872 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate by lane for VMD modes.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 (%P2) is multiplied by lane
;; operand 4 of operand 3 (%P3[%c4]).  The scheduling type is chosen by
;; <Is_float_mode>.
3875 (define_insn "neon_vmla_lane<mode>"
3876 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3877 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3878 (match_operand:VMD 2 "s_register_operand" "w")
3879 (match_operand:VMD 3 "s_register_operand"
3880 "<scalar_mul_constraint>")
3881 (match_operand:SI 4 "immediate_operand" "i")]
3885 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3888 (if_then_else (match_test "<Is_float_mode>")
3889 (const_string "neon_fp_mla_s_scalar<q>")
3890 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by lane for VMQ modes.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 (%q2) is multiplied by lane
;; operand 4 of the <V_HALF>-mode operand 3 (%P3[%c4]).  The scheduling type
;; is chosen by <Is_float_mode>.
3893 (define_insn "neon_vmla_lane<mode>"
3894 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3895 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3896 (match_operand:VMQ 2 "s_register_operand" "w")
3897 (match_operand:<V_HALF> 3 "s_register_operand"
3898 "<scalar_mul_constraint>")
3899 (match_operand:SI 4 "immediate_operand" "i")]
3903 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3906 (if_then_else (match_test "<Is_float_mode>")
3907 (const_string "neon_fp_mla_s_scalar<q>")
3908 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Lengthening multiply-accumulate by lane.  The <V_widen> accumulator
;; (operand 1) is tied to the destination; VMDI operand 2 (%P2) is multiplied
;; by lane operand 4 of VMDI operand 3 (%P3[%c4]).  The signedness letter in
;; the mnemonic comes from <sup>.
3911 (define_insn "neon_vmlal<sup>_lane<mode>"
3912 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3913 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3914 (match_operand:VMDI 2 "s_register_operand" "w")
3915 (match_operand:VMDI 3 "s_register_operand"
3916 "<scalar_mul_constraint>")
3917 (match_operand:SI 4 "immediate_operand" "i")]
3921 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3923 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by lane (UNSPEC_VQDMLAL_LANE).  The <V_widen> accumulator
;; (operand 1) is tied to the destination; VMDI operand 2 (%P2) is combined
;; with lane operand 4 of VMDI operand 3 (%P3[%c4]).
3926 (define_insn "neon_vqdmlal_lane<mode>"
3927 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3928 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3929 (match_operand:VMDI 2 "s_register_operand" "w")
3930 (match_operand:VMDI 3 "s_register_operand"
3931 "<scalar_mul_constraint>")
3932 (match_operand:SI 4 "immediate_operand" "i")]
3933 UNSPEC_VQDMLAL_LANE))]
3936 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3938 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by lane for VMD modes.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 (%P2) is multiplied by lane
;; operand 4 of operand 3 (%P3[%c4]).  The scheduling type is chosen by
;; <Is_float_mode>.
3941 (define_insn "neon_vmls_lane<mode>"
3942 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3943 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3944 (match_operand:VMD 2 "s_register_operand" "w")
3945 (match_operand:VMD 3 "s_register_operand"
3946 "<scalar_mul_constraint>")
3947 (match_operand:SI 4 "immediate_operand" "i")]
3951 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3954 (if_then_else (match_test "<Is_float_mode>")
3955 (const_string "neon_fp_mla_s_scalar<q>")
3956 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by lane for VMQ modes.  Operand 1 is tied to the
;; destination (constraint "0"); operand 2 (%q2) is multiplied by lane
;; operand 4 of the <V_HALF>-mode operand 3 (%P3[%c4]).  The scheduling type
;; is chosen by <Is_float_mode>.
3959 (define_insn "neon_vmls_lane<mode>"
3960 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3961 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3962 (match_operand:VMQ 2 "s_register_operand" "w")
3963 (match_operand:<V_HALF> 3 "s_register_operand"
3964 "<scalar_mul_constraint>")
3965 (match_operand:SI 4 "immediate_operand" "i")]
3969 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3972 (if_then_else (match_test "<Is_float_mode>")
3973 (const_string "neon_fp_mla_s_scalar<q>")
3974 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Lengthening multiply-subtract by lane.  The <V_widen> accumulator
;; (operand 1) is tied to the destination; VMDI operand 2 (%P2) is multiplied
;; by lane operand 4 of VMDI operand 3 (%P3[%c4]).  The signedness letter in
;; the mnemonic comes from <sup>.
3977 (define_insn "neon_vmlsl<sup>_lane<mode>"
3978 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3979 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3980 (match_operand:VMDI 2 "s_register_operand" "w")
3981 (match_operand:VMDI 3 "s_register_operand"
3982 "<scalar_mul_constraint>")
3983 (match_operand:SI 4 "immediate_operand" "i")]
3987 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3989 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by lane (UNSPEC_VQDMLSL_LANE).  The <V_widen> accumulator
;; (operand 1) is tied to the destination; VMDI operand 2 (%P2) is combined
;; with lane operand 4 of VMDI operand 3 (%P3[%c4]).
3992 (define_insn "neon_vqdmlsl_lane<mode>"
3993 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3994 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3995 (match_operand:VMDI 2 "s_register_operand" "w")
3996 (match_operand:VMDI 3 "s_register_operand"
3997 "<scalar_mul_constraint>")
3998 (match_operand:SI 4 "immediate_operand" "i")]
3999 UNSPEC_VQDMLSL_LANE))]
4002 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4004 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4007 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4008 ; core register into a temp register, then use a scalar taken from that. This
4009 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4010 ; or extracted from another vector. In the latter case it's currently better to
4011 ; use the "_lane" variant, and the former case can probably be implemented
4012 ; using vld1_lane, but that hasn't been done yet.
; vmul_n for VMD modes: the scalar (operand 2) is placed in lane 0 of a fresh
; <MODE>mode temporary through neon_vset_lane<mode>, and the multiply is then
; emitted with neon_vmul_lane<mode> using that temporary.  tmp is passed as
; both destination and input vector of the vset_lane, so only lane 0 is
; given a value here.
4014 (define_expand "neon_vmul_n<mode>"
4015 [(match_operand:VMD 0 "s_register_operand" "")
4016 (match_operand:VMD 1 "s_register_operand" "")
4017 (match_operand:<V_elem> 2 "s_register_operand" "")]
4020 rtx tmp = gen_reg_rtx (<MODE>mode);
4021 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4022 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
; vmul_n for VMQ modes: the scalar (operand 2) is placed in lane 0 of a fresh
; <V_HALF>mode temporary through neon_vset_lane<V_half>, and the multiply is
; then emitted with neon_vmul_lane<mode> using that temporary.  tmp is passed
; as both destination and input vector of the vset_lane, so only lane 0 is
; given a value here.
4027 (define_expand "neon_vmul_n<mode>"
4028 [(match_operand:VMQ 0 "s_register_operand" "")
4029 (match_operand:VMQ 1 "s_register_operand" "")
4030 (match_operand:<V_elem> 2 "s_register_operand" "")]
4033 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4034 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4035 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
; vmul_n for VH modes (TARGET_NEON_FP16INST): the scalar (operand 2) is
; placed in lane 0 of a fresh V4HFmode temporary through
; neon_vset_lanev4hf, and the multiply is then emitted with
; neon_vmul_lane<mode> using that temporary.  tmp is passed as both
; destination and input vector of the vset_lane, so only lane 0 is given a
; value here.
4040 (define_expand "neon_vmul_n<mode>"
4041 [(match_operand:VH 0 "s_register_operand")
4042 (match_operand:VH 1 "s_register_operand")
4043 (match_operand:<V_elem> 2 "s_register_operand")]
4044 "TARGET_NEON_FP16INST"
4046 rtx tmp = gen_reg_rtx (V4HFmode);
4047 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4048 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
; Signed vmull_n for VMDI modes: the scalar (operand 2) is placed in lane 0
; of a fresh <MODE>mode temporary through neon_vset_lane<mode>, and the
; operation is then emitted with neon_vmulls_lane<mode> using that temporary.
; Only lane 0 of tmp is given a value here.
4053 (define_expand "neon_vmulls_n<mode>"
4054 [(match_operand:<V_widen> 0 "s_register_operand" "")
4055 (match_operand:VMDI 1 "s_register_operand" "")
4056 (match_operand:<V_elem> 2 "s_register_operand" "")]
4059 rtx tmp = gen_reg_rtx (<MODE>mode);
4060 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4061 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
; Unsigned vmull_n for VMDI modes: the scalar (operand 2) is placed in lane 0
; of a fresh <MODE>mode temporary through neon_vset_lane<mode>, and the
; operation is then emitted with neon_vmullu_lane<mode> using that temporary.
; Only lane 0 of tmp is given a value here.
4066 (define_expand "neon_vmullu_n<mode>"
4067 [(match_operand:<V_widen> 0 "s_register_operand" "")
4068 (match_operand:VMDI 1 "s_register_operand" "")
4069 (match_operand:<V_elem> 2 "s_register_operand" "")]
4072 rtx tmp = gen_reg_rtx (<MODE>mode);
4073 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4074 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
; vqdmull_n for VMDI modes: the scalar (operand 2) is placed in lane 0 of a
; fresh <MODE>mode temporary through neon_vset_lane<mode>, and the operation
; is then emitted with neon_vqdmull_lane<mode> using that temporary.  Only
; lane 0 of tmp is given a value here.
4079 (define_expand "neon_vqdmull_n<mode>"
4080 [(match_operand:<V_widen> 0 "s_register_operand" "")
4081 (match_operand:VMDI 1 "s_register_operand" "")
4082 (match_operand:<V_elem> 2 "s_register_operand" "")]
4085 rtx tmp = gen_reg_rtx (<MODE>mode);
4086 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4087 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
; vqdmulh_n for VMDI modes: the scalar (operand 2) is placed in lane 0 of a
; fresh <MODE>mode temporary through neon_vset_lane<mode>, and the operation
; is then emitted with neon_vqdmulh_lane<mode> using that temporary.  Only
; lane 0 of tmp is given a value here.
4092 (define_expand "neon_vqdmulh_n<mode>"
4093 [(match_operand:VMDI 0 "s_register_operand" "")
4094 (match_operand:VMDI 1 "s_register_operand" "")
4095 (match_operand:<V_elem> 2 "s_register_operand" "")]
4098 rtx tmp = gen_reg_rtx (<MODE>mode);
4099 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4100 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
; vqrdmulh_n for VMDI modes: the scalar (operand 2) is placed in lane 0 of a
; fresh <MODE>mode temporary through neon_vset_lane<mode>, and the operation
; is then emitted with neon_vqrdmulh_lane<mode> using that temporary.  Only
; lane 0 of tmp is given a value here.
4105 (define_expand "neon_vqrdmulh_n<mode>"
4106 [(match_operand:VMDI 0 "s_register_operand" "")
4107 (match_operand:VMDI 1 "s_register_operand" "")
4108 (match_operand:<V_elem> 2 "s_register_operand" "")]
4111 rtx tmp = gen_reg_rtx (<MODE>mode);
4112 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4113 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
; vqdmulh_n for VMQI modes: the scalar (operand 2) is placed in lane 0 of a
; fresh <V_HALF>mode temporary through neon_vset_lane<V_half>, and the
; operation is then emitted with neon_vqdmulh_lane<mode> using that
; temporary.  Only lane 0 of tmp is given a value here.
4118 (define_expand "neon_vqdmulh_n<mode>"
4119 [(match_operand:VMQI 0 "s_register_operand" "")
4120 (match_operand:VMQI 1 "s_register_operand" "")
4121 (match_operand:<V_elem> 2 "s_register_operand" "")]
4124 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4125 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4126 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
; vqrdmulh_n for VMQI modes: the scalar (operand 2) is placed in lane 0 of a
; fresh <V_HALF>mode temporary through neon_vset_lane<V_half>, and the
; operation is then emitted with neon_vqrdmulh_lane<mode> using that
; temporary.  Only lane 0 of tmp is given a value here.
4131 (define_expand "neon_vqrdmulh_n<mode>"
4132 [(match_operand:VMQI 0 "s_register_operand" "")
4133 (match_operand:VMQI 1 "s_register_operand" "")
4134 (match_operand:<V_elem> 2 "s_register_operand" "")]
4137 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4138 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4139 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
; vmla_n for VMD modes: the scalar (operand 3) is placed in lane 0 of a fresh
; <MODE>mode temporary through neon_vset_lane<mode>, and the operation is
; then emitted with neon_vmla_lane<mode> on operands 0, 1 and 2.  Only lane 0
; of tmp is given a value here.
4144 (define_expand "neon_vmla_n<mode>"
4145 [(match_operand:VMD 0 "s_register_operand" "")
4146 (match_operand:VMD 1 "s_register_operand" "")
4147 (match_operand:VMD 2 "s_register_operand" "")
4148 (match_operand:<V_elem> 3 "s_register_operand" "")]
4151 rtx tmp = gen_reg_rtx (<MODE>mode);
4152 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4153 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
; vmla_n for VMQ modes: the scalar (operand 3) is placed in lane 0 of a fresh
; <V_HALF>mode temporary through neon_vset_lane<V_half>, and the operation is
; then emitted with neon_vmla_lane<mode> on operands 0, 1 and 2.  Only lane 0
; of tmp is given a value here.
4158 (define_expand "neon_vmla_n<mode>"
4159 [(match_operand:VMQ 0 "s_register_operand" "")
4160 (match_operand:VMQ 1 "s_register_operand" "")
4161 (match_operand:VMQ 2 "s_register_operand" "")
4162 (match_operand:<V_elem> 3 "s_register_operand" "")]
4165 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4166 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4167 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
; Signed vmlal_n for VMDI modes: the scalar (operand 3) is placed in lane 0
; of a fresh <MODE>mode temporary through neon_vset_lane<mode>, and the
; operation is then emitted with neon_vmlals_lane<mode> on operands 0, 1 and
; 2.  Only lane 0 of tmp is given a value here.
4172 (define_expand "neon_vmlals_n<mode>"
4173 [(match_operand:<V_widen> 0 "s_register_operand" "")
4174 (match_operand:<V_widen> 1 "s_register_operand" "")
4175 (match_operand:VMDI 2 "s_register_operand" "")
4176 (match_operand:<V_elem> 3 "s_register_operand" "")]
4179 rtx tmp = gen_reg_rtx (<MODE>mode);
4180 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4181 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
; Unsigned vmlal_n for VMDI modes: the scalar (operand 3) is placed in lane 0
; of a fresh <MODE>mode temporary through neon_vset_lane<mode>, and the
; operation is then emitted with neon_vmlalu_lane<mode> on operands 0, 1 and
; 2.  Only lane 0 of tmp is given a value here.
4186 (define_expand "neon_vmlalu_n<mode>"
4187 [(match_operand:<V_widen> 0 "s_register_operand" "")
4188 (match_operand:<V_widen> 1 "s_register_operand" "")
4189 (match_operand:VMDI 2 "s_register_operand" "")
4190 (match_operand:<V_elem> 3 "s_register_operand" "")]
4193 rtx tmp = gen_reg_rtx (<MODE>mode);
4194 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4195 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4200 (define_expand "neon_vqdmlal_n<mode>"
4201 [(match_operand:<V_widen> 0 "s_register_operand" "")
4202 (match_operand:<V_widen> 1 "s_register_operand" "")
4203 (match_operand:VMDI 2 "s_register_operand" "")
4204 (match_operand:<V_elem> 3 "s_register_operand" "")]
4207 rtx tmp = gen_reg_rtx (<MODE>mode);
4208 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4209 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4214 (define_expand "neon_vmls_n<mode>"
4215 [(match_operand:VMD 0 "s_register_operand" "")
4216 (match_operand:VMD 1 "s_register_operand" "")
4217 (match_operand:VMD 2 "s_register_operand" "")
4218 (match_operand:<V_elem> 3 "s_register_operand" "")]
4221 rtx tmp = gen_reg_rtx (<MODE>mode);
4222 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4223 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4228 (define_expand "neon_vmls_n<mode>"
4229 [(match_operand:VMQ 0 "s_register_operand" "")
4230 (match_operand:VMQ 1 "s_register_operand" "")
4231 (match_operand:VMQ 2 "s_register_operand" "")
4232 (match_operand:<V_elem> 3 "s_register_operand" "")]
4235 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4236 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4237 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4242 (define_expand "neon_vmlsls_n<mode>"
4243 [(match_operand:<V_widen> 0 "s_register_operand" "")
4244 (match_operand:<V_widen> 1 "s_register_operand" "")
4245 (match_operand:VMDI 2 "s_register_operand" "")
4246 (match_operand:<V_elem> 3 "s_register_operand" "")]
4249 rtx tmp = gen_reg_rtx (<MODE>mode);
4250 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4251 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4256 (define_expand "neon_vmlslu_n<mode>"
4257 [(match_operand:<V_widen> 0 "s_register_operand" "")
4258 (match_operand:<V_widen> 1 "s_register_operand" "")
4259 (match_operand:VMDI 2 "s_register_operand" "")
4260 (match_operand:<V_elem> 3 "s_register_operand" "")]
4263 rtx tmp = gen_reg_rtx (<MODE>mode);
4264 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4265 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4270 (define_expand "neon_vqdmlsl_n<mode>"
4271 [(match_operand:<V_widen> 0 "s_register_operand" "")
4272 (match_operand:<V_widen> 1 "s_register_operand" "")
4273 (match_operand:VMDI 2 "s_register_operand" "")
4274 (match_operand:<V_elem> 3 "s_register_operand" "")]
4277 rtx tmp = gen_reg_rtx (<MODE>mode);
4278 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4279 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4284 (define_insn "neon_vext<mode>"
4285 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4286 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4287 (match_operand:VDQX 2 "s_register_operand" "w")
4288 (match_operand:SI 3 "immediate_operand" "i")]
4292 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4293 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4295 [(set_attr "type" "neon_ext<q>")]
4298 (define_insn "neon_vrev64<mode>"
4299 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4300 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4303 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4304 [(set_attr "type" "neon_rev<q>")]
4307 (define_insn "neon_vrev32<mode>"
4308 [(set (match_operand:VX 0 "s_register_operand" "=w")
4309 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4312 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4313 [(set_attr "type" "neon_rev<q>")]
4316 (define_insn "neon_vrev16<mode>"
4317 [(set (match_operand:VE 0 "s_register_operand" "=w")
4318 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4321 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4322 [(set_attr "type" "neon_rev<q>")]
4325 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4326 ; allocation. For an intrinsic of form:
4327 ; rD = vbsl_* (rS, rN, rM)
4328 ; We can use any of:
4329 ; vbsl rS, rN, rM (if D = S)
4330 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4331 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
4333 (define_insn "neon_vbsl<mode>_internal"
4334 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4335 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4336 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4337 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4341 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4342 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4343 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4344 [(set_attr "type" "neon_bsl<q>")]
4347 (define_expand "neon_vbsl<mode>"
4348 [(set (match_operand:VDQX 0 "s_register_operand" "")
4349 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4350 (match_operand:VDQX 2 "s_register_operand" "")
4351 (match_operand:VDQX 3 "s_register_operand" "")]
4355 /* We can't alias operands together if they have different modes. */
4356 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4360 (define_insn "neon_v<shift_op><sup><mode>"
4361 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4362 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4363 (match_operand:VDQIX 2 "s_register_operand" "w")]
4366 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4367 [(set_attr "type" "neon_shift_imm<q>")]
4371 (define_insn "neon_v<shift_op><sup><mode>"
4372 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4373 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4374 (match_operand:VDQIX 2 "s_register_operand" "w")]
4377 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4378 [(set_attr "type" "neon_sat_shift_imm<q>")]
4382 (define_insn "neon_v<shift_op><sup>_n<mode>"
4383 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4384 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4385 (match_operand:SI 2 "immediate_operand" "i")]
4389 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4390 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4392 [(set_attr "type" "neon_shift_imm<q>")]
4395 ;; vshrn_n, vrshrn_n
4396 (define_insn "neon_v<shift_op>_n<mode>"
4397 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4398 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4399 (match_operand:SI 2 "immediate_operand" "i")]
4403 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4404 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4406 [(set_attr "type" "neon_shift_imm_narrow_q")]
4409 ;; vqshrn_n, vqrshrn_n
4410 (define_insn "neon_v<shift_op><sup>_n<mode>"
4411 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4412 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4413 (match_operand:SI 2 "immediate_operand" "i")]
4417 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4418 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4420 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4423 ;; vqshrun_n, vqrshrun_n
4424 (define_insn "neon_v<shift_op>_n<mode>"
4425 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4426 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4427 (match_operand:SI 2 "immediate_operand" "i")]
4431 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4432 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4434 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4437 (define_insn "neon_vshl_n<mode>"
4438 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4439 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4440 (match_operand:SI 2 "immediate_operand" "i")]
4444 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4445 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4447 [(set_attr "type" "neon_shift_imm<q>")]
4450 (define_insn "neon_vqshl_<sup>_n<mode>"
4451 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4452 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4453 (match_operand:SI 2 "immediate_operand" "i")]
4457 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4458 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4460 [(set_attr "type" "neon_sat_shift_imm<q>")]
4463 (define_insn "neon_vqshlu_n<mode>"
4464 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4465 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4466 (match_operand:SI 2 "immediate_operand" "i")]
4470 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4471 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4473 [(set_attr "type" "neon_sat_shift_imm<q>")]
4476 (define_insn "neon_vshll<sup>_n<mode>"
4477 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4478 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4479 (match_operand:SI 2 "immediate_operand" "i")]
4483 /* The boundaries are: 0 < imm <= size. */
4484 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4485 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4487 [(set_attr "type" "neon_shift_imm_long")]
4491 (define_insn "neon_v<shift_op><sup>_n<mode>"
4492 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4493 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4494 (match_operand:VDQIX 2 "s_register_operand" "w")
4495 (match_operand:SI 3 "immediate_operand" "i")]
4499 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4500 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4502 [(set_attr "type" "neon_shift_acc<q>")]
4505 (define_insn "neon_vsri_n<mode>"
4506 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4507 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4508 (match_operand:VDQIX 2 "s_register_operand" "w")
4509 (match_operand:SI 3 "immediate_operand" "i")]
4513 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4514 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4516 [(set_attr "type" "neon_shift_reg<q>")]
4519 (define_insn "neon_vsli_n<mode>"
4520 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4521 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4522 (match_operand:VDQIX 2 "s_register_operand" "w")
4523 (match_operand:SI 3 "immediate_operand" "i")]
4527 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4528 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4530 [(set_attr "type" "neon_shift_reg<q>")]
4533 (define_insn "neon_vtbl1v8qi"
4534 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4535 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4536 (match_operand:V8QI 2 "s_register_operand" "w")]
4539 "vtbl.8\t%P0, {%P1}, %P2"
4540 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register table.  The table (operand 1) is a single
;; TImode register operand; for output it is printed as a list of two
;; V8QImode registers built from the hard register numbers tabbase and
;; tabbase + 2, where tabbase is REGNO of operand 1.  Operand 2 is printed
;; as the final D-register argument.
4543 (define_insn "neon_vtbl2v8qi"
4544 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4545 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4546 (match_operand:V8QI 2 "s_register_operand" "w")]
4551 int tabbase = REGNO (operands[1]);
4553 ops[0] = operands[0];
4554 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4555 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4556 ops[3] = operands[2];
4557 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4561 [(set_attr "type" "neon_tbl2")]
4564 (define_insn "neon_vtbl3v8qi"
4565 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4566 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4567 (match_operand:V8QI 2 "s_register_operand" "w")]
4572 int tabbase = REGNO (operands[1]);
4574 ops[0] = operands[0];
4575 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4576 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4577 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4578 ops[4] = operands[2];
4579 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4583 [(set_attr "type" "neon_tbl3")]
4586 (define_insn "neon_vtbl4v8qi"
4587 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4588 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4589 (match_operand:V8QI 2 "s_register_operand" "w")]
4594 int tabbase = REGNO (operands[1]);
4596 ops[0] = operands[0];
4597 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4598 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4599 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4600 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4601 ops[5] = operands[2];
4602 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4606 [(set_attr "type" "neon_tbl4")]
4609 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QImode table lookup, split once reload has completed into two
;; neon_vtbl2v8qi insns.  Operand 1 is viewed as TImode (op1) and used as
;; the table for both insns; the first insn works on the low V8QImode
;; subregs of op0 and op2, the second on their high V8QImode subregs.
;; Operand 0 is early-clobber ("=&w").
4610 (define_insn_and_split "neon_vtbl1v16qi"
4611 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4612 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4613 (match_operand:V16QI 2 "s_register_operand" "w")]
4617 "&& reload_completed"
4620 rtx op0, op1, op2, part0, part2;
4624 op1 = gen_lowpart (TImode, operands[1]);
4627 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4628 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4629 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4630 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4632 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4633 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4634 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4635 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4638 [(set_attr "type" "multiple")]
4641 (define_insn_and_split "neon_vtbl2v16qi"
4642 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4643 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4644 (match_operand:V16QI 2 "s_register_operand" "w")]
4648 "&& reload_completed"
4651 rtx op0, op1, op2, part0, part2;
4658 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4659 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4660 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4661 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4663 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4664 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4665 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4666 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4669 [(set_attr "type" "multiple")]
4672 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4673 ;; handle quad-word input modes, producing octa-word output modes. But
4674 ;; that requires us to add support for octa-word vector modes in moves.
4675 ;; That seems overkill for this one use in vec_perm.
4676 (define_insn_and_split "neon_vcombinev16qi"
4677 [(set (match_operand:OI 0 "s_register_operand" "=w")
4678 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4679 (match_operand:V16QI 2 "s_register_operand" "w")]
4683 "&& reload_completed"
4686 neon_split_vcombine (operands);
4689 [(set_attr "type" "multiple")]
4692 (define_insn "neon_vtbx1v8qi"
4693 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4694 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4695 (match_operand:V8QI 2 "s_register_operand" "w")
4696 (match_operand:V8QI 3 "s_register_operand" "w")]
4699 "vtbx.8\t%P0, {%P2}, %P3"
4700 [(set_attr "type" "neon_tbl1")]
4703 (define_insn "neon_vtbx2v8qi"
4704 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4705 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4706 (match_operand:TI 2 "s_register_operand" "w")
4707 (match_operand:V8QI 3 "s_register_operand" "w")]
4712 int tabbase = REGNO (operands[2]);
4714 ops[0] = operands[0];
4715 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4716 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4717 ops[3] = operands[3];
4718 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4722 [(set_attr "type" "neon_tbl2")]
4725 (define_insn "neon_vtbx3v8qi"
4726 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4727 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4728 (match_operand:EI 2 "s_register_operand" "w")
4729 (match_operand:V8QI 3 "s_register_operand" "w")]
4734 int tabbase = REGNO (operands[2]);
4736 ops[0] = operands[0];
4737 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4738 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4739 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4740 ops[4] = operands[3];
4741 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4745 [(set_attr "type" "neon_tbl3")]
4748 (define_insn "neon_vtbx4v8qi"
4749 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4750 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4751 (match_operand:OI 2 "s_register_operand" "w")
4752 (match_operand:V8QI 3 "s_register_operand" "w")]
4757 int tabbase = REGNO (operands[2]);
4759 ops[0] = operands[0];
4760 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4761 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4762 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4763 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4764 ops[5] = operands[3];
4765 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4769 [(set_attr "type" "neon_tbl4")]
4772 (define_expand "neon_vtrn<mode>_internal"
4774 [(set (match_operand:VDQWH 0 "s_register_operand")
4775 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4776 (match_operand:VDQWH 2 "s_register_operand")]
4778 (set (match_operand:VDQWH 3 "s_register_operand")
4779 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4784 ;; Note: Different operand numbering to handle tied registers correctly.
4785 (define_insn "*neon_vtrn<mode>_insn"
4786 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4787 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4788 (match_operand:VDQWH 3 "s_register_operand" "2")]
4790 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4791 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4794 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4795 [(set_attr "type" "neon_permute<q>")]
4798 (define_expand "neon_vzip<mode>_internal"
4800 [(set (match_operand:VDQWH 0 "s_register_operand")
4801 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4802 (match_operand:VDQWH 2 "s_register_operand")]
4804 (set (match_operand:VDQWH 3 "s_register_operand")
4805 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4810 ;; Note: Different operand numbering to handle tied registers correctly.
4811 (define_insn "*neon_vzip<mode>_insn"
4812 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4813 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4814 (match_operand:VDQWH 3 "s_register_operand" "2")]
4816 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4817 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4820 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4821 [(set_attr "type" "neon_zip<q>")]
4824 (define_expand "neon_vuzp<mode>_internal"
4826 [(set (match_operand:VDQWH 0 "s_register_operand")
4827 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4828 (match_operand:VDQWH 2 "s_register_operand")]
4830 (set (match_operand:VDQWH 3 "s_register_operand" "")
4831 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4836 ;; Note: Different operand numbering to handle tied registers correctly.
4837 (define_insn "*neon_vuzp<mode>_insn"
4838 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4839 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4840 (match_operand:VDQWH 3 "s_register_operand" "2")]
4842 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4843 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4846 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4847 [(set_attr "type" "neon_zip<q>")]
4850 (define_expand "vec_load_lanes<mode><mode>"
4851 [(set (match_operand:VDQX 0 "s_register_operand")
4852 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4856 (define_insn "neon_vld1<mode>"
4857 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4858 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4861 "vld1.<V_sz_elem>\t%h0, %A1"
4862 [(set_attr "type" "neon_load1_1reg<q>")]
4865 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4866 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order here.
4868 (define_insn "neon_vld1_lane<mode>"
4869 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4870 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4871 (match_operand:VDX 2 "s_register_operand" "0")
4872 (match_operand:SI 3 "immediate_operand" "i")]
4876 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4877 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4878 operands[3] = GEN_INT (lane);
4880 return "vld1.<V_sz_elem>\t%P0, %A1";
4882 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4884 [(set_attr "type" "neon_load1_one_lane<q>")]
4887 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4888 ;; here on big endian targets.
4889 (define_insn "neon_vld1_lane<mode>"
4890 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4891 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4892 (match_operand:VQX 2 "s_register_operand" "0")
4893 (match_operand:SI 3 "immediate_operand" "i")]
4897 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4898 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4899 operands[3] = GEN_INT (lane);
4900 int regno = REGNO (operands[0]);
4901 if (lane >= max / 2)
4905 operands[3] = GEN_INT (lane);
4907 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4909 return "vld1.<V_sz_elem>\t%P0, %A1";
4911 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4913 [(set_attr "type" "neon_load1_one_lane<q>")]
4916 (define_insn "neon_vld1_dup<mode>"
4917 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4918 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4920 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4921 [(set_attr "type" "neon_load1_all_lanes<q>")]
4924 ;; Special case for DImode. Treat it exactly like a simple load.
4925 (define_expand "neon_vld1_dupdi"
4926 [(set (match_operand:DI 0 "s_register_operand" "")
4927 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4933 (define_insn "neon_vld1_dup<mode>"
4934 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4935 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4938 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4940 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DImode memory operand into both halves of a V2DImode
;; register.  Once reload has completed this is split into two steps:
;; neon_vld1_dupdi loads into the DImode low part of operand 0, and that
;; low part is then copied into the DImode high part with a move.
;; The "length" attribute is set to 8.
4943 (define_insn_and_split "neon_vld1_dupv2di"
4944 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4945 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4948 "&& reload_completed"
4951 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4952 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4953 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4956 [(set_attr "length" "8")
4957 (set_attr "type" "neon_load1_all_lanes_q")]
4960 (define_expand "vec_store_lanes<mode><mode>"
4961 [(set (match_operand:VDQX 0 "neon_struct_operand")
4962 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4966 (define_insn "neon_vst1<mode>"
4967 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4968 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4971 "vst1.<V_sz_elem>\t%h1, %A0"
4972 [(set_attr "type" "neon_store1_1reg<q>")])
4974 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4975 ;; here on big endian targets.
4976 (define_insn "neon_vst1_lane<mode>"
4977 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4979 [(match_operand:VDX 1 "s_register_operand" "w")
4980 (match_operand:SI 2 "immediate_operand" "i")]
4984 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4985 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4986 operands[2] = GEN_INT (lane);
4988 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4990 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4992 [(set_attr "type" "neon_store1_one_lane<q>")]
4995 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4996 ;; here on big endian targets.
4997 (define_insn "neon_vst1_lane<mode>"
4998 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5000 [(match_operand:VQX 1 "s_register_operand" "w")
5001 (match_operand:SI 2 "immediate_operand" "i")]
5005 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5006 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5007 int regno = REGNO (operands[1]);
5008 if (lane >= max / 2)
5013 operands[2] = GEN_INT (lane);
5014 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5016 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5018 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5020 [(set_attr "type" "neon_store1_one_lane<q>")]
5023 (define_expand "vec_load_lanesti<mode>"
5024 [(set (match_operand:TI 0 "s_register_operand")
5025 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5026 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5030 (define_insn "neon_vld2<mode>"
5031 [(set (match_operand:TI 0 "s_register_operand" "=w")
5032 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5033 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5037 if (<V_sz_elem> == 64)
5038 return "vld1.64\t%h0, %A1";
5040 return "vld2.<V_sz_elem>\t%h0, %A1";
5043 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5044 (const_string "neon_load1_2reg<q>")
5045 (const_string "neon_load2_2reg<q>")))]
5048 (define_expand "vec_load_lanesoi<mode>"
5049 [(set (match_operand:OI 0 "s_register_operand")
5050 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5051 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5055 (define_insn "neon_vld2<mode>"
5056 [(set (match_operand:OI 0 "s_register_operand" "=w")
5057 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5058 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5061 "vld2.<V_sz_elem>\t%h0, %A1"
5062 [(set_attr "type" "neon_load2_2reg_q")])
5064 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5065 ;; here on big endian targets.
5066 (define_insn "neon_vld2_lane<mode>"
5067 [(set (match_operand:TI 0 "s_register_operand" "=w")
5068 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5069 (match_operand:TI 2 "s_register_operand" "0")
5070 (match_operand:SI 3 "immediate_operand" "i")
5071 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5075 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5076 int regno = REGNO (operands[0]);
5078 ops[0] = gen_rtx_REG (DImode, regno);
5079 ops[1] = gen_rtx_REG (DImode, regno + 2);
5080 ops[2] = operands[1];
5081 ops[3] = GEN_INT (lane);
5082 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5085 [(set_attr "type" "neon_load2_one_lane<q>")]
5088 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5089 ;; here on big endian targets.
5090 (define_insn "neon_vld2_lane<mode>"
5091 [(set (match_operand:OI 0 "s_register_operand" "=w")
5092 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5093 (match_operand:OI 2 "s_register_operand" "0")
5094 (match_operand:SI 3 "immediate_operand" "i")
5095 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5099 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5100 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5101 int regno = REGNO (operands[0]);
5103 if (lane >= max / 2)
5108 ops[0] = gen_rtx_REG (DImode, regno);
5109 ops[1] = gen_rtx_REG (DImode, regno + 4);
5110 ops[2] = operands[1];
5111 ops[3] = GEN_INT (lane);
5112 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5115 [(set_attr "type" "neon_load2_one_lane<q>")]
5118 (define_insn "neon_vld2_dup<mode>"
5119 [(set (match_operand:TI 0 "s_register_operand" "=w")
5120 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5121 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5125 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5126 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5128 return "vld1.<V_sz_elem>\t%h0, %A1";
5131 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5132 (const_string "neon_load2_all_lanes<q>")
5133 (const_string "neon_load1_1reg<q>")))]
5136 (define_expand "vec_store_lanesti<mode>"
5137 [(set (match_operand:TI 0 "neon_struct_operand")
5138 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5139 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store the TImode register operand 1 to the structure memory operand 0.
;; When the element size is 64 bits the output is vst1.64, otherwise
;; vst2.<V_sz_elem>; both print the whole register list (%h1).
;; The scheduling type follows the same split: neon_store1_2reg<q> for the
;; vst1 form and neon_store2_2reg<q> for the vst2 form.  The vst2 form is a
;; whole two-register store, so it uses the 2reg type (mirroring
;; neon_load2_2reg<q> on neon_vld2<mode>) rather than the one-lane type,
;; which is what neon_vst2_lane<mode> uses.
5143 (define_insn "neon_vst2<mode>"
5144 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5145 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5146 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5150 if (<V_sz_elem> == 64)
5151 return "vst1.64\t%h1, %A0";
5153 return "vst2.<V_sz_elem>\t%h1, %A0";
5156 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5157 (const_string "neon_store1_2reg<q>")
5158 (const_string "neon_store2_2reg<q>")))]
5161 (define_expand "vec_store_lanesoi<mode>"
5162 [(set (match_operand:OI 0 "neon_struct_operand")
5163 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5164 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5168 (define_insn "neon_vst2<mode>"
5169 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5170 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5171 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5174 "vst2.<V_sz_elem>\t%h1, %A0"
5175 [(set_attr "type" "neon_store2_4reg<q>")]
5178 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5179 ;; here on big endian targets.
5180 (define_insn "neon_vst2_lane<mode>"
5181 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5182 (unspec:<V_two_elem>
5183 [(match_operand:TI 1 "s_register_operand" "w")
5184 (match_operand:SI 2 "immediate_operand" "i")
5185 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5189 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5190 int regno = REGNO (operands[1]);
5192 ops[0] = operands[0];
5193 ops[1] = gen_rtx_REG (DImode, regno);
5194 ops[2] = gen_rtx_REG (DImode, regno + 2);
5195 ops[3] = GEN_INT (lane);
5196 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5199 [(set_attr "type" "neon_store2_one_lane<q>")]
5202 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5203 ;; here on big endian targets.
5204 (define_insn "neon_vst2_lane<mode>"
5205 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5206 (unspec:<V_two_elem>
5207 [(match_operand:OI 1 "s_register_operand" "w")
5208 (match_operand:SI 2 "immediate_operand" "i")
5209 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5213 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5214 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5215 int regno = REGNO (operands[1]);
5217 if (lane >= max / 2)
5222 ops[0] = operands[0];
5223 ops[1] = gen_rtx_REG (DImode, regno);
5224 ops[2] = gen_rtx_REG (DImode, regno + 4);
5225 ops[3] = GEN_INT (lane);
5226 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5229 [(set_attr "type" "neon_store2_one_lane<q>")]
5232 (define_expand "vec_load_lanesei<mode>"
5233 [(set (match_operand:EI 0 "s_register_operand")
5234 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5235 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load an EI register group from an EI structure in memory (VDX modes).
;; 64-bit elements are emitted as vld1.64; every other element size is
;; emitted as vld3.  The "type" attribute is selected by the same
;; element-size test.
5239 (define_insn "neon_vld3<mode>"
5240 [(set (match_operand:EI 0 "s_register_operand" "=w")
5241 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5242 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5246 if (<V_sz_elem> == 64)
5247 return "vld1.64\t%h0, %A1";
5249 return "vld3.<V_sz_elem>\t%h0, %A1";
5252 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5253 (const_string "neon_load1_3reg<q>")
5254 (const_string "neon_load3_3reg<q>")))]
;; Expander vec_load_lanesci<mode> (VQ2 modes, CI operands): forwards both
;; operands unchanged to gen_neon_vld3<mode>.
5257 (define_expand "vec_load_lanesci<mode>"
5258 [(match_operand:CI 0 "s_register_operand")
5259 (match_operand:CI 1 "neon_struct_operand")
5260 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5263 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Q-register vld3 expander (VQ2 modes).  The CI memory operand is split
;; into two EImode accesses: neon_vld3qa reads at offset 0 and neon_vld3qb
;; reads at offset GET_MODE_SIZE (EImode).  The second insn also receives
;; operands[0] as an input so that it updates the same register group.
5267 (define_expand "neon_vld3<mode>"
5268 [(match_operand:CI 0 "s_register_operand")
5269 (match_operand:CI 1 "neon_struct_operand")
5270 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5275 mem = adjust_address (operands[1], EImode, 0);
5276 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5277 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5278 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First of the two insns emitted by the Q-register vld3 expander: a vld3
;; from an EI memory operand into the D registers at REGNO, REGNO + 4 and
;; REGNO + 8 of the CI destination.
5282 (define_insn "neon_vld3qa<mode>"
5283 [(set (match_operand:CI 0 "s_register_operand" "=w")
5284 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5285 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5289 int regno = REGNO (operands[0]);
5291 ops[0] = gen_rtx_REG (DImode, regno);
5292 ops[1] = gen_rtx_REG (DImode, regno + 4);
5293 ops[2] = gen_rtx_REG (DImode, regno + 8);
5294 ops[3] = operands[1];
5295 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5298 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second insn of the Q-register vld3 expander: a vld3 from an EI memory
;; operand into the D registers at REGNO + 2, REGNO + 6 and REGNO + 10 of
;; the CI destination.  Operand 2 is tied to operand 0 (constraint "0"), so
;; the previous contents of the register group are an input.
5301 (define_insn "neon_vld3qb<mode>"
5302 [(set (match_operand:CI 0 "s_register_operand" "=w")
5303 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5304 (match_operand:CI 2 "s_register_operand" "0")
5305 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5309 int regno = REGNO (operands[0]);
5311 ops[0] = gen_rtx_REG (DImode, regno + 2);
5312 ops[1] = gen_rtx_REG (DImode, regno + 6);
5313 ops[2] = gen_rtx_REG (DImode, regno + 10);
5314 ops[3] = operands[1];
5315 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %A3", ops);
5318 [(set_attr "type" "neon_load3_3reg<q>")]
5321 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5322 ;; here on big endian targets.
;; Single-lane vld3 for the VD_LANE modes: loads one <V_three_elem> structure
;; into lane LANE of the D registers at REGNO, REGNO + 2 and REGNO + 4.
;; Operand 2 is tied to the EI destination (constraint "0").  The lane index
;; is passed through NEON_ENDIAN_LANE_N.  The memory operand is printed with
;; plain %3 here, not the %A modifier that the whole-structure patterns use.
5323 (define_insn "neon_vld3_lane<mode>"
5324 [(set (match_operand:EI 0 "s_register_operand" "=w")
5325 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5326 (match_operand:EI 2 "s_register_operand" "0")
5327 (match_operand:SI 3 "immediate_operand" "i")
5328 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5332 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5333 int regno = REGNO (operands[0]);
5335 ops[0] = gen_rtx_REG (DImode, regno);
5336 ops[1] = gen_rtx_REG (DImode, regno + 2);
5337 ops[2] = gen_rtx_REG (DImode, regno + 4);
5338 ops[3] = operands[1];
5339 ops[4] = GEN_INT (lane);
5340 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5344 [(set_attr "type" "neon_load3_one_lane<q>")]
5347 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5348 ;; here on big endian targets.
;; Single-lane vld3 for the VQ_HS modes: the destination is a CI register
;; group (tied to operand 2) and the D registers named in the instruction
;; are taken at REGNO, REGNO + 4 and REGNO + 8.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this view; confirm how the lane and register numbers are adjusted there.
5349 (define_insn "neon_vld3_lane<mode>"
5350 [(set (match_operand:CI 0 "s_register_operand" "=w")
5351 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5352 (match_operand:CI 2 "s_register_operand" "0")
5353 (match_operand:SI 3 "immediate_operand" "i")
5354 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5358 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5359 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5360 int regno = REGNO (operands[0]);
5362 if (lane >= max / 2)
5367 ops[0] = gen_rtx_REG (DImode, regno);
5368 ops[1] = gen_rtx_REG (DImode, regno + 4);
5369 ops[2] = gen_rtx_REG (DImode, regno + 8);
5370 ops[3] = operands[1];
5371 ops[4] = GEN_INT (lane);
5372 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5376 [(set_attr "type" "neon_load3_one_lane<q>")]
;; All-lanes vld3 (VDX modes).  When the vector mode has more than one
;; element, emits the "[]" form of vld3 on the D registers at REGNO,
;; REGNO + 2 and REGNO + 4; otherwise emits a vld1 of the register list.
;; The "type" attribute is chosen by the same element-count test.
5379 (define_insn "neon_vld3_dup<mode>"
5380 [(set (match_operand:EI 0 "s_register_operand" "=w")
5381 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5382 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5386 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5388 int regno = REGNO (operands[0]);
5390 ops[0] = gen_rtx_REG (DImode, regno);
5391 ops[1] = gen_rtx_REG (DImode, regno + 2);
5392 ops[2] = gen_rtx_REG (DImode, regno + 4);
5393 ops[3] = operands[1];
5394 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5398 return "vld1.<V_sz_elem>\t%h0, %A1";
5401 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5402 (const_string "neon_load3_all_lanes<q>")
5403 (const_string "neon_load1_1reg<q>")))])
;; Expander vec_store_lanesei<mode> (VDX modes): sets an EI
;; neon_struct_operand memory reference from an unspec wrapping an EI
;; register operand.
5405 (define_expand "vec_store_lanesei<mode>"
5406 [(set (match_operand:EI 0 "neon_struct_operand")
5407 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5408 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store an EI register group to an EI structure in memory (VDX modes).
;; 64-bit elements are emitted as vst1.64; every other element size is
;; emitted as vst3 on the whole register list.  The non-64-bit case is a
;; whole-structure store, so it is typed neon_store3_3reg<q>, matching
;; neon_vst3qa/neon_vst3qb and mirroring neon_load3_3reg<q> on neon_vld3.
;; The one-lane type is reserved for the neon_vst3_lane patterns.
5412 (define_insn "neon_vst3<mode>"
5413 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5414 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5415 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5419 if (<V_sz_elem> == 64)
5420 return "vst1.64\t%h1, %A0";
5422 return "vst3.<V_sz_elem>\t%h1, %A0";
5425 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5426 (const_string "neon_store1_3reg<q>")
5427 (const_string "neon_store3_3reg<q>")))])
;; Expander vec_store_lanesci<mode> (VQ2 modes, CI operands): forwards both
;; operands unchanged to gen_neon_vst3<mode>.
5429 (define_expand "vec_store_lanesci<mode>"
5430 [(match_operand:CI 0 "neon_struct_operand")
5431 (match_operand:CI 1 "s_register_operand")
5432 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5435 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Q-register vst3 expander (VQ2 modes).  The CI memory operand is split
;; into two EImode accesses: neon_vst3qa writes at offset 0 and neon_vst3qb
;; writes at offset GET_MODE_SIZE (EImode).  Both take the same CI source.
5439 (define_expand "neon_vst3<mode>"
5440 [(match_operand:CI 0 "neon_struct_operand")
5441 (match_operand:CI 1 "s_register_operand")
5442 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5447 mem = adjust_address (operands[0], EImode, 0);
5448 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5449 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5450 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First insn of the Q-register vst3 expander: a vst3 to an EI memory
;; operand from the D registers at REGNO, REGNO + 4 and REGNO + 8 of the CI
;; source.
5454 (define_insn "neon_vst3qa<mode>"
5455 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5456 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5457 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5461 int regno = REGNO (operands[1]);
5463 ops[0] = operands[0];
5464 ops[1] = gen_rtx_REG (DImode, regno);
5465 ops[2] = gen_rtx_REG (DImode, regno + 4);
5466 ops[3] = gen_rtx_REG (DImode, regno + 8);
5467 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5470 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second insn of the Q-register vst3 expander: a vst3 to an EI memory
;; operand from the D registers at REGNO + 2, REGNO + 6 and REGNO + 10 of
;; the CI source.
5473 (define_insn "neon_vst3qb<mode>"
5474 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5475 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5476 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5480 int regno = REGNO (operands[1]);
5482 ops[0] = operands[0];
5483 ops[1] = gen_rtx_REG (DImode, regno + 2);
5484 ops[2] = gen_rtx_REG (DImode, regno + 6);
5485 ops[3] = gen_rtx_REG (DImode, regno + 10);
5486 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5489 [(set_attr "type" "neon_store3_3reg<q>")]
5492 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5493 ;; here on big endian targets.
;; Single-lane vst3 for the VD_LANE modes: stores lane LANE of the D
;; registers at REGNO, REGNO + 2 and REGNO + 4 of the EI source to a
;; <V_three_elem> memory operand.  The lane index is passed through
;; NEON_ENDIAN_LANE_N.  The memory operand is printed with plain %0 here,
;; not the %A modifier that the whole-structure patterns use.
5494 (define_insn "neon_vst3_lane<mode>"
5495 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5496 (unspec:<V_three_elem>
5497 [(match_operand:EI 1 "s_register_operand" "w")
5498 (match_operand:SI 2 "immediate_operand" "i")
5499 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5503 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5504 int regno = REGNO (operands[1]);
5506 ops[0] = operands[0];
5507 ops[1] = gen_rtx_REG (DImode, regno);
5508 ops[2] = gen_rtx_REG (DImode, regno + 2);
5509 ops[3] = gen_rtx_REG (DImode, regno + 4);
5510 ops[4] = GEN_INT (lane);
5511 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5515 [(set_attr "type" "neon_store3_one_lane<q>")]
5518 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5519 ;; here on big endian targets.
;; Single-lane vst3 for the VQ_HS modes: the source is a CI register group
;; and the D registers named in the instruction are taken at REGNO,
;; REGNO + 4 and REGNO + 8.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this view; confirm how the lane and register numbers are adjusted there.
5520 (define_insn "neon_vst3_lane<mode>"
5521 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5522 (unspec:<V_three_elem>
5523 [(match_operand:CI 1 "s_register_operand" "w")
5524 (match_operand:SI 2 "immediate_operand" "i")
5525 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5529 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5530 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5531 int regno = REGNO (operands[1]);
5533 if (lane >= max / 2)
5538 ops[0] = operands[0];
5539 ops[1] = gen_rtx_REG (DImode, regno);
5540 ops[2] = gen_rtx_REG (DImode, regno + 4);
5541 ops[3] = gen_rtx_REG (DImode, regno + 8);
5542 ops[4] = GEN_INT (lane);
5543 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5547 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander vec_load_lanesoi<mode> (VDX modes): sets an OI register operand
;; from an unspec wrapping an OI neon_struct_operand memory reference.
5550 (define_expand "vec_load_lanesoi<mode>"
5551 [(set (match_operand:OI 0 "s_register_operand")
5552 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5553 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Load an OI register group from an OI structure in memory (VDX modes).
;; 64-bit elements are emitted as vld1.64; every other element size is
;; emitted as vld4.  The "type" attribute is selected by the same
;; element-size test.
5557 (define_insn "neon_vld4<mode>"
5558 [(set (match_operand:OI 0 "s_register_operand" "=w")
5559 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5560 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5564 if (<V_sz_elem> == 64)
5565 return "vld1.64\t%h0, %A1";
5567 return "vld4.<V_sz_elem>\t%h0, %A1";
5570 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5571 (const_string "neon_load1_4reg<q>")
5572 (const_string "neon_load4_4reg<q>")))]
;; Expander vec_load_lanesxi<mode> (VQ2 modes, XI operands): forwards both
;; operands unchanged to gen_neon_vld4<mode>.
5575 (define_expand "vec_load_lanesxi<mode>"
5576 [(match_operand:XI 0 "s_register_operand")
5577 (match_operand:XI 1 "neon_struct_operand")
5578 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5581 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Q-register vld4 expander (VQ2 modes).  The XI memory operand is split
;; into two OImode accesses: neon_vld4qa reads at offset 0 and neon_vld4qb
;; reads at offset GET_MODE_SIZE (OImode).  The second insn also receives
;; operands[0] as an input so that it updates the same register group.
5585 (define_expand "neon_vld4<mode>"
5586 [(match_operand:XI 0 "s_register_operand")
5587 (match_operand:XI 1 "neon_struct_operand")
5588 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5593 mem = adjust_address (operands[1], OImode, 0);
5594 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5595 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5596 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First insn of the Q-register vld4 expander: a vld4 from an OI memory
;; operand into the D registers at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of the XI destination.
5600 (define_insn "neon_vld4qa<mode>"
5601 [(set (match_operand:XI 0 "s_register_operand" "=w")
5602 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5603 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5607 int regno = REGNO (operands[0]);
5609 ops[0] = gen_rtx_REG (DImode, regno);
5610 ops[1] = gen_rtx_REG (DImode, regno + 4);
5611 ops[2] = gen_rtx_REG (DImode, regno + 8);
5612 ops[3] = gen_rtx_REG (DImode, regno + 12);
5613 ops[4] = operands[1];
5614 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5617 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second insn of the Q-register vld4 expander: a vld4 from an OI memory
;; operand into the D registers at REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14 of the XI destination.  Operand 2 is tied to operand 0
;; (constraint "0"), so the previous contents of the group are an input.
5620 (define_insn "neon_vld4qb<mode>"
5621 [(set (match_operand:XI 0 "s_register_operand" "=w")
5622 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5623 (match_operand:XI 2 "s_register_operand" "0")
5624 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5628 int regno = REGNO (operands[0]);
5630 ops[0] = gen_rtx_REG (DImode, regno + 2);
5631 ops[1] = gen_rtx_REG (DImode, regno + 6);
5632 ops[2] = gen_rtx_REG (DImode, regno + 10);
5633 ops[3] = gen_rtx_REG (DImode, regno + 14);
5634 ops[4] = operands[1];
5635 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5638 [(set_attr "type" "neon_load4_4reg<q>")]
5641 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5642 ;; here on big endian targets.
;; Single-lane vld4 for the VD_LANE modes: loads one <V_four_elem> structure
;; into lane LANE of the D registers at REGNO, REGNO + 2, REGNO + 4 and
;; REGNO + 6.  Operand 2 is tied to the OI destination (constraint "0").
;; The lane index is passed through NEON_ENDIAN_LANE_N.
5643 (define_insn "neon_vld4_lane<mode>"
5644 [(set (match_operand:OI 0 "s_register_operand" "=w")
5645 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5646 (match_operand:OI 2 "s_register_operand" "0")
5647 (match_operand:SI 3 "immediate_operand" "i")
5648 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5652 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5653 int regno = REGNO (operands[0]);
5655 ops[0] = gen_rtx_REG (DImode, regno);
5656 ops[1] = gen_rtx_REG (DImode, regno + 2);
5657 ops[2] = gen_rtx_REG (DImode, regno + 4);
5658 ops[3] = gen_rtx_REG (DImode, regno + 6);
5659 ops[4] = operands[1];
5660 ops[5] = GEN_INT (lane);
5661 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5665 [(set_attr "type" "neon_load4_one_lane<q>")]
5668 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5669 ;; here on big endian targets.
;; Single-lane vld4 for the VQ_HS modes: the destination is an XI register
;; group (tied to operand 2) and the D registers named in the instruction
;; are taken at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this view; confirm how the lane and register numbers are adjusted there.
5670 (define_insn "neon_vld4_lane<mode>"
5671 [(set (match_operand:XI 0 "s_register_operand" "=w")
5672 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5673 (match_operand:XI 2 "s_register_operand" "0")
5674 (match_operand:SI 3 "immediate_operand" "i")
5675 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5679 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5680 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5681 int regno = REGNO (operands[0]);
5683 if (lane >= max / 2)
5688 ops[0] = gen_rtx_REG (DImode, regno);
5689 ops[1] = gen_rtx_REG (DImode, regno + 4);
5690 ops[2] = gen_rtx_REG (DImode, regno + 8);
5691 ops[3] = gen_rtx_REG (DImode, regno + 12);
5692 ops[4] = operands[1];
5693 ops[5] = GEN_INT (lane);
5694 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5698 [(set_attr "type" "neon_load4_one_lane<q>")]
;; All-lanes vld4 (VDX modes).  When the vector mode has more than one
;; element, emits the "[]" form of vld4 on the D registers at REGNO,
;; REGNO + 2, REGNO + 4 and REGNO + 6; otherwise emits a vld1 of the
;; register list.  The "type" attribute is chosen by the same test.
5701 (define_insn "neon_vld4_dup<mode>"
5702 [(set (match_operand:OI 0 "s_register_operand" "=w")
5703 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5704 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5708 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5710 int regno = REGNO (operands[0]);
5712 ops[0] = gen_rtx_REG (DImode, regno);
5713 ops[1] = gen_rtx_REG (DImode, regno + 2);
5714 ops[2] = gen_rtx_REG (DImode, regno + 4);
5715 ops[3] = gen_rtx_REG (DImode, regno + 6);
5716 ops[4] = operands[1];
5717 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5722 return "vld1.<V_sz_elem>\t%h0, %A1";
5725 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5726 (const_string "neon_load4_all_lanes<q>")
5727 (const_string "neon_load1_1reg<q>")))]
;; Expander vec_store_lanesoi<mode> (VDX modes): sets an OI
;; neon_struct_operand memory reference from an unspec wrapping an OI
;; register operand.
5730 (define_expand "vec_store_lanesoi<mode>"
5731 [(set (match_operand:OI 0 "neon_struct_operand")
5732 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5733 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store an OI register group to an OI structure in memory (VDX modes).
;; 64-bit elements are emitted as vst1.64; every other element size is
;; emitted as vst4.  The "type" attribute is selected by the same
;; element-size test.
5737 (define_insn "neon_vst4<mode>"
5738 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5739 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5740 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5744 if (<V_sz_elem> == 64)
5745 return "vst1.64\t%h1, %A0";
5747 return "vst4.<V_sz_elem>\t%h1, %A0";
5750 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5751 (const_string "neon_store1_4reg<q>")
5752 (const_string "neon_store4_4reg<q>")))]
;; Expander vec_store_lanesxi<mode> (VQ2 modes, XI operands): forwards both
;; operands unchanged to gen_neon_vst4<mode>.
5755 (define_expand "vec_store_lanesxi<mode>"
5756 [(match_operand:XI 0 "neon_struct_operand")
5757 (match_operand:XI 1 "s_register_operand")
5758 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5761 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Q-register vst4 expander (VQ2 modes).  The XI memory operand is split
;; into two OImode accesses: neon_vst4qa writes at offset 0 and neon_vst4qb
;; writes at offset GET_MODE_SIZE (OImode).  Both take the same XI source.
5765 (define_expand "neon_vst4<mode>"
5766 [(match_operand:XI 0 "neon_struct_operand")
5767 (match_operand:XI 1 "s_register_operand")
5768 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5773 mem = adjust_address (operands[0], OImode, 0);
5774 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5775 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5776 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First insn of the Q-register vst4 expander: a vst4 to an OI memory
;; operand from the D registers at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of the XI source.
5780 (define_insn "neon_vst4qa<mode>"
5781 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5782 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5783 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5787 int regno = REGNO (operands[1]);
5789 ops[0] = operands[0];
5790 ops[1] = gen_rtx_REG (DImode, regno);
5791 ops[2] = gen_rtx_REG (DImode, regno + 4);
5792 ops[3] = gen_rtx_REG (DImode, regno + 8);
5793 ops[4] = gen_rtx_REG (DImode, regno + 12);
5794 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5797 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second insn of the Q-register vst4 expander: a vst4 to an OI memory
;; operand from the D registers at REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14 of the XI source.
5800 (define_insn "neon_vst4qb<mode>"
5801 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5802 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5803 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5807 int regno = REGNO (operands[1]);
5809 ops[0] = operands[0];
5810 ops[1] = gen_rtx_REG (DImode, regno + 2);
5811 ops[2] = gen_rtx_REG (DImode, regno + 6);
5812 ops[3] = gen_rtx_REG (DImode, regno + 10);
5813 ops[4] = gen_rtx_REG (DImode, regno + 14);
5814 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5817 [(set_attr "type" "neon_store4_4reg<q>")]
5820 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5821 ;; here on big endian targets.
;; Single-lane vst4 for the VD_LANE modes: stores lane LANE of the D
;; registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of the OI source
;; to a <V_four_elem> memory operand.  The lane index is passed through
;; NEON_ENDIAN_LANE_N.
5822 (define_insn "neon_vst4_lane<mode>"
5823 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5824 (unspec:<V_four_elem>
5825 [(match_operand:OI 1 "s_register_operand" "w")
5826 (match_operand:SI 2 "immediate_operand" "i")
5827 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5831 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5832 int regno = REGNO (operands[1]);
5834 ops[0] = operands[0];
5835 ops[1] = gen_rtx_REG (DImode, regno);
5836 ops[2] = gen_rtx_REG (DImode, regno + 2);
5837 ops[3] = gen_rtx_REG (DImode, regno + 4);
5838 ops[4] = gen_rtx_REG (DImode, regno + 6);
5839 ops[5] = GEN_INT (lane);
5840 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5844 [(set_attr "type" "neon_store4_one_lane<q>")]
5847 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5848 ;; here on big endian targets.
;; Single-lane vst4 for the VQ_HS modes: the source is an XI register group
;; and the D registers named in the instruction are taken at REGNO,
;; REGNO + 4, REGNO + 8 and REGNO + 12.  Only one lane of each register is
;; stored, so the insn is typed neon_store4_one_lane<q>, the same type as
;; the VD_LANE variant of this pattern and mirroring neon_vld4_lane.
;; NOTE(review): the body of the "lane >= max / 2" branch is not visible in
;; this view; confirm how the lane and register numbers are adjusted there.
5849 (define_insn "neon_vst4_lane<mode>"
5850 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5851 (unspec:<V_four_elem>
5852 [(match_operand:XI 1 "s_register_operand" "w")
5853 (match_operand:SI 2 "immediate_operand" "i")
5854 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5859 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5860 int regno = REGNO (operands[1]);
5862 if (lane >= max / 2)
5867 ops[0] = operands[0];
5868 ops[1] = gen_rtx_REG (DImode, regno);
5869 ops[2] = gen_rtx_REG (DImode, regno + 4);
5870 ops[3] = gen_rtx_REG (DImode, regno + 8);
5871 ops[4] = gen_rtx_REG (DImode, regno + 12);
5872 ops[5] = GEN_INT (lane);
5873 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5877 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widen the half of a VU-mode register selected by a
;; vect_par_constant_low parallel, using the SE extension code: emits vmovl
;; from the %e half of operand 1 into a Q register.  Little-endian only.
5880 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5881 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5882 (SE:<V_unpack> (vec_select:<V_HALF>
5883 (match_operand:VU 1 "register_operand" "w")
5884 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5885 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5886 "vmovl.<US><V_sz_elem> %q0, %e1"
5887 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the half of a VU-mode register selected by a
;; vect_par_constant_high parallel, using the SE extension code: emits vmovl
;; from the %f half of operand 1 into a Q register.  Little-endian only.
5890 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5891 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5892 (SE:<V_unpack> (vec_select:<V_HALF>
5893 (match_operand:VU 1 "register_operand" "w")
5894 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5895 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5896 "vmovl.<US><V_sz_elem> %q0, %f1"
5897 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the high-half unpack of a VU-mode vector.  Builds a PARALLEL
;; whose elements are the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and
;; passes it to gen_neon_vec_unpack<US>_hi_<mode>.  Little-endian only.
5900 (define_expand "vec_unpack<US>_hi_<mode>"
5901 [(match_operand:<V_unpack> 0 "register_operand" "")
5902 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5903 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5905 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5908 for (i = 0; i < (<V_mode_nunits>/2); i++)
5909 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5911 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5912 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for the low-half unpack of a VU-mode vector.  Builds a PARALLEL
;; whose elements are the indices 0 .. <V_mode_nunits>/2 - 1 and passes it
;; to gen_neon_vec_unpack<US>_lo_<mode>.  Little-endian only.
5919 (define_expand "vec_unpack<US>_lo_<mode>"
5920 [(match_operand:<V_unpack> 0 "register_operand" "")
5921 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5922 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5924 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5927 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5928 RTVEC_ELT (v, i) = GEN_INT (i);
5929 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5930 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of two VU-mode registers, each narrowed by a
;; vec_select (operand 2 is a vect_par_constant_low parallel): emits vmull
;; on the %e halves of operands 1 and 3.  Little-endian only.
5937 (define_insn "neon_vec_<US>mult_lo_<mode>"
5938 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5939 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5940 (match_operand:VU 1 "register_operand" "w")
5941 (match_operand:VU 2 "vect_par_constant_low" "")))
5942 (SE:<V_unpack> (vec_select:<V_HALF>
5943 (match_operand:VU 3 "register_operand" "w")
5945 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5946 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5947 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the low-half widening multiply of two VU-mode vectors.
;; Builds a PARALLEL of the indices 0 .. <V_mode_nunits>/2 - 1 and passes it
;; to gen_neon_vec_<US>mult_lo_<mode>.  Little-endian only.
5950 (define_expand "vec_widen_<US>mult_lo_<mode>"
5951 [(match_operand:<V_unpack> 0 "register_operand" "")
5952 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5953 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5954 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5956 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5959 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5960 RTVEC_ELT (v, i) = GEN_INT (i);
5961 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5963 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of two VU-mode registers, each narrowed by a
;; vec_select (operand 2 is a vect_par_constant_high parallel): emits vmull
;; on the %f halves of operands 1 and 3.  Little-endian only.
5971 (define_insn "neon_vec_<US>mult_hi_<mode>"
5972 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5973 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5974 (match_operand:VU 1 "register_operand" "w")
5975 (match_operand:VU 2 "vect_par_constant_high" "")))
5976 (SE:<V_unpack> (vec_select:<V_HALF>
5977 (match_operand:VU 3 "register_operand" "w")
5979 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5980 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5981 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the high-half widening multiply of two VU-mode vectors.
;; Builds a PARALLEL of the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1
;; and passes it to gen_neon_vec_<US>mult_hi_<mode>.  Little-endian only.
5984 (define_expand "vec_widen_<US>mult_hi_<mode>"
5985 [(match_operand:<V_unpack> 0 "register_operand" "")
5986 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5987 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5988 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5990 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5993 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5994 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5995 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5997 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening left shift of a VW-mode D register by a constant
;; (const_neon_scalar_shift_amount_operand): emits vshll into a Q register.
6006 (define_insn "neon_vec_<US>shiftl_<mode>"
6007 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6008 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6009 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6012 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6014 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the low-half widening shift of a VU-mode vector: takes the
;; <V_HALF>mode subreg of operand 1 at byte offset 0 and passes it to
;; gen_neon_vec_<US>shiftl_<V_half>.  Little-endian only.
6017 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6018 [(match_operand:<V_unpack> 0 "register_operand" "")
6019 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6020 (match_operand:SI 2 "immediate_operand" "i")]
6021 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6023 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6024 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the high-half widening shift of a VU-mode vector: takes the
;; <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and passes it to
;; gen_neon_vec_<US>shiftl_<V_half>.  Little-endian only.
6030 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6031 [(match_operand:<V_unpack> 0 "register_operand" "")
6032 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6033 (match_operand:SI 2 "immediate_operand" "i")]
6034 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6036 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6037 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6038 GET_MODE_SIZE (<V_HALF>mode)),
6044 ;; Vectorize for non-neon-quad case
;; Widen a whole VDI-mode D register into a <V_widen> Q register with vmovl.
;; NOTE(review): this vmovl is typed "neon_move", whereas the vmovl patterns
;; neon_vec_unpack<US>_lo/hi use "neon_shift_imm_long"; confirm which
;; scheduling type is intended.
6045 (define_insn "neon_unpack<US>_<mode>"
6046 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6047 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6049 "vmovl.<US><V_sz_elem> %q0, %P1"
6050 [(set_attr "type" "neon_move")]
;; Low-half unpack for VDI modes: widens the whole input into a temporary
;; <V_widen> register with neon_unpack<US>_<mode>, then extracts its low
;; half with neon_vget_low<V_widen_l>.
6053 (define_expand "vec_unpack<US>_lo_<mode>"
6054 [(match_operand:<V_double_width> 0 "register_operand" "")
6055 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6058 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6059 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6060 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; High-half unpack for VDI modes: widens the whole input into a temporary
;; <V_widen> register with neon_unpack<US>_<mode>, then extracts its high
;; half with neon_vget_high<V_widen_l>.
6066 (define_expand "vec_unpack<US>_hi_<mode>"
6067 [(match_operand:<V_double_width> 0 "register_operand" "")
6068 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6071 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6072 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6073 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode D registers: emits vmull into a
;; <V_widen> Q register.
6079 (define_insn "neon_vec_<US>mult_<mode>"
6080 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6081 (mult:<V_widen> (SE:<V_widen>
6082 (match_operand:VDI 1 "register_operand" "w"))
6084 (match_operand:VDI 2 "register_operand" "w"))))]
6086 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6087 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; High-half widening multiply for VDI modes: computes the full widened
;; product into a temporary <V_widen> register, then extracts its high half
;; with neon_vget_high<V_widen_l>.
6090 (define_expand "vec_widen_<US>mult_hi_<mode>"
6091 [(match_operand:<V_double_width> 0 "register_operand" "")
6092 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6093 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6096 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6097 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6098 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Low-half widening multiply for VDI modes: computes the full widened
;; product into a temporary <V_widen> register, then extracts its low half
;; with neon_vget_low<V_widen_l>.
6105 (define_expand "vec_widen_<US>mult_lo_<mode>"
6106 [(match_operand:<V_double_width> 0 "register_operand" "")
6107 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6108 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6111 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6112 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6113 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; High-half widening shift for VDI modes: computes the full widened shift
;; into a temporary <V_widen> register, then extracts its high half with
;; neon_vget_high<V_widen_l>.
6120 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6121 [(match_operand:<V_double_width> 0 "register_operand" "")
6122 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6123 (match_operand:SI 2 "immediate_operand" "i")]
6126 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6127 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6128 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Low-half widening shift for VDI modes: computes the full widened shift
;; into a temporary <V_widen> register, then extracts its low half with
;; neon_vget_low<V_widen_l>.
6134 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6135 [(match_operand:<V_double_width> 0 "register_operand" "")
6136 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6137 (match_operand:SI 2 "immediate_operand" "i")]
6140 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6141 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6142 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6148 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6149 ; because the ordering of vector elements in Q registers is different from what
6150 ; the semantics of the instructions require.
;; Truncate two VN-mode Q registers and concatenate the results: emits two
;; vmovn instructions, writing the %e half of the destination from operand 1
;; and the %f half from operand 2 (hence "multiple" type and length 8).  The
;; destination is earlyclobber ("=&w").  Little-endian only.
6152 (define_insn "vec_pack_trunc_<mode>"
6153 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6154 (vec_concat:<V_narrow_pack>
6155 (truncate:<V_narrow>
6156 (match_operand:VN 1 "register_operand" "w"))
6157 (truncate:<V_narrow>
6158 (match_operand:VN 2 "register_operand" "w"))))]
6159 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6160 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6161 [(set_attr "type" "multiple")
6162 (set_attr "length" "8")]
6165 ;; For the non-quad case.
;; Truncate one VN-mode Q register into a <V_narrow> D register with a
;; single vmovn.  Little-endian only.
6166 (define_insn "neon_vec_pack_trunc_<mode>"
6167 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6168 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6169 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6170 "vmovn.i<V_sz_elem>\t%P0, %q1"
6171 [(set_attr "type" "neon_move_narrow_q")]
;; Pack-truncate expander for the VSHFT modes: moves operands 1 and 2 into
;; the low and high halves of a temporary <V_DOUBLE> register, then narrows
;; that register with neon_vec_pack_trunc_<V_double>.  Little-endian only.
6174 (define_expand "vec_pack_trunc_<mode>"
6175 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6176 (match_operand:VSHFT 1 "register_operand" "")
6177 (match_operand:VSHFT 2 "register_operand")]
6178 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6180 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6182 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6183 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6184 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute difference written as (abs (minus a b)) for the VDQ modes: emits
;; vabd.  For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations; the "type" attribute depends on
;; <Is_float_mode>.
6188 (define_insn "neon_vabd<mode>_2"
6189 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6190 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6191 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6192 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6193 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6195 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6196 (const_string "neon_fp_abd_s<q>")
6197 (const_string "neon_abd<q>")))]
;; Absolute difference written as abs of a two-operand unspec (the unspec
;; name is not visible in this view) for the VDQ modes: emits vabd with the
;; <V_if_elem> suffix.  For float modes the pattern is only enabled under
;; flag_unsafe_math_optimizations.
6200 (define_insn "neon_vabd<mode>_3"
6201 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6202 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6203 (match_operand:VDQ 2 "s_register_operand" "w")]
6205 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6206 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6208 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6209 (const_string "neon_fp_abd_s<q>")
6210 (const_string "neon_abd<q>")))]
6213 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; SI -> DI sign extension when, after reload, the destination is a VFP
;; register: duplicate the source into the destination viewed as V2SI, then
;; arithmetic-shift the DI value right by 32.
6216 [(set (match_operand:DI 0 "s_register_operand" "")
6217 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6218 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6219 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6220 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6222 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI sign extension when, after reload, the destination is a VFP
;; register: duplicate the source into the destination viewed as V4HI, then
;; arithmetic-shift the DI value right by 48.
6226 [(set (match_operand:DI 0 "s_register_operand" "")
6227 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6228 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6229 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6230 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6232 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI sign extension when, after reload, the destination is a VFP
;; register: duplicate the source into the destination viewed as V8QI, then
;; arithmetic-shift the DI value right by 56.
6236 [(set (match_operand:DI 0 "s_register_operand" "")
6237 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6238 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6239 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6240 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6242 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SI -> DI zero extension when, after reload, the destination is a VFP
;; register: duplicate the source into the destination viewed as V2SI, then
;; logical-shift the DI value right by 32.
6246 [(set (match_operand:DI 0 "s_register_operand" "")
6247 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6248 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6249 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6250 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6252 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI zero extension when, after reload, the destination is a VFP
;; register: duplicate the source into the destination viewed as V4HI, then
;; logical-shift the DI value right by 48.
6256 [(set (match_operand:DI 0 "s_register_operand" "")
6257 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6258 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6259 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6260 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6262 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6266 [(set (match_operand:DI 0 "s_register_operand" "")
6267 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6268 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6269 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6270 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6272 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));