1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
508 (define_insn "add<mode>3_fp16"
510 (match_operand:VH 0 "s_register_operand" "=w")
512 (match_operand:VH 1 "s_register_operand" "w")
513 (match_operand:VH 2 "s_register_operand" "w")))]
514 "TARGET_NEON_FP16INST"
515 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
517 (if_then_else (match_test "<Is_float_mode>")
518 (const_string "neon_fp_addsub_s<q>")
519 (const_string "neon_add<q>")))]
522 (define_insn "adddi3_neon"
523 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
524 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
525 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
526 (clobber (reg:CC CC_REGNUM))]
529 switch (which_alternative)
531 case 0: /* fall through */
532 case 3: return "vadd.i64\t%P0, %P1, %P2";
538 default: gcc_unreachable ();
541 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
542 multiple,multiple,multiple")
543 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
544 (set_attr "length" "*,8,8,*,8,8,8")
545 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
548 (define_insn "*sub<mode>3_neon"
549 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
550 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
551 (match_operand:VDQ 2 "s_register_operand" "w")))]
552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
553 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
555 (if_then_else (match_test "<Is_float_mode>")
556 (const_string "neon_fp_addsub_s<q>")
557 (const_string "neon_sub<q>")))]
560 (define_insn "sub<mode>3_fp16"
562 (match_operand:VH 0 "s_register_operand" "=w")
564 (match_operand:VH 1 "s_register_operand" "w")
565 (match_operand:VH 2 "s_register_operand" "w")))]
566 "TARGET_NEON_FP16INST"
567 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
568 [(set_attr "type" "neon_sub<q>")]
571 (define_insn "subdi3_neon"
572 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
573 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
574 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
575 (clobber (reg:CC CC_REGNUM))]
578 switch (which_alternative)
580 case 0: /* fall through */
581 case 4: return "vsub.i64\t%P0, %P1, %P2";
582 case 1: /* fall through */
583 case 2: /* fall through */
584 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
585 default: gcc_unreachable ();
588 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
589 (set_attr "conds" "*,clob,clob,clob,*")
590 (set_attr "length" "*,8,8,8,*")
591 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
594 (define_insn "*mul<mode>3_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
597 (match_operand:VDQW 2 "s_register_operand" "w")))]
598 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
599 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
601 (if_then_else (match_test "<Is_float_mode>")
602 (const_string "neon_fp_mul_s<q>")
603 (const_string "neon_mul_<V_elem_ch><q>")))]
606 (define_insn "mul<mode>3add<mode>_neon"
607 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
608 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
609 (match_operand:VDQW 3 "s_register_operand" "w"))
610 (match_operand:VDQW 1 "s_register_operand" "0")))]
611 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
612 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
614 (if_then_else (match_test "<Is_float_mode>")
615 (const_string "neon_fp_mla_s<q>")
616 (const_string "neon_mla_<V_elem_ch><q>")))]
619 (define_insn "mul<mode>3add<mode>_neon"
620 [(set (match_operand:VH 0 "s_register_operand" "=w")
621 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
622 (match_operand:VH 3 "s_register_operand" "w"))
623 (match_operand:VH 1 "s_register_operand" "0")))]
624 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
625 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
626 [(set_attr "type" "neon_fp_mla_s<q>")]
629 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
630 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
631 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
632 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
633 (match_operand:VDQW 3 "s_register_operand" "w"))))]
634 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
635 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
637 (if_then_else (match_test "<Is_float_mode>")
638 (const_string "neon_fp_mla_s<q>")
639 (const_string "neon_mla_<V_elem_ch><q>")))]
642 ;; Fused multiply-accumulate
643 ;; We define each insn twice here:
644 ;; 1: with flag_unsafe_math_optimizations, so that the widening multiply phase
645 ;; can use it when converting to FMA.
646 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
647 (define_insn "fma<VCVTF:mode>4"
648 [(set (match_operand:VCVTF 0 "register_operand" "=w")
649 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
650 (match_operand:VCVTF 2 "register_operand" "w")
651 (match_operand:VCVTF 3 "register_operand" "0")))]
652 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
653 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
654 [(set_attr "type" "neon_fp_mla_s<q>")]
657 (define_insn "fma<VCVTF:mode>4_intrinsic"
658 [(set (match_operand:VCVTF 0 "register_operand" "=w")
659 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
660 (match_operand:VCVTF 2 "register_operand" "w")
661 (match_operand:VCVTF 3 "register_operand" "0")))]
662 "TARGET_NEON && TARGET_FMA"
663 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
664 [(set_attr "type" "neon_fp_mla_s<q>")]
667 ;; There is limited support for unsafe-math optimizations using the NEON FP16
668 ;; arithmetic instructions, so only the intrinsic is currently supported.
669 (define_insn "fma<VH:mode>4_intrinsic"
670 [(set (match_operand:VH 0 "register_operand" "=w")
672 (match_operand:VH 1 "register_operand" "w")
673 (match_operand:VH 2 "register_operand" "w")
674 (match_operand:VH 3 "register_operand" "0")))]
675 "TARGET_NEON_FP16INST"
676 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
677 [(set_attr "type" "neon_fp_mla_s<q>")]
680 (define_insn "*fmsub<VCVTF:mode>4"
681 [(set (match_operand:VCVTF 0 "register_operand" "=w")
682 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
683 (match_operand:VCVTF 2 "register_operand" "w")
684 (match_operand:VCVTF 3 "register_operand" "0")))]
685 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
686 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
687 [(set_attr "type" "neon_fp_mla_s<q>")]
690 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
691 [(set (match_operand:VCVTF 0 "register_operand" "=w")
693 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
694 (match_operand:VCVTF 2 "register_operand" "w")
695 (match_operand:VCVTF 3 "register_operand" "0")))]
696 "TARGET_NEON && TARGET_FMA"
697 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
698 [(set_attr "type" "neon_fp_mla_s<q>")]
701 (define_insn "fmsub<VH:mode>4_intrinsic"
702 [(set (match_operand:VH 0 "register_operand" "=w")
704 (neg:VH (match_operand:VH 1 "register_operand" "w"))
705 (match_operand:VH 2 "register_operand" "w")
706 (match_operand:VH 3 "register_operand" "0")))]
707 "TARGET_NEON_FP16INST"
708 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
709 [(set_attr "type" "neon_fp_mla_s<q>")]
712 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
713 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
714 (unspec:VCVTF [(match_operand:VCVTF 1
715 "s_register_operand" "w")]
717 "TARGET_NEON && TARGET_FPU_ARMV8"
718 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
719 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
722 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
723 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
724 (FIXUORS:<V_cmp_result> (unspec:VCVTF
725 [(match_operand:VCVTF 1 "register_operand" "w")]
727 "TARGET_NEON && TARGET_FPU_ARMV8"
728 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
729 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
730 (set_attr "predicable" "no")]
733 (define_insn "ior<mode>3"
734 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
735 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
736 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
739 switch (which_alternative)
741 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
742 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
743 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
744 default: gcc_unreachable ();
747 [(set_attr "type" "neon_logic<q>")]
750 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
751 ;; vorr. We support the pseudo-instruction vand instead, because that
752 ;; corresponds to the canonical form the middle-end expects to use for
753 ;; immediate bitwise-ANDs.
755 (define_insn "and<mode>3"
756 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
757 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
758 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
761 switch (which_alternative)
763 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
764 case 1: return neon_output_logic_immediate ("vand", &operands[2],
765 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
766 default: gcc_unreachable ();
769 [(set_attr "type" "neon_logic<q>")]
772 (define_insn "orn<mode>3_neon"
773 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
774 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
775 (match_operand:VDQ 1 "s_register_operand" "w")))]
777 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
778 [(set_attr "type" "neon_logic<q>")]
781 ;; TODO: investigate whether we should disable
782 ;; this and bicdi3_neon for the A8 in line with the other
784 (define_insn_and_split "orndi3_neon"
785 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
786 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
787 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
795 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
796 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
797 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
802 operands[3] = gen_highpart (SImode, operands[0]);
803 operands[0] = gen_lowpart (SImode, operands[0]);
804 operands[4] = gen_highpart (SImode, operands[2]);
805 operands[2] = gen_lowpart (SImode, operands[2]);
806 operands[5] = gen_highpart (SImode, operands[1]);
807 operands[1] = gen_lowpart (SImode, operands[1]);
811 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
812 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
816 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
817 (set_attr "length" "*,16,8,8")
818 (set_attr "arch" "any,a,t2,t2")]
821 (define_insn "bic<mode>3_neon"
822 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
823 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
824 (match_operand:VDQ 1 "s_register_operand" "w")))]
826 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
827 [(set_attr "type" "neon_logic<q>")]
830 ;; Compare to *anddi_notdi_di.
831 (define_insn "bicdi3_neon"
832 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
833 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
834 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
840 [(set_attr "type" "neon_logic,multiple,multiple")
841 (set_attr "length" "*,8,8")]
844 (define_insn "xor<mode>3"
845 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
846 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
847 (match_operand:VDQ 2 "s_register_operand" "w")))]
849 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
850 [(set_attr "type" "neon_logic<q>")]
853 (define_insn "one_cmpl<mode>2"
854 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
855 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
857 "vmvn\t%<V_reg>0, %<V_reg>1"
858 [(set_attr "type" "neon_move<q>")]
861 (define_insn "abs<mode>2"
862 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
863 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
865 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
867 (if_then_else (match_test "<Is_float_mode>")
868 (const_string "neon_fp_abs_s<q>")
869 (const_string "neon_abs<q>")))]
872 (define_insn "neg<mode>2"
873 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
874 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
876 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
878 (if_then_else (match_test "<Is_float_mode>")
879 (const_string "neon_fp_neg_s<q>")
880 (const_string "neon_neg<q>")))]
883 (define_insn "negdi2_neon"
884 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
885 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
886 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
887 (clobber (reg:CC CC_REGNUM))]
890 [(set_attr "length" "8")
891 (set_attr "type" "multiple")]
894 ; Split negdi2_neon for vfp registers
896 [(set (match_operand:DI 0 "s_register_operand" "")
897 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
898 (clobber (match_scratch:DI 2 ""))
899 (clobber (reg:CC CC_REGNUM))]
900 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
901 [(set (match_dup 2) (const_int 0))
902 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
903 (clobber (reg:CC CC_REGNUM))])]
905 if (!REG_P (operands[2]))
906 operands[2] = operands[0];
910 ; Split negdi2_neon for core registers
912 [(set (match_operand:DI 0 "s_register_operand" "")
913 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
914 (clobber (match_scratch:DI 2 ""))
915 (clobber (reg:CC CC_REGNUM))]
916 "TARGET_32BIT && reload_completed
917 && arm_general_register_operand (operands[0], DImode)"
918 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
919 (clobber (reg:CC CC_REGNUM))])]
923 (define_insn "<absneg_str><mode>2"
924 [(set (match_operand:VH 0 "s_register_operand" "=w")
925 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
926 "TARGET_NEON_FP16INST"
927 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
928 [(set_attr "type" "neon_abs<q>")]
931 (define_expand "neon_v<absneg_str><mode>"
933 (match_operand:VH 0 "s_register_operand")
934 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
935 "TARGET_NEON_FP16INST"
937 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
941 (define_insn "neon_v<fp16_rnd_str><mode>"
942 [(set (match_operand:VH 0 "s_register_operand" "=w")
944 [(match_operand:VH 1 "s_register_operand" "w")]
946 "TARGET_NEON_FP16INST"
947 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
948 [(set_attr "type" "neon_fp_round_s<q>")]
951 (define_insn "neon_vrsqrte<mode>"
952 [(set (match_operand:VH 0 "s_register_operand" "=w")
954 [(match_operand:VH 1 "s_register_operand" "w")]
956 "TARGET_NEON_FP16INST"
957 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
958 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
961 (define_insn "*umin<mode>3_neon"
962 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
963 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
964 (match_operand:VDQIW 2 "s_register_operand" "w")))]
966 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
967 [(set_attr "type" "neon_minmax<q>")]
970 (define_insn "*umax<mode>3_neon"
971 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
972 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
973 (match_operand:VDQIW 2 "s_register_operand" "w")))]
975 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
976 [(set_attr "type" "neon_minmax<q>")]
979 (define_insn "*smin<mode>3_neon"
980 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
981 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
982 (match_operand:VDQW 2 "s_register_operand" "w")))]
984 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
986 (if_then_else (match_test "<Is_float_mode>")
987 (const_string "neon_fp_minmax_s<q>")
988 (const_string "neon_minmax<q>")))]
991 (define_insn "*smax<mode>3_neon"
992 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
993 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
994 (match_operand:VDQW 2 "s_register_operand" "w")))]
996 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
998 (if_then_else (match_test "<Is_float_mode>")
999 (const_string "neon_fp_minmax_s<q>")
1000 (const_string "neon_minmax<q>")))]
1003 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1004 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector shift left (ashift) with two alternatives for the shift count:
;; alternative 0 takes it from a register and emits vshl directly;
;; alternative 1 takes a "Dn" immediate and obtains the assembly from
;; neon_output_shift_immediate.
1007 (define_insn "vashl<mode>3"
1008 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1009 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1010 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1013 switch (which_alternative)
1015 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1016 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1018 VALID_NEON_QREG_MODE (<MODE>mode),
1020 default: gcc_unreachable ();
1023 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Vector arithmetic shift right (ashiftrt) by an immediate accepted by
;; imm_for_neon_rshift_operand.  The assembly is produced by
;; neon_output_shift_immediate, called with mnemonic "vshr" and 's'.
1026 (define_insn "vashr<mode>3_imm"
1027 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1028 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1029 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1032 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1033 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1036 [(set_attr "type" "neon_shift_imm<q>")]
;; Vector logical shift right (lshiftrt) by an immediate accepted by
;; imm_for_neon_rshift_operand.  The assembly is produced by
;; neon_output_shift_immediate, called with mnemonic "vshr" and 'u'.
1039 (define_insn "vlshr<mode>3_imm"
1040 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1041 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1042 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1045 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1046 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1049 [(set_attr "type" "neon_shift_imm<q>")]
1052 ; Used for implementing arithmetic shift-right, which is a left-shift by a negative
1053 ; amount, with signed operands. This is essentially the same as vashl<mode>3
1054 ; above, but using an unspec in case GCC tries anything tricky with negative shift amounts.
;; Register-count vshl with the signed element suffix <V_s_elem>, modelled
;; as UNSPEC_ASHIFT_SIGNED rather than as an ashift rtx.  The vashr<mode>3
;; expander emits it with a negated shift count.
1057 (define_insn "ashl<mode>3_signed"
1058 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1059 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1060 (match_operand:VDQI 2 "s_register_operand" "w")]
1061 UNSPEC_ASHIFT_SIGNED))]
1063 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1064 [(set_attr "type" "neon_shift_reg<q>")]
1067 ; Used for implementing logical shift-right, which is a left-shift by a negative
1068 ; amount, with unsigned operands.
;; Register-count vshl with the unsigned element suffix <V_u_elem>,
;; modelled as UNSPEC_ASHIFT_UNSIGNED.  The vlshr<mode>3 expander emits it
;; with a negated shift count.
1070 (define_insn "ashl<mode>3_unsigned"
1071 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1072 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1073 (match_operand:VDQI 2 "s_register_operand" "w")]
1074 UNSPEC_ASHIFT_UNSIGNED))]
1076 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1077 [(set_attr "type" "neon_shift_reg<q>")]
;; Expander for vector arithmetic shift right.  When the shift count is a
;; register, it is negated into a fresh pseudo and the shift is emitted as
;; ashl<mode>3_signed; the immediate form goes through vashr<mode>3_imm.
1080 (define_expand "vashr<mode>3"
1081 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1082 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1083 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1086 if (s_register_operand (operands[2], <MODE>mode))
1088 rtx neg = gen_reg_rtx (<MODE>mode);
1089 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1090 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1093 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Expander for vector logical shift right.  When the shift count is a
;; register, it is negated into a fresh pseudo and the shift is emitted as
;; ashl<mode>3_unsigned; the immediate form goes through vlshr<mode>3_imm.
1097 (define_expand "vlshr<mode>3"
1098 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1099 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1100 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1103 if (s_register_operand (operands[2], <MODE>mode))
1105 rtx neg = gen_reg_rtx (<MODE>mode);
1106 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1107 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1110 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1116 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1117 ;; leaving the upper half uninitialized. This is OK since the shift
1118 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1119 ;; data flow analysis however, we pretend the full register is set using an unspec.
;; Places an SImode value into lane 0 of a DImode NEON register, modelled as
;; UNSPEC_LOAD_COUNT.  Alternative 0 ("Um") loads it from memory with
;; vld1.32; alternative 1 ("r") moves it from a core register with vmov.32.
1121 (define_insn "neon_load_count"
1122 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1123 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1124 UNSPEC_LOAD_COUNT))]
1127 vld1.32\t{%P0[0]}, %A1
1128 vmov.32\t%P0[0], %1"
1129 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift in a NEON register, without clobbers; matched only
;; after reload.  Alternative 0 takes a constant count, which the insn
;; condition restricts to 0..63; alternative 1 takes the count from a NEON
;; register.  Both emit vshl.u64.
1132 (define_insn "ashldi3_neon_noclobber"
1133 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1134 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1135 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1136 "TARGET_NEON && reload_completed
1137 && (!CONST_INT_P (operands[2])
1138 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1140 vshl.u64\t%P0, %P1, %2
1141 vshl.u64\t%P0, %P1, %P2"
1142 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift that can be allocated to either NEON ("w") or core
;; ("r") registers and is split once reload has completed.
;;   * Destination in a VFP/NEON register: a constant count below 1 becomes
;;     a plain DImode move, a constant count above 63 is replaced by 63, and
;;     a non-constant count is loaded into scratch operand 5 with
;;     neon_load_count; the shift is then emitted as ashldi3_neon_noclobber.
;;   * Otherwise: a count equal to CONST1_RTX (SImode) uses
;;     arm_ashldi3_1bit, anything else arm_emit_coreregs_64bit_shift with
;;     scratch operands 3 and 4.  Source and destination must either not
;;     overlap or be the same register (asserted).
;; NOTE(review): some lines of the split body (braces, else, DONE) are not
;; visible in this listing; the branch structure above is inferred from the
;; visible statements -- confirm against the full file.
1145 (define_insn_and_split "ashldi3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r, ?w,w")
1147 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w,w")
1148 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm,i")))
1149 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X, X,X"))
1150 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X, X,X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w,X"))
1152 (clobber (reg:CC_C CC_REGNUM))]
1155 "TARGET_NEON && reload_completed"
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1161 if (CONST_INT_P (operands[2]))
1163 if (INTVAL (operands[2]) < 1)
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1168 else if (INTVAL (operands[2]) > 63)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1173 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1174 operands[2] = operands[5];
1177 /* Ditch the unnecessary clobbers. */
1178 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1183 /* The shift expanders support either full overlap or no overlap. */
1184 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1185 || REGNO (operands[0]) == REGNO (operands[1]));
1187 if (operands[2] == CONST1_RTX (SImode))
1188 /* This clobbers CC. */
1189 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1191 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1192 operands[2], operands[3], operands[4]);
1196 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1197 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1198 (set_attr "type" "multiple")]
1201 ; The shift amount needs to be negated for right-shifts
;; DImode register-count shift emitted as vshl.s64 and modelled as
;; UNSPEC_ASHIFT_SIGNED; matched only after reload.
1202 (define_insn "signed_shift_di3_neon"
1203 [(set (match_operand:DI 0 "s_register_operand" "=w")
1204 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1205 (match_operand:DI 2 "s_register_operand" " w")]
1206 UNSPEC_ASHIFT_SIGNED))]
1207 "TARGET_NEON && reload_completed"
1208 "vshl.s64\t%P0, %P1, %P2"
1209 [(set_attr "type" "neon_shift_reg")]
1212 ; The shift amount needs to be negated for right-shifts
;; DImode register-count shift emitted as vshl.u64 and modelled as
;; UNSPEC_ASHIFT_UNSIGNED; matched only after reload.
1213 (define_insn "unsigned_shift_di3_neon"
1214 [(set (match_operand:DI 0 "s_register_operand" "=w")
1215 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1216 (match_operand:DI 2 "s_register_operand" " w")]
1217 UNSPEC_ASHIFT_UNSIGNED))]
1218 "TARGET_NEON && reload_completed"
1219 "vshl.u64\t%P0, %P1, %P2"
1220 [(set_attr "type" "neon_shift_reg")]
;; DImode arithmetic shift right by a constant in a NEON register, without
;; clobbers; matched only after reload.  The insn condition restricts the
;; count to 1..64.  Emits vshr.s64.
1223 (define_insn "ashrdi3_neon_imm_noclobber"
1224 [(set (match_operand:DI 0 "s_register_operand" "=w")
1225 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1226 (match_operand:DI 2 "const_int_operand" " i")))]
1227 "TARGET_NEON && reload_completed
1228 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1229 "vshr.s64\t%P0, %P1, %2"
1230 [(set_attr "type" "neon_shift_imm")]
;; DImode logical shift right by a constant in a NEON register, without
;; clobbers; matched only after reload.  The insn condition restricts the
;; count to 1..64.  Emits vshr.u64.
1233 (define_insn "lshrdi3_neon_imm_noclobber"
1234 [(set (match_operand:DI 0 "s_register_operand" "=w")
1235 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1236 (match_operand:DI 2 "const_int_operand" " i")))]
1237 "TARGET_NEON && reload_completed
1238 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1239 "vshr.u64\t%P0, %P1, %2"
1240 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (iterated over RSHIFTS) that can be allocated to
;; either NEON ("w") or core ("r") registers and are split once reload has
;; completed.
;;   * Destination in a VFP/NEON register, constant count: a count below 1
;;     becomes a plain DImode move, a count above 64 is replaced by 64, and
;;     the shift is emitted as <shift>di3_neon_imm_noclobber.
;;   * Destination in a VFP/NEON register, register count: the count is
;;     negated into scratch operand 3, loaded into scratch operand 5 with
;;     neon_load_count, and the shift is emitted as
;;     <shifttype>_shift_di3_neon (a left shift by a negative amount).
;;   * Otherwise: a count equal to CONST1_RTX (SImode) uses
;;     arm_<shift>di3_1bit, anything else arm_emit_coreregs_64bit_shift with
;;     scratch operands 3 and 4.  Source and destination must either not
;;     overlap or be the same register (asserted).
;; NOTE(review): some lines of the split body (braces, else, DONE) are not
;; visible in this listing; the branch structure above is inferred from the
;; visible statements -- confirm against the full file.
1245 (define_insn_and_split "<shift>di3_neon"
1246 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?&r,?w,?w")
1247 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1248 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1249 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1250 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1251 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1252 (clobber (reg:CC CC_REGNUM))]
1255 "TARGET_NEON && reload_completed"
1259 if (IS_VFP_REGNUM (REGNO (operands[0])))
1261 if (CONST_INT_P (operands[2]))
1263 if (INTVAL (operands[2]) < 1)
1265 emit_insn (gen_movdi (operands[0], operands[1]));
1268 else if (INTVAL (operands[2]) > 64)
1269 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1271 /* Ditch the unnecessary clobbers. */
1272 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1278 /* We must use a negative left-shift. */
1279 emit_insn (gen_negsi2 (operands[3], operands[2]));
1280 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1281 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1287 /* The shift expanders support either full overlap or no overlap. */
1288 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1289 || REGNO (operands[0]) == REGNO (operands[1]));
1291 if (operands[2] == CONST1_RTX (SImode))
1292 /* This clobbers CC. */
1293 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1295 /* This clobbers CC (ASHIFTRT by register only). */
1296 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1297 operands[2], operands[3], operands[4]);
1302 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1303 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1304 (set_attr "type" "multiple")]
1307 ;; Widening operations
;; Expander for the signed widening sum of a VQI-mode vector (operand 1)
;; into a <V_double_width> accumulator.  Operand 2 is first copied into
;; operand 0 when they are different rtxes; the two halves of operand 1 are
;; then accumulated by the vec_sel_widen_ssum_lo and _hi patterns.  p1 and
;; p2 are obtained from arm_simd_vect_par_cnst_half with false and true.
1309 (define_expand "widen_ssum<mode>3"
1310 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1311 (plus:<V_double_width>
1312 (sign_extend:<V_double_width>
1313 (match_operand:VQI 1 "s_register_operand" ""))
1314 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1317 machine_mode mode = GET_MODE (operands[1]);
1320 p1 = arm_simd_vect_par_cnst_half (mode, false);
1321 p2 = arm_simd_vect_par_cnst_half (mode, true);
1323 if (operands[0] != operands[2])
1324 emit_move_insn (operands[0], operands[2]);
1326 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1330 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Adds the sign-extended half of operand 1 selected by the
;; vect_par_constant_low parallel (operand 2) into accumulator operand 3,
;; which is tied to the destination.  Emits vaddw using %e1, or %f1 when
;; BYTES_BIG_ENDIAN.
;; Operand 1 carries no '%' modifier: the following operand is the lane
;; selector, so the two are not interchangeable.
1338 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1339 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1341 (sign_extend:<VW:V_widen>
1343 (match_operand:VQI 1 "s_register_operand" "w")
1344 (match_operand:VQI 2 "vect_par_constant_low" "")))
1345 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1348 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1349 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1351 [(set_attr "type" "neon_add_widen")])
;; Adds the sign-extended half of operand 1 selected by the
;; vect_par_constant_high parallel (operand 2) into accumulator operand 3,
;; which is tied to the destination.  Emits vaddw using %f1, or %e1 when
;; BYTES_BIG_ENDIAN.
;; Operand 1 carries no '%' modifier: the following operand is the lane
;; selector, so the two are not interchangeable.
1353 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1354 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1356 (sign_extend:<VW:V_widen>
1357 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")
1358 (match_operand:VQI 2 "vect_par_constant_high" "")))
1359 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1362 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1363 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1365 [(set_attr "type" "neon_add_widen")])
;; Signed widening add: the sign-extended VW-mode operand 1 is added to the
;; <V_widen> operand 2.  Emits vaddw with operand 2 as the wide source (%q2)
;; and operand 1 as the narrow source (%P1).
;; Operand 1 carries no '%' modifier: operands 1 and 2 have different modes
;; and fixed roles in the template, so they are not interchangeable.
1367 (define_insn "widen_ssum<mode>3"
1368 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1370 (sign_extend:<V_widen>
1371 (match_operand:VW 1 "s_register_operand" "w"))
1372 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1374 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1375 [(set_attr "type" "neon_add_widen")]
;; Expander for the unsigned widening sum of a VQI-mode vector (operand 1)
;; into a <V_double_width> accumulator.  Operand 2 is first copied into
;; operand 0 when they are different rtxes; the two halves of operand 1 are
;; then accumulated by the vec_sel_widen_usum_lo and _hi patterns.  p1 and
;; p2 are obtained from arm_simd_vect_par_cnst_half with false and true.
1378 (define_expand "widen_usum<mode>3"
1379 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1380 (plus:<V_double_width>
1381 (zero_extend:<V_double_width>
1382 (match_operand:VQI 1 "s_register_operand" ""))
1383 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1386 machine_mode mode = GET_MODE (operands[1]);
1389 p1 = arm_simd_vect_par_cnst_half (mode, false);
1390 p2 = arm_simd_vect_par_cnst_half (mode, true);
1392 if (operands[0] != operands[2])
1393 emit_move_insn (operands[0], operands[2]);
1395 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1399 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Adds the zero-extended half of operand 1 selected by the
;; vect_par_constant_low parallel (operand 2) into accumulator operand 3,
;; which is tied to the destination.  Emits vaddw using %e1, or %f1 when
;; BYTES_BIG_ENDIAN.
;; Operand 1 carries no '%' modifier: the following operand is the lane
;; selector, so the two are not interchangeable.
1407 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1408 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1410 (zero_extend:<VW:V_widen>
1412 (match_operand:VQI 1 "s_register_operand" "w")
1413 (match_operand:VQI 2 "vect_par_constant_low" "")))
1414 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1417 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1418 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1420 [(set_attr "type" "neon_add_widen")])
;; Adds the zero-extended half of operand 1 selected by the
;; vect_par_constant_high parallel (operand 2) into accumulator operand 3,
;; which is tied to the destination.  Emits vaddw using %f1, or %e1 when
;; BYTES_BIG_ENDIAN.
;; Operand 1 carries no '%' modifier: the following operand is the lane
;; selector, so the two are not interchangeable.
1422 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1423 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1425 (zero_extend:<VW:V_widen>
1426 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "w")
1427 (match_operand:VQI 2 "vect_par_constant_high" "")))
1428 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1431 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1432 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1434 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening add: the zero-extended VW-mode operand 1 is added to
;; the <V_widen> operand 2.  Emits vaddw with operand 2 as the wide source
;; (%q2) and operand 1 as the narrow source (%P1).
;; Operand 1 carries no '%' modifier: operands 1 and 2 have different modes
;; and fixed roles in the template, so they are not interchangeable.
1436 (define_insn "widen_usum<mode>3"
1437 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1438 (plus:<V_widen> (zero_extend:<V_widen>
1439 (match_operand:VW 1 "s_register_operand" "w"))
1440 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1442 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1443 [(set_attr "type" "neon_add_widen")]
1446 ;; Helpers for quad-word reduction operations
1448 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1449 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1450 ; N/2-element vector.
;; V4SI instance: combines elements 0-1 of operand 1 with elements 2-3 of
;; the same register and writes the V2SI result to operand 0.  The mnemonic
;; and signedness come from <VQH_mnem> and <VQH_sign>; the vqh_mnem
;; attribute is set so that type definitions can test it.
1452 (define_insn "quad_halves_<code>v4si"
1453 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1455 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1456 (parallel [(const_int 0) (const_int 1)]))
1457 (vec_select:V2SI (match_dup 1)
1458 (parallel [(const_int 2) (const_int 3)]))))]
1460 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1461 [(set_attr "vqh_mnem" "<VQH_mnem>")
1462 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V4SF instance: combines elements 0-1 of operand 1 with elements 2-3 of
;; the same register and writes the V2SF result to operand 0, using the
;; .f32 form of <VQH_mnem>.  Requires flag_unsafe_math_optimizations in
;; addition to TARGET_NEON.
1465 (define_insn "quad_halves_<code>v4sf"
1466 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1468 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1469 (parallel [(const_int 0) (const_int 1)]))
1470 (vec_select:V2SF (match_dup 1)
1471 (parallel [(const_int 2) (const_int 3)]))))]
1472 "TARGET_NEON && flag_unsafe_math_optimizations"
1473 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1474 [(set_attr "vqh_mnem" "<VQH_mnem>")
1475 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI instance: combines elements 0-3 of operand 1 with elements 4-7 of
;; the same register and writes the V4HI result to operand 0.  The mnemonic
;; and signedness come from <VQH_mnem> and <VQH_sign>.
;; Operand 0 is only written (it is the SET destination and the template
;; reads just %e1 and %f1), so it uses "=w" like the V4SI/V4SF variants.
1478 (define_insn "quad_halves_<code>v8hi"
1479 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1481 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1482 (parallel [(const_int 0) (const_int 1)
1483 (const_int 2) (const_int 3)]))
1484 (vec_select:V4HI (match_dup 1)
1485 (parallel [(const_int 4) (const_int 5)
1486 (const_int 6) (const_int 7)]))))]
1488 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1489 [(set_attr "vqh_mnem" "<VQH_mnem>")
1490 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI instance: combines elements 0-7 of operand 1 with elements 8-15 of
;; the same register and writes the V8QI result to operand 0.  The mnemonic
;; and signedness come from <VQH_mnem> and <VQH_sign>.
;; Operand 0 is only written (it is the SET destination and the template
;; reads just %e1 and %f1), so it uses "=w" like the V4SI/V4SF variants.
1493 (define_insn "quad_halves_<code>v16qi"
1494 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1496 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1497 (parallel [(const_int 0) (const_int 1)
1498 (const_int 2) (const_int 3)
1499 (const_int 4) (const_int 5)
1500 (const_int 6) (const_int 7)]))
1501 (vec_select:V8QI (match_dup 1)
1502 (parallel [(const_int 8) (const_int 9)
1503 (const_int 10) (const_int 11)
1504 (const_int 12) (const_int 13)
1505 (const_int 14) (const_int 15)]))))]
1507 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1508 [(set_attr "vqh_mnem" "<VQH_mnem>")
1509 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Expander that emits a move into the <V_HALF>mode subreg of the ANY128
;; operand 0 located at byte offset GET_MODE_SIZE (<V_HALF>mode).
;; NOTE(review): the source argument of emit_move_insn is not visible in
;; this listing; presumably operand 1 -- confirm against the full file.
1512 (define_expand "move_hi_quad_<mode>"
1513 [(match_operand:ANY128 0 "s_register_operand" "")
1514 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1517 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1518 GET_MODE_SIZE (<V_HALF>mode)),
;; Expander that emits a move into a <V_HALF>mode subreg of the ANY128
;; operand 0.
;; NOTE(review): the subreg offset and the source argument are not visible
;; in this listing; presumably offset 0 and operand 1 -- confirm against
;; the full file.
1523 (define_expand "move_lo_quad_<mode>"
1524 [(match_operand:ANY128 0 "s_register_operand" "")
1525 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1528 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1534 ;; Reduction operations
;; Sum reduction of a VD-mode vector to a scalar.  neon_pairwise_reduce is
;; called with the neon_vpadd_internal generator to fill a temporary
;; vector, and element 0 of that temporary is extracted into operand 0.
;; Float modes additionally require flag_unsafe_math_optimizations.
1536 (define_expand "reduc_plus_scal_<mode>"
1537 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1538 (match_operand:VD 1 "s_register_operand" "")]
1539 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1541 rtx vec = gen_reg_rtx (<MODE>mode);
1542 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1543 &gen_neon_vpadd_internal<mode>);
1544 /* The same result is actually computed into every element. */
1545 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Sum reduction of a VQ-mode vector to a scalar.  quad_halves_plus first
;; folds the two halves into a <V_HALF>mode temporary, which is then
;; reduced by the half-width reduc_plus_scal expander.  Not available when
;; BYTES_BIG_ENDIAN; float modes require flag_unsafe_math_optimizations.
1549 (define_expand "reduc_plus_scal_<mode>"
1550 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1551 (match_operand:VQ 1 "s_register_operand" "")]
1552 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1553 && !BYTES_BIG_ENDIAN"
1555 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1557 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1558 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; Sum reduction of a V2DI vector to a DImode scalar: emits
;; arm_reduc_plus_internal_v2di into a temporary V2DI register and extracts
;; element 0 of it into operand 0.  Not available when BYTES_BIG_ENDIAN.
1563 (define_expand "reduc_plus_scal_v2di"
1564 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1565 (match_operand:V2DI 1 "s_register_operand" "")]
1566 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1568 rtx vec = gen_reg_rtx (V2DImode);
1570 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1571 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; Emits vadd.i64 on the %e1 and %f1 halves of the V2DI source, writing the
;; sum to the %e0 half of the destination.  Modelled as an unspec; not
;; available when BYTES_BIG_ENDIAN.
1576 (define_insn "arm_reduc_plus_internal_v2di"
1577 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1578 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1581 "vadd.i64\t%e0, %e1, %f1"
1582 [(set_attr "type" "neon_add_q")]
;; Signed-minimum reduction of a VD-mode vector to a scalar.
;; neon_pairwise_reduce is called with the neon_vpsmin generator to fill a
;; temporary vector, and element 0 of it is extracted into operand 0.
;; Float modes additionally require flag_unsafe_math_optimizations.
1585 (define_expand "reduc_smin_scal_<mode>"
1586 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1587 (match_operand:VD 1 "s_register_operand" "")]
1588 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1590 rtx vec = gen_reg_rtx (<MODE>mode);
1592 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1593 &gen_neon_vpsmin<mode>);
1594 /* The result is computed into every element of the vector. */
1595 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-minimum reduction of a VQ-mode vector to a scalar.
;; quad_halves_smin folds the two halves into a <V_HALF>mode temporary,
;; which is then reduced by the half-width reduc_smin_scal expander.  Not
;; available when BYTES_BIG_ENDIAN; float modes require
;; flag_unsafe_math_optimizations.
1599 (define_expand "reduc_smin_scal_<mode>"
1600 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1601 (match_operand:VQ 1 "s_register_operand" "")]
1602 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1603 && !BYTES_BIG_ENDIAN"
1605 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1607 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1608 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-maximum reduction of a VD-mode vector to a scalar.
;; neon_pairwise_reduce is called with the neon_vpsmax generator to fill a
;; temporary vector, and element 0 of it is extracted into operand 0.
;; Float modes additionally require flag_unsafe_math_optimizations.
1613 (define_expand "reduc_smax_scal_<mode>"
1614 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1615 (match_operand:VD 1 "s_register_operand" "")]
1616 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1618 rtx vec = gen_reg_rtx (<MODE>mode);
1619 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1620 &gen_neon_vpsmax<mode>);
1621 /* The result is computed into every element of the vector. */
1622 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Signed-maximum reduction of a VQ-mode vector to a scalar.
;; quad_halves_smax folds the two halves into a <V_HALF>mode temporary,
;; which is then reduced by the half-width reduc_smax_scal expander.  Not
;; available when BYTES_BIG_ENDIAN; float modes require
;; flag_unsafe_math_optimizations.
1626 (define_expand "reduc_smax_scal_<mode>"
1627 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1628 (match_operand:VQ 1 "s_register_operand" "")]
1629 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1630 && !BYTES_BIG_ENDIAN"
1632 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1634 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1635 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-minimum reduction of a VDI-mode vector to a scalar.
;; neon_pairwise_reduce is called with the neon_vpumin generator to fill a
;; temporary vector, and element 0 of it is extracted into operand 0.
1640 (define_expand "reduc_umin_scal_<mode>"
1641 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1642 (match_operand:VDI 1 "s_register_operand" "")]
1645 rtx vec = gen_reg_rtx (<MODE>mode);
1646 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1647 &gen_neon_vpumin<mode>);
1648 /* The result is computed into every element of the vector. */
1649 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-minimum reduction of a VQI-mode vector to a scalar.
;; quad_halves_umin folds the two halves into a <V_HALF>mode temporary,
;; which is then reduced by the half-width reduc_umin_scal expander.  Not
;; available when BYTES_BIG_ENDIAN.
1653 (define_expand "reduc_umin_scal_<mode>"
1654 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1655 (match_operand:VQI 1 "s_register_operand" "")]
1656 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1658 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1660 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1661 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-maximum reduction of a VDI-mode vector to a scalar.
;; neon_pairwise_reduce is called with the neon_vpumax generator to fill a
;; temporary vector, and element 0 of it is extracted into operand 0.
1666 (define_expand "reduc_umax_scal_<mode>"
1667 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1668 (match_operand:VDI 1 "s_register_operand" "")]
1671 rtx vec = gen_reg_rtx (<MODE>mode);
1672 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1673 &gen_neon_vpumax<mode>);
1674 /* The result is computed into every element of the vector. */
1675 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Unsigned-maximum reduction of a VQI-mode vector to a scalar.
;; quad_halves_umax folds the two halves into a <V_HALF>mode temporary,
;; which is then reduced by the half-width reduc_umax_scal expander.  Not
;; available when BYTES_BIG_ENDIAN.
1679 (define_expand "reduc_umax_scal_<mode>"
1680 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1681 (match_operand:VQI 1 "s_register_operand" "")]
1682 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1684 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1686 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1687 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; vpadd on two VD-mode (D register) operands, modelled as an unspec.  The
;; element suffix comes from <V_if_elem>; the type attribute is the
;; floating-point reduction class when <Is_float_mode> is true and the
;; integer one otherwise.
1692 (define_insn "neon_vpadd_internal<mode>"
1693 [(set (match_operand:VD 0 "s_register_operand" "=w")
1694 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1695 (match_operand:VD 2 "s_register_operand" "w")]
1698 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1699 ;; Assume this schedules like vadd.
1701 (if_then_else (match_test "<Is_float_mode>")
1702 (const_string "neon_fp_reduc_add_s<q>")
1703 (const_string "neon_reduc_add<q>")))]
;; vpadd.f16 on two V4HF operands, modelled as an unspec.  Only enabled
;; when TARGET_NEON_FP16INST holds.
1706 (define_insn "neon_vpaddv4hf"
1708 (match_operand:V4HF 0 "s_register_operand" "=w")
1709 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1710 (match_operand:V4HF 2 "s_register_operand" "w")]
1712 "TARGET_NEON_FP16INST"
1713 "vpadd.f16\t%P0, %P1, %P2"
1714 [(set_attr "type" "neon_reduc_add")]
;; vpmin with the <V_s_elem> suffix on two VD-mode operands, modelled as an
;; unspec.  The type attribute is the floating-point reduction class when
;; <Is_float_mode> is true and the integer one otherwise.
1717 (define_insn "neon_vpsmin<mode>"
1718 [(set (match_operand:VD 0 "s_register_operand" "=w")
1719 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1720 (match_operand:VD 2 "s_register_operand" "w")]
1723 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1725 (if_then_else (match_test "<Is_float_mode>")
1726 (const_string "neon_fp_reduc_minmax_s<q>")
1727 (const_string "neon_reduc_minmax<q>")))]
;; vpmax with the <V_s_elem> suffix on two VD-mode operands, modelled as an
;; unspec.  The type attribute is the floating-point reduction class when
;; <Is_float_mode> is true and the integer one otherwise.
1730 (define_insn "neon_vpsmax<mode>"
1731 [(set (match_operand:VD 0 "s_register_operand" "=w")
1732 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1733 (match_operand:VD 2 "s_register_operand" "w")]
1736 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1738 (if_then_else (match_test "<Is_float_mode>")
1739 (const_string "neon_fp_reduc_minmax_s<q>")
1740 (const_string "neon_reduc_minmax<q>")))]
;; vpmin with the unsigned <V_u_elem> suffix on two VDI-mode operands,
;; modelled as an unspec.
1743 (define_insn "neon_vpumin<mode>"
1744 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1745 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1746 (match_operand:VDI 2 "s_register_operand" "w")]
1749 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1750 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vpmax with the unsigned <V_u_elem> suffix on two VDI-mode operands,
;; modelled as an unspec.
1753 (define_insn "neon_vpumax<mode>"
1754 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1755 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1756 (match_operand:VDI 2 "s_register_operand" "w")]
1759 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1760 [(set_attr "type" "neon_reduc_minmax<q>")]
1763 ;; Saturating arithmetic
1765 ; NOTE: Neon supports many more saturating variants of instructions than the
1766 ; following, but these are all GCC currently understands.
1767 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1768 ; yet either, although these patterns may be used by intrinsics when they're converted to use them.
;; Signed saturating add (ss_plus) of two VD-mode operands; emits vqadd
;; with the <V_s_elem> suffix.
1771 (define_insn "*ss_add<mode>_neon"
1772 [(set (match_operand:VD 0 "s_register_operand" "=w")
1773 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1774 (match_operand:VD 2 "s_register_operand" "w")))]
1776 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1777 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (us_plus) of two VD-mode operands; emits vqadd
;; with the <V_u_elem> suffix.
1780 (define_insn "*us_add<mode>_neon"
1781 [(set (match_operand:VD 0 "s_register_operand" "=w")
1782 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1783 (match_operand:VD 2 "s_register_operand" "w")))]
1785 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1786 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (ss_minus) of two VD-mode operands; emits
;; vqsub with the <V_s_elem> suffix.
1789 (define_insn "*ss_sub<mode>_neon"
1790 [(set (match_operand:VD 0 "s_register_operand" "=w")
1791 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1792 (match_operand:VD 2 "s_register_operand" "w")))]
1794 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1795 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (us_minus) of two VD-mode operands; emits
;; vqsub with the <V_u_elem> suffix.
1798 (define_insn "*us_sub<mode>_neon"
1799 [(set (match_operand:VD 0 "s_register_operand" "=w")
1800 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1801 (match_operand:VD 2 "s_register_operand" "w")))]
1803 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1804 [(set_attr "type" "neon_qsub<q>")]
1807 ;; Conditional instructions. These are comparisons with conditional moves for
1808 ;; vectors. They perform the assignment:
1810 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1812 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed element-wise.
;; Expander for the vector conditional select on VDQW modes; float modes
;; require flag_unsafe_math_optimizations.
;; Outline of the visible code:
;;   * operand 5 is tested against CONST0_RTX and, when not a register,
;;     forced into one;
;;   * base_comparison / complimentary_comparison are set to generator
;;     pairs built from vcge, vcgt and vceq (vclt / vcle are substituted in
;;     the zero-comparison switch);
;;   * a mask in <V_cmp_result>mode is produced by one comparison, or by two
;;     comparisons joined with ior using the temporary tmp;
;;   * the result is selected with neon_vbsl, passing operand 2 before
;;     operand 1 when swap_bsl_operands is set, i.e. when the emitted mask
;;     is the inverse of the requested comparison.
;; NOTE(review): the case labels of every switch are missing from this
;; listing, so the mapping from comparison code to emitted sequence cannot
;; be read off here -- consult the full file before changing this expander.
1815 (define_expand "vcond<mode><mode>"
1816 [(set (match_operand:VDQW 0 "s_register_operand" "")
1818 (match_operator 3 "comparison_operator"
1819 [(match_operand:VDQW 4 "s_register_operand" "")
1820 (match_operand:VDQW 5 "nonmemory_operand" "")])
1821 (match_operand:VDQW 1 "s_register_operand" "")
1822 (match_operand:VDQW 2 "s_register_operand" "")))]
1823 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1826 int use_zero_form = 0;
1827 int swap_bsl_operands = 0;
1828 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1829 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1831 rtx (*base_comparison) (rtx, rtx, rtx);
1832 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1834 switch (GET_CODE (operands[3]))
1841 if (operands[5] == CONST0_RTX (<MODE>mode))
1848 if (!REG_P (operands[5]))
1849 operands[5] = force_reg (<MODE>mode, operands[5]);
1852 switch (GET_CODE (operands[3]))
1862 base_comparison = gen_neon_vcge<mode>;
1863 complimentary_comparison = gen_neon_vcgt<mode>;
1871 base_comparison = gen_neon_vcgt<mode>;
1872 complimentary_comparison = gen_neon_vcge<mode>;
1877 base_comparison = gen_neon_vceq<mode>;
1878 complimentary_comparison = gen_neon_vceq<mode>;
1884 switch (GET_CODE (operands[3]))
1891 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1892 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1898 Note that there also exist direct comparison against 0 forms,
1899 so catch those as a special case. */
1903 switch (GET_CODE (operands[3]))
1906 base_comparison = gen_neon_vclt<mode>;
1909 base_comparison = gen_neon_vcle<mode>;
1912 /* Do nothing, other zero form cases already have the correct
1919 emit_insn (base_comparison (mask, operands[4], operands[5]));
1921 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1928 /* Vector compare returns false for lanes which are unordered, so if we use
1929 the inverse of the comparison we actually want to emit, then
1930 swap the operands to BSL, we will end up with the correct result.
1931 Note that a NE NaN and NaN NE b are true for all a, b.
1933 Our transformations are:
1938 a NE b -> !(a EQ b) */
1941 emit_insn (base_comparison (mask, operands[4], operands[5]));
1943 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1945 swap_bsl_operands = 1;
1948 /* We check (a > b || b > a). combining these comparisons give us
1949 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1950 will then give us (a == b || a UNORDERED b) as intended. */
1952 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1953 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1954 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1955 swap_bsl_operands = 1;
1958 /* Operands are ORDERED iff (a > b || b >= a).
1959 Swapping the operands to BSL will give the UNORDERED case. */
1960 swap_bsl_operands = 1;
1963 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1964 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1965 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1971 if (swap_bsl_operands)
1972 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1975 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Expander for the vector conditional select with an unsigned-style
;; comparison on VDQIW (integer) modes.  A mask in <V_cmp_result>mode is
;; produced by one of vcgeu, vcgtu, vceq, vcle or vclt, in several places
;; with operands 4 and 5 exchanged, and the result is selected with
;; neon_vbsl, which is emitted with operand 2 first in one arm and operand
;; 1 first in the other.
;; NOTE(review): the case labels and the conditions guarding the vcle/vclt
;; and inverse paths are missing from this listing; operand 5 is declared
;; with s_register_operand, so whether the CONST0_RTX test can ever succeed
;; should be confirmed against the full file.
1980 (define_expand "vcondu<mode><mode>"
1981 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1983 (match_operator 3 "arm_comparison_operator"
1984 [(match_operand:VDQIW 4 "s_register_operand" "")
1985 (match_operand:VDQIW 5 "s_register_operand" "")])
1986 (match_operand:VDQIW 1 "s_register_operand" "")
1987 (match_operand:VDQIW 2 "s_register_operand" "")))]
1991 int inverse = 0, immediate_zero = 0;
1993 mask = gen_reg_rtx (<V_cmp_result>mode);
1995 if (operands[5] == CONST0_RTX (<MODE>mode))
1997 else if (!REG_P (operands[5]))
1998 operands[5] = force_reg (<MODE>mode, operands[5]);
2000 switch (GET_CODE (operands[3]))
2003 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2007 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2011 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2016 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2018 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2023 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2025 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2029 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2038 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2041 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2047 ;; Patterns for builtins.
2049 ; good for plain vadd, vaddq.
;; Builtin expander for vadd on VCVTF modes.  It emits the standard
;; add<mode>3 pattern when the mode is not a float mode or
;; flag_unsafe_math_optimizations is set, and neon_vadd<mode>_unspec
;; otherwise.
2051 (define_expand "neon_vadd<mode>"
2052 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2053 (match_operand:VCVTF 1 "s_register_operand" "w")
2054 (match_operand:VCVTF 2 "s_register_operand" "w")]
2057 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2058 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2060 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; Builtin expander for vadd on VH modes: forwards the three operands to
;; add<mode>3_fp16.  Only enabled when TARGET_NEON_FP16INST holds.
2065 (define_expand "neon_vadd<mode>"
2066 [(match_operand:VH 0 "s_register_operand")
2067 (match_operand:VH 1 "s_register_operand")
2068 (match_operand:VH 2 "s_register_operand")]
2069 "TARGET_NEON_FP16INST"
2071 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; Builtin expander for vsub on VH modes: forwards the three operands to
;; sub<mode>3_fp16.  Only enabled when TARGET_NEON_FP16INST holds.
2075 (define_expand "neon_vsub<mode>"
2076 [(match_operand:VH 0 "s_register_operand")
2077 (match_operand:VH 1 "s_register_operand")
2078 (match_operand:VH 2 "s_register_operand")]
2079 "TARGET_NEON_FP16INST"
2081 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2085 ; Note that NEON operations don't support the full IEEE 754 standard: in
2086 ; particular, denormal values are flushed to zero. This means that GCC cannot
2087 ; use those instructions for autovectorization, etc. unless
2088 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2089 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2090 ; header) must work in either case: if -funsafe-math-optimizations is given,
2091 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2092 ; expand to unspecs (which may potentially limit the extent to which they might
2093 ; be optimized by generic code).
2095 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; vadd on VCVTF modes modelled as an unspec instead of a plus rtx.  The
;; element suffix comes from <V_if_elem>; the type attribute is the
;; floating-point add/sub class when <Is_float_mode> is true and the
;; integer add class otherwise.
2097 (define_insn "neon_vadd<mode>_unspec"
2098 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2099 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2100 (match_operand:VCVTF 2 "s_register_operand" "w")]
2103 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2105 (if_then_else (match_test "<Is_float_mode>")
2106 (const_string "neon_fp_addsub_s<q>")
2107 (const_string "neon_add<q>")))]
;; vaddl: both inputs are VDI registers and the result has the widened mode
;; <V_widen>.  <sup> is substituted into the pattern name and into the
;; element-type suffix of the mnemonic.
2110 (define_insn "neon_vaddl<sup><mode>"
2111 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2112 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2113 (match_operand:VDI 2 "s_register_operand" "w")]
2116 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2117 [(set_attr "type" "neon_add_long")]
;; vaddw: operand 1 is already in the widened mode <V_widen>, operand 2 is a
;; VDI register; the result is in <V_widen>.
2120 (define_insn "neon_vaddw<sup><mode>"
2121 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2122 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2123 (match_operand:VDI 2 "s_register_operand" "w")]
2126 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2127 [(set_attr "type" "neon_add_widen")]
;; vhadd / vrhadd (selected by <r>) over the VDQIW modes: two register
;; inputs, result in the same mode.  The "type" attribute uses <q> so that
;; the D-register and Q-register forms are classified separately, matching
;; the neon_sub_halve<q> attribute on neon_vhsub<sup><mode>.
2132 (define_insn "neon_v<r>hadd<sup><mode>"
2133 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2134 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2135 (match_operand:VDQIW 2 "s_register_operand" "w")]
2138 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2139 [(set_attr "type" "neon_add_halve<q>")]
;; vqadd over the VDQIX modes: two register inputs, result in the same mode;
;; type attribute neon_qadd<q>.
2142 (define_insn "neon_vqadd<sup><mode>"
2143 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2144 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2145 (match_operand:VDQIX 2 "s_register_operand" "w")]
2148 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2149 [(set_attr "type" "neon_qadd<q>")]
;; vaddhn / vraddhn (selected by <r>): both inputs are VN registers and the
;; result has the narrowed mode <V_narrow>.
2152 (define_insn "neon_v<r>addhn<mode>"
2153 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2154 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2155 (match_operand:VN 2 "s_register_operand" "w")]
2158 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2159 [(set_attr "type" "neon_add_halve_narrow_q")]
2162 ;; Polynomial and Float multiplication.
;; vmul over the VPF modes; <pf> supplies the element-type letter in the
;; mnemonic.  Type attribute: neon_fp_mul_s<q> for float modes, otherwise
;; neon_mul_<V_elem_ch><q>.
2163 (define_insn "neon_vmul<pf><mode>"
2164 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2165 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2166 (match_operand:VPF 2 "s_register_operand" "w")]
2169 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2171 (if_then_else (match_test "<Is_float_mode>")
2172 (const_string "neon_fp_mul_s<q>")
2173 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Half-precision multiply for the VH modes (vmul.f16), enabled by
;; TARGET_NEON_FP16INST.
2176 (define_insn "neon_vmulf<mode>"
2178 (match_operand:VH 0 "s_register_operand" "=w")
2180 (match_operand:VH 1 "s_register_operand" "w")
2181 (match_operand:VH 2 "s_register_operand" "w")))]
2182 "TARGET_NEON_FP16INST"
2183 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2184 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmla intrinsic expander over VDQW.  Operand 1 carries the "0" constraint
;; (tied to the destination).  Non-float modes, or any mode under
;; -funsafe-math-optimizations, go to mul<mode>3add<mode>_neon; the other
;; emit targets neon_vmla<mode>_unspec.
2187 (define_expand "neon_vmla<mode>"
2188 [(match_operand:VDQW 0 "s_register_operand" "=w")
2189 (match_operand:VDQW 1 "s_register_operand" "0")
2190 (match_operand:VDQW 2 "s_register_operand" "w")
2191 (match_operand:VDQW 3 "s_register_operand" "w")]
2194 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2195 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2196 operands[2], operands[3]));
2198 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2199 operands[2], operands[3]));
;; vfma intrinsic expander for the VCVTF modes, enabled by
;; TARGET_NEON && TARGET_FMA.  Note the reordering: operands[2] and
;; operands[3] are passed to fma<mode>4_intrinsic directly after the
;; destination.
2203 (define_expand "neon_vfma<VCVTF:mode>"
2204 [(match_operand:VCVTF 0 "s_register_operand")
2205 (match_operand:VCVTF 1 "s_register_operand")
2206 (match_operand:VCVTF 2 "s_register_operand")
2207 (match_operand:VCVTF 3 "s_register_operand")]
2208 "TARGET_NEON && TARGET_FMA"
2210 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfma intrinsic expander for the VH modes, enabled by TARGET_NEON_FP16INST.
;; operands[2] and operands[3] are passed to fma<mode>4_intrinsic directly
;; after the destination.
2215 (define_expand "neon_vfma<VH:mode>"
2216 [(match_operand:VH 0 "s_register_operand")
2217 (match_operand:VH 1 "s_register_operand")
2218 (match_operand:VH 2 "s_register_operand")
2219 (match_operand:VH 3 "s_register_operand")]
2220 "TARGET_NEON_FP16INST"
2222 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms intrinsic expander for the VCVTF modes, enabled by
;; TARGET_NEON && TARGET_FMA.  operands[2] and operands[3] are passed to
;; fmsub<mode>4_intrinsic directly after the destination.
2227 (define_expand "neon_vfms<VCVTF:mode>"
2228 [(match_operand:VCVTF 0 "s_register_operand")
2229 (match_operand:VCVTF 1 "s_register_operand")
2230 (match_operand:VCVTF 2 "s_register_operand")
2231 (match_operand:VCVTF 3 "s_register_operand")]
2232 "TARGET_NEON && TARGET_FMA"
2234 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms intrinsic expander for the VH modes, enabled by TARGET_NEON_FP16INST.
;; operands[2] and operands[3] are passed to fmsub<mode>4_intrinsic directly
;; after the destination.
2239 (define_expand "neon_vfms<VH:mode>"
2240 [(match_operand:VH 0 "s_register_operand")
2241 (match_operand:VH 1 "s_register_operand")
2242 (match_operand:VH 2 "s_register_operand")
2243 (match_operand:VH 3 "s_register_operand")]
2244 "TARGET_NEON_FP16INST"
2246 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2251 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla over VDQW.  Operand 1 is tied to the destination
;; ("0"), so the template prints only operands 0, 2 and 3.  Type attribute:
;; neon_fp_mla_s<q> for float modes, otherwise neon_mla_<V_elem_ch><q>.
2253 (define_insn "neon_vmla<mode>_unspec"
2254 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2255 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2256 (match_operand:VDQW 2 "s_register_operand" "w")
2257 (match_operand:VDQW 3 "s_register_operand" "w")]
2260 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2262 (if_then_else (match_test "<Is_float_mode>")
2263 (const_string "neon_fp_mla_s<q>")
2264 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: operand 1 is in the widened mode <V_widen> and tied to the
;; destination; operands 2 and 3 are VW registers.
2267 (define_insn "neon_vmlal<sup><mode>"
2268 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2269 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2270 (match_operand:VW 2 "s_register_operand" "w")
2271 (match_operand:VW 3 "s_register_operand" "w")]
2274 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2275 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; vmls intrinsic expander over VDQW.  Operand 1 carries the "0" constraint
;; (tied to the destination).  Non-float modes, or any mode under
;; -funsafe-math-optimizations, go to mul<mode>3neg<mode>add<mode>_neon; the
;; other emit targets neon_vmls<mode>_unspec.
2278 (define_expand "neon_vmls<mode>"
2279 [(match_operand:VDQW 0 "s_register_operand" "=w")
2280 (match_operand:VDQW 1 "s_register_operand" "0")
2281 (match_operand:VDQW 2 "s_register_operand" "w")
2282 (match_operand:VDQW 3 "s_register_operand" "w")]
2285 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2286 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2287 operands[1], operands[2], operands[3]));
2289 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2290 operands[2], operands[3]));
2294 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls over VDQW.  Operand 1 is tied to the destination
;; ("0"), so the template prints only operands 0, 2 and 3.  Uses the same
;; type attribute values as the vmla unspec pattern.
2296 (define_insn "neon_vmls<mode>_unspec"
2297 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2298 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2299 (match_operand:VDQW 2 "s_register_operand" "w")
2300 (match_operand:VDQW 3 "s_register_operand" "w")]
2303 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2305 (if_then_else (match_test "<Is_float_mode>")
2306 (const_string "neon_fp_mla_s<q>")
2307 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: operand 1 is in the widened mode <V_widen> and tied to the
;; destination; operands 2 and 3 are VW registers.
2310 (define_insn "neon_vmlsl<sup><mode>"
2311 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2312 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2313 (match_operand:VW 2 "s_register_operand" "w")
2314 (match_operand:VW 3 "s_register_operand" "w")]
2317 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2318 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2321 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics named above: <r> selects the rounding
;; form.  Two VMDQI register inputs, result in the same mode.
2322 (define_insn "neon_vq<r>dmulh<mode>"
2323 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2324 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2325 (match_operand:VMDQI 2 "s_register_operand" "w")]
2328 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2329 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2332 ;; vqrdmlah, vqrdmlsh
;; <VQRDMLH_AS:neon_rdma_as> selects between the two mnemonics named above.
;; Operand 1 is tied to the destination ("0"); operands 2 and 3 are printed.
;; NOTE(review): result and inputs share the VMDQI mode, yet the type
;; attribute is the "_long" variant -- confirm this is intended.
2333 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2334 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2335 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2336 (match_operand:VMDQI 2 "s_register_operand" "w")
2337 (match_operand:VMDQI 3 "s_register_operand" "w")]
2340 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2341 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: operand 1 is in the widened mode <V_widen> and tied to the
;; destination; operands 2 and 3 are VMDI registers.
2344 (define_insn "neon_vqdmlal<mode>"
2345 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2346 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2347 (match_operand:VMDI 2 "s_register_operand" "w")
2348 (match_operand:VMDI 3 "s_register_operand" "w")]
2351 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2352 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: operand 1 is in the widened mode <V_widen> and tied to the
;; destination; operands 2 and 3 are VMDI registers.
2355 (define_insn "neon_vqdmlsl<mode>"
2356 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2357 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2358 (match_operand:VMDI 2 "s_register_operand" "w")
2359 (match_operand:VMDI 3 "s_register_operand" "w")]
2362 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2363 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: two VW register inputs, result in the widened mode <V_widen>.
2366 (define_insn "neon_vmull<sup><mode>"
2367 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2368 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2369 (match_operand:VW 2 "s_register_operand" "w")]
2372 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2373 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: two VMDI register inputs, result in the widened mode <V_widen>.
2376 (define_insn "neon_vqdmull<mode>"
2377 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2378 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2379 (match_operand:VMDI 2 "s_register_operand" "w")]
2382 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2383 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub intrinsic over the VCVTF modes.  When the mode is
;; not a float mode, or -funsafe-math-optimizations is on, it emits the
;; generic sub<mode>3 pattern; the other emit targets the
;; neon_vsub<mode>_unspec insn.
2386 (define_expand "neon_vsub<mode>"
2387 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2388 (match_operand:VCVTF 1 "s_register_operand" "w")
2389 (match_operand:VCVTF 2 "s_register_operand" "w")]
2392 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2393 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2395 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2400 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub over VCVTF: two register inputs, printed as
;; vsub.<V_if_elem>.  The "type" attribute is neon_fp_addsub_s<q> for float
;; modes and neon_sub<q> otherwise.
2402 (define_insn "neon_vsub<mode>_unspec"
2403 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2404 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2405 (match_operand:VCVTF 2 "s_register_operand" "w")]
2408 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2410 (if_then_else (match_test "<Is_float_mode>")
2411 (const_string "neon_fp_addsub_s<q>")
2412 (const_string "neon_sub<q>")))]
;; vsubl: both inputs are VDI registers and the result has the widened mode
;; <V_widen>.
2415 (define_insn "neon_vsubl<sup><mode>"
2416 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2417 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2418 (match_operand:VDI 2 "s_register_operand" "w")]
2421 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2422 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 is already in the widened mode <V_widen>, operand 2 is a
;; VDI register; the result is in <V_widen>.
2425 (define_insn "neon_vsubw<sup><mode>"
2426 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2427 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2428 (match_operand:VDI 2 "s_register_operand" "w")]
2431 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2432 [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes: two register inputs, result in the same mode;
;; type attribute neon_qsub<q>.
2435 (define_insn "neon_vqsub<sup><mode>"
2436 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2437 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2438 (match_operand:VDQIX 2 "s_register_operand" "w")]
2441 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2442 [(set_attr "type" "neon_qsub<q>")]
;; vhsub over the VDQIW modes: two register inputs, result in the same mode;
;; type attribute neon_sub_halve<q>.
2445 (define_insn "neon_vhsub<sup><mode>"
2446 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2447 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2448 (match_operand:VDQIW 2 "s_register_operand" "w")]
2451 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2452 [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn / vrsubhn (selected by <r>): both inputs are VN registers and the
;; result has the narrowed mode <V_narrow>.
2455 (define_insn "neon_v<r>subhn<mode>"
2456 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2457 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2458 (match_operand:VN 2 "s_register_operand" "w")]
2461 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2462 [(set_attr "type" "neon_sub_halve_narrow_q")]
2465 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2466 ;; without unsafe math optimizations.
;; Compare expander over VDQW; the result has mode <V_cmp_result> and the
;; second input may be a register or zero.  For vector-float modes without
;; -funsafe-math-optimizations it emits the explicitly named v2sf or v4sf
;; *_insn_unspec generator; the remaining emit uses the generic
;; neon_vc<cmp_op><mode>_insn.
2467 (define_expand "neon_vc<cmp_op><mode>"
2468 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2470 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2471 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2474 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2476 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2477 && !flag_unsafe_math_optimizations)
2479 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2480 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2481 whereas this expander iterates over the integer modes as well,
2482 but we will never expand to UNSPECs for the integer comparisons. */
2486 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2491 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2500 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Canonical-RTL compare insn over VDQW.  Disabled for vector-float modes
;; unless -funsafe-math-optimizations is on.  Two alternatives: register
;; ("w") or zero ("Dz") second input.  The output code builds the template
;; with sprintf: the element-type letter is "f" for vector-float modes and
;; <cmp_type> otherwise, and the last operand is printed as %<V_reg>2 for
;; alternative 0 or as #0 for alternative 1.  The type attribute follows
;; whether operand 2 satisfies zero_operand.
2507 (define_insn "neon_vc<cmp_op><mode>_insn"
2508 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2510 (COMPARISONS:<V_cmp_result>
2511 (match_operand:VDQW 1 "s_register_operand" "w,w")
2512 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2513 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2514 && !flag_unsafe_math_optimizations)"
2517 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2519 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2520 ? "f" : "<cmp_type>",
2521 which_alternative == 0
2522 ? "%<V_reg>2" : "#0");
2523 output_asm_insn (pattern, operands);
2527 (if_then_else (match_operand 2 "zero_operand")
2528 (const_string "neon_compare_zero<q>")
2529 (const_string "neon_compare<q>")))]
;; Unspec compare insn for the VCVTF modes.  Two alternatives: register or
;; zero ("Dz") second input.  The template is built with sprintf and always
;; uses the "f" element type; the last operand is %<V_reg>2 for alternative
;; 0 and #0 for alternative 1.
2532 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2533 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2534 (unspec:<V_cmp_result>
2535 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2536 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2541 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2543 which_alternative == 0
2544 ? "%<V_reg>2" : "#0");
2545 output_asm_insn (pattern, operands);
2548 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare expander for the VH modes, enabled by TARGET_NEON_FP16INST.
;; Without -funsafe-math-optimizations it uses the *_fp16insn_unspec
;; generator; the other generator is the canonical *_fp16insn.
2551 (define_expand "neon_vc<cmp_op><mode>"
2552 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2555 (match_operand:VH 1 "s_register_operand")
2556 (match_operand:VH 2 "reg_or_zero_operand")))]
2557 "TARGET_NEON_FP16INST"
2559 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2561 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2562 && !flag_unsafe_math_optimizations)
2564 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2565 (operands[0], operands[1], operands[2]));
2568 (gen_neon_vc<cmp_op><mode>_fp16insn
2569 (operands[0], operands[1], operands[2]));
;; Canonical-RTL compare insn for the VH modes.  Requires
;; TARGET_NEON_FP16INST and is disabled for vector-float modes unless
;; -funsafe-math-optimizations is on.  Template construction mirrors the
;; VDQW compare insn: sprintf picks "f" or <cmp_type> for the element type
;; and %<V_reg>2 or #0 for the last operand depending on the alternative.
2573 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2574 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2576 (COMPARISONS:<V_cmp_result>
2577 (match_operand:VH 1 "s_register_operand" "w,w")
2578 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2579 "TARGET_NEON_FP16INST
2580 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2581 && !flag_unsafe_math_optimizations)"
2584 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2586 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2587 ? "f" : "<cmp_type>",
2588 which_alternative == 0
2589 ? "%<V_reg>2" : "#0");
2590 output_asm_insn (pattern, operands);
2594 (if_then_else (match_operand 2 "zero_operand")
2595 (const_string "neon_compare_zero<q>")
2596 (const_string "neon_compare<q>")))])
;; Unspec compare insn for the VH modes, enabled by TARGET_NEON_FP16INST.
;; Two alternatives: register or zero ("Dz") second input; the template is
;; built with sprintf and ends in %<V_reg>2 (alternative 0) or #0
;; (alternative 1).
2598 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2600 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2601 (unspec:<V_cmp_result>
2602 [(match_operand:VH 1 "s_register_operand" "w,w")
2603 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2605 "TARGET_NEON_FP16INST"
2608 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2610 which_alternative == 0
2611 ? "%<V_reg>2" : "#0");
2612 output_asm_insn (pattern, operands);
2615 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Compare using the GTUGEU code iterator over the VDQIW modes; the
;; mnemonic always carries the ".u" element type.  Both inputs must be
;; registers (no zero alternative here).
2617 (define_insn "neon_vc<cmp_op>u<mode>"
2618 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2620 (GTUGEU:<V_cmp_result>
2621 (match_operand:VDQIW 1 "s_register_operand" "w")
2622 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2624 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2625 [(set_attr "type" "neon_compare<q>")]
;; Expander for comparisons (GTGE codes) of the absolute values of two VCVTF
;; operands.  With -funsafe-math-optimizations it emits
;; neon_vca<cmp_op><mode>_insn; the other emit targets the *_insn_unspec
;; pattern.
2628 (define_expand "neon_vca<cmp_op><mode>"
2629 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2631 (GTGE:<V_cmp_result>
2632 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2633 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2636 if (flag_unsafe_math_optimizations)
2637 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2640 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; Canonical-RTL absolute compare for the VCVTF modes, printed as
;; vac<cmp_op>.  Only available with TARGET_NEON and
;; -funsafe-math-optimizations.
2647 (define_insn "neon_vca<cmp_op><mode>_insn"
2648 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2650 (GTGE:<V_cmp_result>
2651 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2652 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2653 "TARGET_NEON && flag_unsafe_math_optimizations"
2654 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2655 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec absolute compare for the VCVTF modes, printed as
;; vac<cmp_op_unsp>; result mode is <V_cmp_result>.
2658 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2659 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2660 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2661 (match_operand:VCVTF 2 "s_register_operand" "w")]
2664 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2665 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Expander for comparisons (GLTE codes) of the absolute values of two VH
;; operands, enabled by TARGET_NEON_FP16INST.  With
;; -funsafe-math-optimizations it emits the *_fp16insn pattern; the other
;; emit targets *_fp16insn_unspec.
2668 (define_expand "neon_vca<cmp_op><mode>"
2670 (match_operand:<V_cmp_result> 0 "s_register_operand")
2672 (GLTE:<V_cmp_result>
2673 (abs:VH (match_operand:VH 1 "s_register_operand"))
2674 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2675 "TARGET_NEON_FP16INST"
2677 if (flag_unsafe_math_optimizations)
2678 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2679 (operands[0], operands[1], operands[2]));
2681 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2682 (operands[0], operands[1], operands[2]));
;; Canonical-RTL absolute compare for the VH modes, printed as vac<cmp_op>.
;; Requires TARGET_NEON_FP16INST and -funsafe-math-optimizations.
2686 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2688 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2690 (GLTE:<V_cmp_result>
2691 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2692 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2693 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2694 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2695 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec absolute compare for the VH modes, printed as vac<cmp_op_unsp>;
;; result mode is <V_cmp_result>.
2698 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2699 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2700 (unspec:<V_cmp_result>
2701 [(match_operand:VH 1 "s_register_operand" "w")
2702 (match_operand:VH 2 "s_register_operand" "w")]
2705 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2706 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero expander for the VH modes, enabled by
;; TARGET_NEON_FP16INST.  Delegates to neon_vc<cmp_op><mode>, passing
;; CONST0_RTX (<MODE>mode) as the second input.
2709 (define_expand "neon_vc<cmp_op>z<mode>"
2711 (match_operand:<V_cmp_result> 0 "s_register_operand")
2712 (COMPARISONS:<V_cmp_result>
2713 (match_operand:VH 1 "s_register_operand")
2715 "TARGET_NEON_FP16INST"
2717 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2718 CONST0_RTX (<MODE>mode)));
;; vtst over the VDQIW modes: two register inputs, result in the same mode.
;; The mnemonic suffix is the bare element size (<V_sz_elem>).
2722 (define_insn "neon_vtst<mode>"
2723 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2724 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2725 (match_operand:VDQIW 2 "s_register_operand" "w")]
2728 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2729 [(set_attr "type" "neon_tst<q>")]
;; vabd over the integer VDQIW modes: two register inputs, result in the
;; same mode; type attribute neon_abd<q>.
2732 (define_insn "neon_vabd<sup><mode>"
2733 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2734 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2735 (match_operand:VDQIW 2 "s_register_operand" "w")]
2738 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2739 [(set_attr "type" "neon_abd<q>")]
;; vabd for the VH modes, enabled by TARGET_NEON_FP16INST.  Note that the
;; type attribute is neon_abd<q>, whereas the VCVTF variant
;; (neon_vabdf<mode>) uses neon_fp_abd_s<q>.
2742 (define_insn "neon_vabd<mode>"
2743 [(set (match_operand:VH 0 "s_register_operand" "=w")
2744 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2745 (match_operand:VH 2 "s_register_operand" "w")]
2747 "TARGET_NEON_FP16INST"
2748 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2749 [(set_attr "type" "neon_abd<q>")]
;; vabd for the VCVTF modes: two register inputs, result in the same mode;
;; type attribute neon_fp_abd_s<q>.
2752 (define_insn "neon_vabdf<mode>"
2753 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2754 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2755 (match_operand:VCVTF 2 "s_register_operand" "w")]
2758 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2759 [(set_attr "type" "neon_fp_abd_s<q>")]
;; vabdl: two VW register inputs, result in the widened mode <V_widen>.
2762 (define_insn "neon_vabdl<sup><mode>"
2763 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2764 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2765 (match_operand:VW 2 "s_register_operand" "w")]
2768 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2769 [(set_attr "type" "neon_abd_long")]
;; vaba over VDQIW, expressed as a plus of an unspec (operands 2 and 3) and
;; operand 1.  Operand 1 is tied to the destination ("0"), so only operands
;; 0, 2 and 3 appear in the template.
2772 (define_insn "neon_vaba<sup><mode>"
2773 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2774 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2775 (match_operand:VDQIW 3 "s_register_operand" "w")]
2777 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2779 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2780 [(set_attr "type" "neon_arith_acc<q>")]
;; vabal: a plus of an unspec over two VW inputs (operands 2 and 3) and
;; operand 1, all in the widened mode <V_widen>.  Operand 1 is tied to the
;; destination ("0").
2783 (define_insn "neon_vabal<sup><mode>"
2784 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2785 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2786 (match_operand:VW 3 "s_register_operand" "w")]
2788 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2790 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2791 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer vmax/vmin (selected by <maxmin>) over the VDQIW modes; type
;; attribute neon_minmax<q>.
2794 (define_insn "neon_v<maxmin><sup><mode>"
2795 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2796 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2797 (match_operand:VDQIW 2 "s_register_operand" "w")]
2800 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2801 [(set_attr "type" "neon_minmax<q>")]
;; vmax/vmin (selected by <maxmin>) for the VCVTF modes; type attribute
;; neon_fp_minmax_s<q>.
2804 (define_insn "neon_v<maxmin>f<mode>"
2805 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2806 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2807 (match_operand:VCVTF 2 "s_register_operand" "w")]
2810 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2811 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vmax/vmin (selected by <maxmin>) for the VH modes, enabled by
;; TARGET_NEON_FP16INST.
2814 (define_insn "neon_v<maxmin>f<mode>"
2815 [(set (match_operand:VH 0 "s_register_operand" "=w")
2817 [(match_operand:VH 1 "s_register_operand" "w")
2818 (match_operand:VH 2 "s_register_operand" "w")]
2820 "TARGET_NEON_FP16INST"
2821 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2822 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vpmax/vpmin (selected by <maxmin>) for V4HF only, printed with the .f16
;; suffix; enabled by TARGET_NEON_FP16INST.
2825 (define_insn "neon_vp<maxmin>fv4hf"
2826 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2828 [(match_operand:V4HF 1 "s_register_operand" "w")
2829 (match_operand:V4HF 2 "s_register_operand" "w")]
2831 "TARGET_NEON_FP16INST"
2832 "vp<maxmin>.f16\t%P0, %P1, %P2"
2833 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> instruction for the VH modes, enabled by
;; TARGET_NEON_FP16INST; the mnemonic itself comes from <fmaxmin_op>.
2836 (define_insn "neon_<fmaxmin_op><mode>"
2838 (match_operand:VH 0 "s_register_operand" "=w")
2840 [(match_operand:VH 1 "s_register_operand" "w")
2841 (match_operand:VH 2 "s_register_operand" "w")]
2843 "TARGET_NEON_FP16INST"
2844 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2845 [(set_attr "type" "neon_fp_minmax_s<q>")]
2848 ;; v<maxmin>nm intrinsics.
;; VCVTF variant; enabled by TARGET_NEON && TARGET_FPU_ARMV8.  The mnemonic
;; comes from <fmaxmin_op>.
2849 (define_insn "neon_<fmaxmin_op><mode>"
2850 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2851 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2852 (match_operand:VCVTF 2 "s_register_operand" "w")]
2854 "TARGET_NEON && TARGET_FPU_ARMV8"
2855 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2856 [(set_attr "type" "neon_fp_minmax_s<q>")]
2859 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Named <fmaxmin><mode>3 pattern for the VCVTF modes; prints the same
;; <fmaxmin_op> instruction as neon_<fmaxmin_op><mode> and has the same
;; TARGET_NEON && TARGET_FPU_ARMV8 condition.
2860 (define_insn "<fmaxmin><mode>3"
2861 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2862 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2863 (match_operand:VCVTF 2 "s_register_operand" "w")]
2865 "TARGET_NEON && TARGET_FPU_ARMV8"
2866 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2867 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vpadd intrinsic expander over the VD modes; forwards to
;; neon_vpadd_internal<mode>.
2870 (define_expand "neon_vpadd<mode>"
2871 [(match_operand:VD 0 "s_register_operand" "=w")
2872 (match_operand:VD 1 "s_register_operand" "w")
2873 (match_operand:VD 2 "s_register_operand" "w")]
2876 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: a single VDQIW register input; the result has mode
;; <V_double_width>.
2881 (define_insn "neon_vpaddl<sup><mode>"
2882 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2883 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2886 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2887 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 has mode <V_double_width> and is tied to the
;; destination ("0"); operand 2 is a VDQIW register.  Only operands 0 and 2
;; are printed.
2890 (define_insn "neon_vpadal<sup><mode>"
2891 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2892 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2893 (match_operand:VDQIW 2 "s_register_operand" "w")]
2896 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2897 [(set_attr "type" "neon_reduc_add_acc")]
;; Integer vpmax/vpmin (selected by <maxmin>) over the VDI modes; type
;; attribute neon_reduc_minmax<q>.
2900 (define_insn "neon_vp<maxmin><sup><mode>"
2901 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2902 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2903 (match_operand:VDI 2 "s_register_operand" "w")]
2906 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2907 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vpmax/vpmin (selected by <maxmin>) for the VCVTF modes; type attribute
;; neon_fp_reduc_minmax_s<q>.
2910 (define_insn "neon_vp<maxmin>f<mode>"
2911 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2912 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2913 (match_operand:VCVTF 2 "s_register_operand" "w")]
2916 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2917 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps for the VCVTF modes: two register inputs, result in the same mode.
2920 (define_insn "neon_vrecps<mode>"
2921 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2922 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2923 (match_operand:VCVTF 2 "s_register_operand" "w")]
2926 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2927 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps for the VH modes, enabled by TARGET_NEON_FP16INST.
2930 (define_insn "neon_vrecps<mode>"
2932 (match_operand:VH 0 "s_register_operand" "=w")
2933 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2934 (match_operand:VH 2 "s_register_operand" "w")]
2936 "TARGET_NEON_FP16INST"
2937 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2938 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts for the VCVTF modes: two register inputs, result in the same
;; mode.
2941 (define_insn "neon_vrsqrts<mode>"
2942 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2943 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2944 (match_operand:VCVTF 2 "s_register_operand" "w")]
2947 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2948 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts for the VH modes, enabled by TARGET_NEON_FP16INST.
2951 (define_insn "neon_vrsqrts<mode>"
2953 (match_operand:VH 0 "s_register_operand" "=w")
2954 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2955 (match_operand:VH 2 "s_register_operand" "w")]
2957 "TARGET_NEON_FP16INST"
2958 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2959 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vabs intrinsic expander over VDQW; a thin wrapper that emits abs<mode>2.
2962 (define_expand "neon_vabs<mode>"
2963 [(match_operand:VDQW 0 "s_register_operand" "")
2964 (match_operand:VDQW 1 "s_register_operand" "")]
2967 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs over the VDQIW modes: one register input, result in the same mode.
2971 (define_insn "neon_vqabs<mode>"
2972 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2973 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2976 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2977 [(set_attr "type" "neon_qabs<q>")]
;; bswap of each element of a VDQHSD vector, printed as
;; vrev<V_sz_elem>.8 (the element size selects the vrev width).
2980 (define_insn "neon_bswap<mode>"
2981 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2982 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2984 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2985 [(set_attr "type" "neon_rev<q>")]
;; vneg intrinsic expander over VDQW; a thin wrapper that emits neg<mode>2.
2988 (define_expand "neon_vneg<mode>"
2989 [(match_operand:VDQW 0 "s_register_operand" "")
2990 (match_operand:VDQW 1 "s_register_operand" "")]
2993 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; copysign expander for the VCVTF modes.  Steps visible below:
;;   1. build a constant vector in <VCVTF:V_cmp_result> mode whose elements
;;      are all 0x80000000 and load it into a fresh register;
;;   2. copy operands[2] into the destination;
;;   3. reinterpret the mask register in <MODE>mode via a subreg and emit
;;      neon_vbsl<mode> with the destination and that mask.
;; NOTE(review): GEN_INT (0x80000000) is not sign-extended for a 32-bit
;; element; gen_int_mode may be the canonical way to build it -- confirm.
2997 (define_expand "neon_copysignf<mode>"
2998 [(match_operand:VCVTF 0 "register_operand")
2999 (match_operand:VCVTF 1 "register_operand")
3000 (match_operand:VCVTF 2 "register_operand")]
3004 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3005 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
3006 rtvec v = rtvec_alloc (n_elt);
3008 /* Create bitmask for vector select. */
3009 for (i = 0; i < n_elt; ++i)
3010 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
3012 emit_move_insn (v_bitmask,
3013 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
3014 emit_move_insn (operands[0], operands[2]);
3015 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3016 <VCVTF:V_cmp_result>mode, 0);
3017 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg over the VDQIW modes: one register input, result in the same mode.
3024 (define_insn "neon_vqneg<mode>"
3025 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3026 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3029 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3030 [(set_attr "type" "neon_qneg<q>")]
;; vcls over the VDQIW modes: one register input, result in the same mode.
3033 (define_insn "neon_vcls<mode>"
3034 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3035 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3038 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3039 [(set_attr "type" "neon_cls<q>")]
;; Named clz<mode>2 pattern for the VDQIW modes, using the canonical clz RTL
;; code and printed as vclz.
3042 (define_insn "clz<mode>2"
3043 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3044 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3046 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3047 [(set_attr "type" "neon_cnt<q>")]
;; vclz intrinsic expander over VDQIW; a thin wrapper that emits clz<mode>2.
3050 (define_expand "neon_vclz<mode>"
3051 [(match_operand:VDQIW 0 "s_register_operand" "")
3052 (match_operand:VDQIW 1 "s_register_operand" "")]
3055 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Named popcount<mode>2 pattern for the VE modes, using the canonical
;; popcount RTL code and printed as vcnt.
3059 (define_insn "popcount<mode>2"
3060 [(set (match_operand:VE 0 "s_register_operand" "=w")
3061 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3063 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3064 [(set_attr "type" "neon_cnt<q>")]
;; vcnt intrinsic expander over VE; a thin wrapper that emits
;; popcount<mode>2.
3067 (define_expand "neon_vcnt<mode>"
3068 [(match_operand:VE 0 "s_register_operand" "=w")
3069 (match_operand:VE 1 "s_register_operand" "w")]
3072 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe for the VH modes (vrecpe.f16), enabled by TARGET_NEON_FP16INST.
3076 (define_insn "neon_vrecpe<mode>"
3077 [(set (match_operand:VH 0 "s_register_operand" "=w")
3078 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3080 "TARGET_NEON_FP16INST"
3081 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3082 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrecpe over the V32 modes; the element-type suffix comes from
;; <V_u_elem>.
3085 (define_insn "neon_vrecpe<mode>"
3086 [(set (match_operand:V32 0 "s_register_operand" "=w")
3087 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3090 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3091 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte over the V32 modes; the element-type suffix comes from
;; <V_u_elem>.
3094 (define_insn "neon_vrsqrte<mode>"
3095 [(set (match_operand:V32 0 "s_register_operand" "=w")
3096 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3099 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3100 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; vmvn intrinsic expander over VDQIW; a thin wrapper that emits
;; one_cmpl<mode>2.
3103 (define_expand "neon_vmvn<mode>"
3104 [(match_operand:VDQIW 0 "s_register_operand" "")
3105 (match_operand:VDQIW 1 "s_register_operand" "")]
3108 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Lane extract from a VD vector into an SImode core register ("=r"),
;; printed as vmov.s<V_sz_elem>.  Operand 2 is the immediate lane index; on
;; big-endian targets it is mirrored (nunits - 1 - elt) before printing.
3112 (define_insn "neon_vget_lane<mode>_sext_internal"
3113 [(set (match_operand:SI 0 "s_register_operand" "=r")
3115 (vec_select:<V_elem>
3116 (match_operand:VD 1 "s_register_operand" "w")
3117 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3120 if (BYTES_BIG_ENDIAN)
3122 int elt = INTVAL (operands[2]);
3123 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3124 operands[2] = GEN_INT (elt);
3126 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3128 [(set_attr "type" "neon_to_gp")]
;; Lane extract from a VD vector into an SImode core register ("=r"),
;; printed as vmov.u<V_sz_elem>.  Operand 2 is the immediate lane index; on
;; big-endian targets it is mirrored (nunits - 1 - elt) before printing.
3131 (define_insn "neon_vget_lane<mode>_zext_internal"
3132 [(set (match_operand:SI 0 "s_register_operand" "=r")
3134 (vec_select:<V_elem>
3135 (match_operand:VD 1 "s_register_operand" "w")
3136 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3139 if (BYTES_BIG_ENDIAN)
3141 int elt = INTVAL (operands[2]);
3142 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3143 operands[2] = GEN_INT (elt);
3145 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3147 [(set_attr "type" "neon_to_gp")]
;; VQ2-mode form of the signed lane extract.  The lane is addressed through
;; one half of the vector: half index elt / halfelts selects a <V_HALF>mode
;; register at REGNO (operand 1) + 2 * half, and the lane inside that half
;; is elt % halfelts (mirrored within the half on big-endian).  The
;; "vmov.s<size>" instruction is emitted via output_asm_insn using a local
;; ops[] array rather than the insn's own operands.
3150 (define_insn "neon_vget_lane<mode>_sext_internal"
3151 [(set (match_operand:SI 0 "s_register_operand" "=r")
3153 (vec_select:<V_elem>
3154 (match_operand:VQ2 1 "s_register_operand" "w")
3155 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3159 int regno = REGNO (operands[1]);
3160 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3161 unsigned int elt = INTVAL (operands[2]);
3162 unsigned int elt_adj = elt % halfelts;
3164 if (BYTES_BIG_ENDIAN)
3165 elt_adj = halfelts - 1 - elt_adj;
3167 ops[0] = operands[0];
3168 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3169 ops[2] = GEN_INT (elt_adj);
3170 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3174 [(set_attr "type" "neon_to_gp_q")]
;; VQ2-mode form of the unsigned lane extract.  The lane is addressed
;; through one half of the vector: half index elt / halfelts selects a
;; <V_HALF>mode register at REGNO (operand 1) + 2 * half, and the lane
;; inside that half is elt % halfelts (mirrored within the half on
;; big-endian).  The "vmov.u<size>" instruction is emitted via
;; output_asm_insn using a local ops[] array.
3177 (define_insn "neon_vget_lane<mode>_zext_internal"
3178 [(set (match_operand:SI 0 "s_register_operand" "=r")
3180 (vec_select:<V_elem>
3181 (match_operand:VQ2 1 "s_register_operand" "w")
3182 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3186 int regno = REGNO (operands[1]);
3187 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3188 unsigned int elt = INTVAL (operands[2]);
3189 unsigned int elt_adj = elt % halfelts;
3191 if (BYTES_BIG_ENDIAN)
3192 elt_adj = halfelts - 1 - elt_adj;
3194 ops[0] = operands[0];
3195 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3196 ops[2] = GEN_INT (elt_adj);
3197 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3201 [(set_attr "type" "neon_to_gp_q")]
;; Expander neon_vget_lane<mode> (VDQW modes).  On big-endian targets the
;; lane number in operand 2 is first XORed with (64 / element-bits - 1).
;; Modes with 32-bit elements are expanded through vec_extract<mode>; the
;; other emit shown targets neon_vget_lane<mode>_sext_internal.
;; NOTE(review): presumably the sext path is the non-32-bit alternative --
;; confirm against the full conditional.
3204 (define_expand "neon_vget_lane<mode>"
3205 [(match_operand:<V_ext> 0 "s_register_operand" "")
3206 (match_operand:VDQW 1 "s_register_operand" "")
3207 (match_operand:SI 2 "immediate_operand" "")]
3210 if (BYTES_BIG_ENDIAN)
3212 /* The intrinsics are defined in terms of a model where the
3213 element ordering in memory is vldm order, whereas the generic
3214 RTL is defined in terms of a model where the element ordering
3215 in memory is array order. Convert the lane number to conform
3217 unsigned int elt = INTVAL (operands[2]);
3218 unsigned int reg_nelts
3219 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3220 elt ^= reg_nelts - 1;
3221 operands[2] = GEN_INT (elt);
3224 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3225 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
3227 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Expander neon_vget_laneu<mode> (VDQIW modes): the unsigned counterpart
;; of neon_vget_lane<mode>.  On big-endian targets the lane number in
;; operand 2 is first XORed with (64 / element-bits - 1).  Modes with
;; 32-bit elements are expanded through vec_extract<mode>; the other emit
;; shown targets neon_vget_lane<mode>_zext_internal.
;; NOTE(review): presumably the zext path is the non-32-bit alternative --
;; confirm against the full conditional.
3233 (define_expand "neon_vget_laneu<mode>"
3234 [(match_operand:<V_ext> 0 "s_register_operand" "")
3235 (match_operand:VDQIW 1 "s_register_operand" "")
3236 (match_operand:SI 2 "immediate_operand" "")]
3239 if (BYTES_BIG_ENDIAN)
3241 /* The intrinsics are defined in terms of a model where the
3242 element ordering in memory is vldm order, whereas the generic
3243 RTL is defined in terms of a model where the element ordering
3244 in memory is array order. Convert the lane number to conform
3246 unsigned int elt = INTVAL (operands[2]);
3247 unsigned int reg_nelts
3248 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3249 elt ^= reg_nelts - 1;
3250 operands[2] = GEN_INT (elt);
3253 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3254 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
3256 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DI form of vget_lane: expands to a plain move of operand 1 into
;; operand 0.  The lane operand (2) is not referenced by the expansion.
3262 (define_expand "neon_vget_lanedi"
3263 [(match_operand:DI 0 "s_register_operand" "=r")
3264 (match_operand:DI 1 "s_register_operand" "w")
3265 (match_operand:SI 2 "immediate_operand" "")]
3268 emit_move_insn (operands[0], operands[1]);
;; V2DI form of vget_lane.  On big-endian targets the lane number is
;; flipped (XOR with 1, since reg_nelts is 2).  The resulting lane is
;; asserted to be 0 or 1; lane 0 moves the DImode low part of operand 1
;; into operand 0, lane 1 moves the high part.
3272 (define_expand "neon_vget_lanev2di"
3273 [(match_operand:DI 0 "s_register_operand" "")
3274 (match_operand:V2DI 1 "s_register_operand" "")
3275 (match_operand:SI 2 "immediate_operand" "")]
3280 if (BYTES_BIG_ENDIAN)
3282 /* The intrinsics are defined in terms of a model where the
3283 element ordering in memory is vldm order, whereas the generic
3284 RTL is defined in terms of a model where the element ordering
3285 in memory is array order. Convert the lane number to conform
3287 unsigned int elt = INTVAL (operands[2]);
3288 unsigned int reg_nelts = 2;
3289 elt ^= reg_nelts - 1;
3290 operands[2] = GEN_INT (elt);
3293 lane = INTVAL (operands[2]);
3294 gcc_assert ((lane ==0) || (lane == 1));
3295 emit_move_insn (operands[0], lane == 0
3296 ? gen_lowpart (DImode, operands[1])
3297 : gen_highpart (DImode, operands[1]));
;; Expander neon_vset_lane<mode> (VDQ modes).  Operand 1 is the new element,
;; operand 2 the vector being updated, operand 3 the lane number.  The lane
;; number is turned into a one-hot mask (1 << elt) for
;; vec_set<mode>_internal; on big-endian targets it is first XORed with
;; (64 / element-bits - 1).  Note the callee's argument order:
;; (destination, element, mask, source vector).
3301 (define_expand "neon_vset_lane<mode>"
3302 [(match_operand:VDQ 0 "s_register_operand" "=w")
3303 (match_operand:<V_elem> 1 "s_register_operand" "r")
3304 (match_operand:VDQ 2 "s_register_operand" "0")
3305 (match_operand:SI 3 "immediate_operand" "i")]
3308 unsigned int elt = INTVAL (operands[3]);
3310 if (BYTES_BIG_ENDIAN)
3312 unsigned int reg_nelts
3313 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3314 elt ^= reg_nelts - 1;
3317 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3318 GEN_INT (1 << elt), operands[2]));
3322 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DI form of vset_lane: expands to a plain move of operand 1 (the new
;; value) into operand 0.
3324 (define_expand "neon_vset_lanedi"
3325 [(match_operand:DI 0 "s_register_operand" "=w")
3326 (match_operand:DI 1 "s_register_operand" "r")
3327 (match_operand:DI 2 "s_register_operand" "0")
3328 (match_operand:SI 3 "immediate_operand" "i")]
3331 emit_move_insn (operands[0], operands[1]);
;; Expander neon_vcreate<mode> (VD_RE modes): views the DI operand 1 in
;; <MODE>mode via gen_lowpart and moves the result into operand 0.
3335 (define_expand "neon_vcreate<mode>"
3336 [(match_operand:VD_RE 0 "s_register_operand" "")
3337 (match_operand:DI 1 "general_operand" "")]
3340 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3341 emit_move_insn (operands[0], src);
;; neon_vdup_n<mode> for VX modes: vec_duplicate of a scalar held in a core
;; register (constraint "r"), printed as "vdup.<size>".
3345 (define_insn "neon_vdup_n<mode>"
3346 [(set (match_operand:VX 0 "s_register_operand" "=w")
3347 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3349 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3350 [(set_attr "type" "neon_from_gp<q>")]
;; V4HF form of vdup_n: vec_duplicate of an HF scalar held in a core
;; register (constraint "r").
3353 (define_insn "neon_vdup_nv4hf"
3354 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3355 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3358 [(set_attr "type" "neon_from_gp")]
;; V8HF form of vdup_n: vec_duplicate of an HF scalar held in a core
;; register (constraint "r").
3361 (define_insn "neon_vdup_nv8hf"
3362 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3363 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3366 [(set_attr "type" "neon_from_gp_q")]
;; neon_vdup_n<mode> for V32 modes, with two alternatives for the scalar:
;; constraint "r" (printed as %1, typed neon_from_gp) and constraint "t"
;; (printed with the %y modifier, typed neon_dup).
3369 (define_insn "neon_vdup_n<mode>"
3370 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3371 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3374 vdup.<V_sz_elem>\t%<V_reg>0, %1
3375 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3376 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DI form of vdup_n: expands to a plain move of operand 1 into operand 0.
3379 (define_expand "neon_vdup_ndi"
3380 [(match_operand:DI 0 "s_register_operand" "=w")
3381 (match_operand:DI 1 "s_register_operand" "r")]
3384 emit_move_insn (operands[0], operands[1]);
;; V2DI form of vdup_n: vec_duplicate of a DI value taken either from core
;; registers ("r", printed as the %Q1/%R1 pair) or from a "w" register.
;; Each alternative prints two vmov instructions, one to %e0 and one to
;; %f0, which is consistent with the length attribute of 8.
3389 (define_insn "neon_vdup_nv2di"
3390 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3391 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3394 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3395 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3396 [(set_attr "length" "8")
3397 (set_attr "type" "multiple")]
;; neon_vdup_lane<mode>_internal for VDQW modes: selects lane operand 2 of
;; operand 1 (a <V_double_vector_mode> register) and prints a "vdup" from
;; that lane.  On big-endian targets the lane number is mirrored within
;; <V_double_vector_mode> before printing.
;; NOTE(review): there are two return templates, one with a %P0 and one
;; with a %q0 destination -- confirm the condition that chooses between
;; them.
3400 (define_insn "neon_vdup_lane<mode>_internal"
3401 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3403 (vec_select:<V_elem>
3404 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3405 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3408 if (BYTES_BIG_ENDIAN)
3410 int elt = INTVAL (operands[2]);
3411 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3412 operands[2] = GEN_INT (elt);
3415 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3417 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3419 [(set_attr "type" "neon_dup<q>")]
;; neon_vdup_lane<mode>_internal for VH modes, enabled under
;; TARGET_NEON && TARGET_FP16.  Selects lane operand 2 of operand 1 (a
;; <V_double_vector_mode> register) and prints a "vdup" from that lane; on
;; big-endian targets the lane number is mirrored first.
;; NOTE(review): there are two return templates, one with a %P0 and one
;; with a %q0 destination -- confirm the condition that chooses between
;; them.
3422 (define_insn "neon_vdup_lane<mode>_internal"
3423 [(set (match_operand:VH 0 "s_register_operand" "=w")
3425 (vec_select:<V_elem>
3426 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3427 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3428 "TARGET_NEON && TARGET_FP16"
3430 if (BYTES_BIG_ENDIAN)
3432 int elt = INTVAL (operands[2]);
3433 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3434 operands[2] = GEN_INT (elt);
3437 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3439 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3441 [(set_attr "type" "neon_dup<q>")]
;; Expander neon_vdup_lane<mode> (VDQW modes).  On big-endian targets the
;; lane number in operand 2 is XORed with (64 / element-bits - 1), using
;; the element size of <V_double_vector_mode>; the work is then delegated
;; to neon_vdup_lane<mode>_internal.
3444 (define_expand "neon_vdup_lane<mode>"
3445 [(match_operand:VDQW 0 "s_register_operand" "=w")
3446 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3447 (match_operand:SI 2 "immediate_operand" "i")]
3450 if (BYTES_BIG_ENDIAN)
3452 unsigned int elt = INTVAL (operands[2]);
3453 unsigned int reg_nelts
3454 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3455 elt ^= reg_nelts - 1;
3456 operands[2] = GEN_INT (elt);
3458 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; Expander neon_vdup_lane<mode> for VH modes, enabled under
;; TARGET_NEON && TARGET_FP16.  On big-endian targets the lane number in
;; operand 2 is XORed with (64 / element-bits - 1); the work is then
;; delegated to neon_vdup_lane<mode>_internal.
3463 (define_expand "neon_vdup_lane<mode>"
3464 [(match_operand:VH 0 "s_register_operand")
3465 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3466 (match_operand:SI 2 "immediate_operand")]
3467 "TARGET_NEON && TARGET_FP16"
3469 if (BYTES_BIG_ENDIAN)
3471 unsigned int elt = INTVAL (operands[2]);
3472 unsigned int reg_nelts
3473 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3474 elt ^= reg_nelts - 1;
3475 operands[2] = GEN_INT (elt);
3477 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3482 ; Scalar index is ignored, since only zero is valid here.
;; DI form of vdup_lane: expands to a plain move of operand 1 into
;; operand 0.
3483 (define_expand "neon_vdup_lanedi"
3484 [(match_operand:DI 0 "s_register_operand" "=w")
3485 (match_operand:DI 1 "s_register_operand" "w")
3486 (match_operand:SI 2 "immediate_operand" "i")]
3489 emit_move_insn (operands[0], operands[1]);
3493 ; Likewise for v2di, as the DImode second operand has only a single element.
;; V2DI form of vdup_lane: delegates to neon_vdup_nv2di with operands 0
;; and 1; the lane operand (2) is not passed on.
3494 (define_expand "neon_vdup_lanev2di"
3495 [(match_operand:V2DI 0 "s_register_operand" "=w")
3496 (match_operand:DI 1 "s_register_operand" "w")
3497 (match_operand:SI 2 "immediate_operand" "i")]
3500 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3504 ; Disabled before reload because we don't want combine doing something silly,
3505 ; but used by the post-reload expansion of neon_vcombine.
;; The pattern is two sets that exchange operands 0 and 1 (both "+w", i.e.
;; read and written), printed as a single "vswp".  The insn condition
;; requires reload_completed.
3506 (define_insn "*neon_vswp<mode>"
3507 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3508 (match_operand:VDQX 1 "s_register_operand" "+w"))
3509 (set (match_dup 1) (match_dup 0))]
3510 "TARGET_NEON && reload_completed"
3511 "vswp\t%<V_reg>0, %<V_reg>1"
3512 [(set_attr "type" "neon_permute<q>")]
3515 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3517 ;; FIXME: A different implementation of this builtin could make it much
3518 ;; more likely that we wouldn't actually need to output anything (we could make
3519 ;; it so that the reg allocator puts things in the right places magically
3520 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; The pattern is a vec_concat of operands 1 and 2 (both VDX) into the
;; <V_DOUBLE> destination, operand 0.  Once reload has completed the insn
;; is split by calling neon_split_vcombine on the operands.
3522 (define_insn_and_split "neon_vcombine<mode>"
3523 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3524 (vec_concat:<V_DOUBLE>
3525 (match_operand:VDX 1 "s_register_operand" "w")
3526 (match_operand:VDX 2 "s_register_operand" "w")))]
3529 "&& reload_completed"
3532 neon_split_vcombine (operands);
3535 [(set_attr "type" "multiple")]
;; Expander neon_vget_high<mode> (VQX modes): moves into operand 0 the
;; <V_HALF>mode subreg of operand 1 that starts at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
3538 (define_expand "neon_vget_high<mode>"
3539 [(match_operand:<V_HALF> 0 "s_register_operand")
3540 (match_operand:VQX 1 "s_register_operand")]
3543 emit_move_insn (operands[0],
3544 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3545 GET_MODE_SIZE (<V_HALF>mode)));
;; Expander neon_vget_low<mode> (VQX modes): moves a <V_HALF>mode subreg of
;; operand 1 into operand 0.
;; NOTE(review): presumably the subreg offset is 0 (the counterpart of
;; neon_vget_high<mode>) -- confirm.
3549 (define_expand "neon_vget_low<mode>"
3550 [(match_operand:<V_HALF> 0 "s_register_operand")
3551 (match_operand:VQX 1 "s_register_operand")]
3554 emit_move_insn (operands[0],
3555 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed integer vector (VCVTI) to floating-point (<V_CVTTO>) conversion,
;; printed as "vcvt.f32.s32".  Not available when flag_rounding_math is
;; set.
3560 (define_insn "float<mode><V_cvtto>2"
3561 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3562 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3563 "TARGET_NEON && !flag_rounding_math"
3564 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3565 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer vector (VCVTI) to floating-point (<V_CVTTO>)
;; conversion, printed as "vcvt.f32.u32".  Not available when
;; flag_rounding_math is set.
3568 (define_insn "floatuns<mode><V_cvtto>2"
3569 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3570 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3571 "TARGET_NEON && !flag_rounding_math"
3572 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3573 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Floating-point vector (VCVTF) to signed integer (<V_CVTTO>) conversion
;; using the "fix" RTL code, printed as "vcvt.s32.f32".
3576 (define_insn "fix_trunc<mode><V_cvtto>2"
3577 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3578 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3580 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3581 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Floating-point vector (VCVTF) to unsigned integer (<V_CVTTO>) conversion
;; using the "unsigned_fix" RTL code, printed as "vcvt.u32.f32".
3584 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3585 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3586 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3588 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3589 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode> for VCVTF sources: float to 32-bit integer,
;; expressed as an unspec.  <sup> supplies the signedness letter placed
;; before "32" in the mnemonic.
3592 (define_insn "neon_vcvt<sup><mode>"
3593 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3594 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3597 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3598 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup><mode> for VCVTI sources: 32-bit integer to float,
;; expressed as an unspec.  <sup> supplies the signedness letter placed
;; before "32" in the mnemonic.
3601 (define_insn "neon_vcvt<sup><mode>"
3602 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3603 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3606 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3607 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion from V4HF (operand 1) to V4SF (operand 0), printed
;; as "vcvt.f32.f16".  Requires TARGET_NEON && TARGET_FP16.
3610 (define_insn "neon_vcvtv4sfv4hf"
3611 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3612 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3614 "TARGET_NEON && TARGET_FP16"
3615 "vcvt.f32.f16\t%q0, %P1"
3616 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion from V4SF (operand 1) to V4HF (operand 0), printed
;; as "vcvt.f16.f32".  Requires TARGET_NEON && TARGET_FP16.
3619 (define_insn "neon_vcvtv4hfv4sf"
3620 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3621 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3623 "TARGET_NEON && TARGET_FP16"
3624 "vcvt.f16.f32\t%P0, %q1"
3625 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; neon_vcvt<sup><mode> for VCVTHI sources: 16-bit integer to f16, printed
;; as "vcvt.f16.<sup>16".  Enabled only under TARGET_NEON_FP16INST.
3628 (define_insn "neon_vcvt<sup><mode>"
3630 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3632 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
3634 "TARGET_NEON_FP16INST"
3635 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
3636 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; neon_vcvt<sup><mode> for VH sources: f16 to 16-bit integer, printed as
;; "vcvt.<sup>16.f16".  Enabled only under TARGET_NEON_FP16INST.
3639 (define_insn "neon_vcvt<sup><mode>"
3641 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3643 [(match_operand:VH 1 "s_register_operand" "w")]
3645 "TARGET_NEON_FP16INST"
3646 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3647 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VCVTF sources: float to 32-bit integer with
;; an extra immediate operand 2 that is printed as the instruction's third
;; operand.  Operand 2 is checked with arm_const_bounds (1, 33) before the
;; template is returned.
;; NOTE(review): presumably the upper bound is exclusive, i.e. 1..32 is
;; accepted -- confirm against arm_const_bounds.
3650 (define_insn "neon_vcvt<sup>_n<mode>"
3651 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3652 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3653 (match_operand:SI 2 "immediate_operand" "i")]
3657 arm_const_bounds (operands[2], 1, 33);
3658 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3660 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VH sources: f16 to 16-bit integer with an
;; extra immediate operand 2 that is printed as the instruction's third
;; operand.  Enabled only under TARGET_NEON_FP16INST.  Operand 2 is
;; checked with arm_const_bounds (0, 17).
;; NOTE(review): the lower bound here is 0, whereas the 32-bit variants
;; pass 1 -- confirm this difference is intended.
3663 (define_insn "neon_vcvt<sup>_n<mode>"
3664 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3666 [(match_operand:VH 1 "s_register_operand" "w")
3667 (match_operand:SI 2 "immediate_operand" "i")]
3669 "TARGET_NEON_FP16INST"
3671 arm_const_bounds (operands[2], 0, 17);
3672 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
3674 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VCVTI sources: 32-bit integer to float with
;; an extra immediate operand 2 that is printed as the instruction's third
;; operand.  Operand 2 is checked with arm_const_bounds (1, 33) before the
;; template is returned.
;; NOTE(review): presumably the upper bound is exclusive, i.e. 1..32 is
;; accepted -- confirm against arm_const_bounds.
3677 (define_insn "neon_vcvt<sup>_n<mode>"
3678 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3679 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3680 (match_operand:SI 2 "immediate_operand" "i")]
3684 arm_const_bounds (operands[2], 1, 33);
3685 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3687 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; neon_vcvt<sup>_n<mode> for VCVTHI sources: 16-bit integer to f16 with an
;; extra immediate operand 2 that is printed as the instruction's third
;; operand.  Enabled only under TARGET_NEON_FP16INST.  Operand 2 is
;; checked with arm_const_bounds (0, 17).
;; NOTE(review): the lower bound here is 0, whereas the 32-bit variants
;; pass 1 -- confirm this difference is intended.
3690 (define_insn "neon_vcvt<sup>_n<mode>"
3691 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3693 [(match_operand:VCVTHI 1 "s_register_operand" "w")
3694 (match_operand:SI 2 "immediate_operand" "i")]
3696 "TARGET_NEON_FP16INST"
3698 arm_const_bounds (operands[2], 0, 17);
3699 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
3701 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; f16 to 16-bit integer conversion whose mnemonic carries a <vcvth_op>
;; suffix directly after "vcvt".  Enabled only under TARGET_NEON_FP16INST.
3704 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
3706 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
3708 [(match_operand:VH 1 "s_register_operand" "w")]
3710 "TARGET_NEON_FP16INST"
3711 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
3712 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; neon_vmovn<mode>: unspec producing a <V_narrow> result from a VN-mode
;; source, printed as "vmovn" with a %P destination and a %q source.
3715 (define_insn "neon_vmovn<mode>"
3716 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3717 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3720 "vmovn.<V_if_elem>\t%P0, %q1"
3721 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; neon_vqmovn<sup><mode>: unspec producing a <V_narrow> result from a
;; VN-mode source, printed as "vqmovn" with the signedness letter taken
;; from <sup>.
3724 (define_insn "neon_vqmovn<sup><mode>"
3725 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3726 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3729 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3730 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vqmovun<mode>: unspec producing a <V_narrow> result from a VN-mode
;; source, printed as "vqmovun" with the <V_s_elem> element suffix.
3733 (define_insn "neon_vqmovun<mode>"
3734 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3735 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3738 "vqmovun.<V_s_elem>\t%P0, %q1"
3739 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; neon_vmovl<sup><mode>: unspec producing a <V_widen> result from a
;; VW-mode source, printed as "vmovl" with a %q destination and a %P
;; source.
3742 (define_insn "neon_vmovl<sup><mode>"
3743 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3744 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3747 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3748 [(set_attr "type" "neon_shift_imm_long")]
;; neon_vmul_lane<mode> for VMD modes: multiplies operand 1 by lane
;; operand 3 of operand 2.  Operand 2 uses <scalar_mul_constraint>.  The
;; "type" attribute depends on <Is_float_mode>.
3751 (define_insn "neon_vmul_lane<mode>"
3752 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3753 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3754 (match_operand:VMD 2 "s_register_operand"
3755 "<scalar_mul_constraint>")
3756 (match_operand:SI 3 "immediate_operand" "i")]
3760 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3763 (if_then_else (match_test "<Is_float_mode>")
3764 (const_string "neon_fp_mul_s_scalar<q>")
3765 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode> for VMQ modes: multiplies operand 1 by lane
;; operand 3 of operand 2, where operand 2 is a <V_HALF>-mode vector
;; constrained by <scalar_mul_constraint>.  The "type" attribute depends
;; on <Is_float_mode>.
3768 (define_insn "neon_vmul_lane<mode>"
3769 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3770 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3771 (match_operand:<V_HALF> 2 "s_register_operand"
3772 "<scalar_mul_constraint>")
3773 (match_operand:SI 3 "immediate_operand" "i")]
3777 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3780 (if_then_else (match_test "<Is_float_mode>")
3781 (const_string "neon_fp_mul_s_scalar<q>")
3782 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; neon_vmul_lane<mode> for VH modes: multiplies operand 1 by lane
;; operand 3 of the V4HF operand 2, printed as "vmul.f16".  Enabled only
;; under TARGET_NEON_FP16INST.
3785 (define_insn "neon_vmul_lane<mode>"
3786 [(set (match_operand:VH 0 "s_register_operand" "=w")
3787 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3788 (match_operand:V4HF 2 "s_register_operand"
3789 "<scalar_mul_constraint>")
3790 (match_operand:SI 3 "immediate_operand" "i")]
3792 "TARGET_NEON_FP16INST"
3793 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
3794 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; neon_vmull<sup>_lane<mode> (VMDI modes): multiplies operand 1 by lane
;; operand 3 of operand 2 into a <V_widen> result, printed as "vmull" with
;; the signedness letter taken from <sup>.
3797 (define_insn "neon_vmull<sup>_lane<mode>"
3798 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3799 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3800 (match_operand:VMDI 2 "s_register_operand"
3801 "<scalar_mul_constraint>")
3802 (match_operand:SI 3 "immediate_operand" "i")]
3806 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3808 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; neon_vqdmull_lane<mode> (VMDI modes): UNSPEC_VQDMULL_LANE of operand 1
;; and lane operand 3 of operand 2, giving a <V_widen> result; printed as
;; "vqdmull".
3811 (define_insn "neon_vqdmull_lane<mode>"
3812 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3813 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3814 (match_operand:VMDI 2 "s_register_operand"
3815 "<scalar_mul_constraint>")
3816 (match_operand:SI 3 "immediate_operand" "i")]
3817 UNSPEC_VQDMULL_LANE))]
3820 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3822 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; neon_vq<r>dmulh_lane<mode> for VMQI modes: operand 1 combined with lane
;; operand 3 of the <V_HALF>-mode operand 2; printed as "vq<r>dmulh" with
;; %q destination and first source.
3825 (define_insn "neon_vq<r>dmulh_lane<mode>"
3826 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3827 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3828 (match_operand:<V_HALF> 2 "s_register_operand"
3829 "<scalar_mul_constraint>")
3830 (match_operand:SI 3 "immediate_operand" "i")]
3834 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3836 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; neon_vq<r>dmulh_lane<mode> for VMDI modes: operand 1 combined with lane
;; operand 3 of operand 2; printed as "vq<r>dmulh" with %P operands
;; throughout.
;; NOTE(review): the "type" attribute carries the same "_scalar_q" suffix
;; as the VMQI variant even though this pattern prints %P operands --
;; confirm this is intended.
3839 (define_insn "neon_vq<r>dmulh_lane<mode>"
3840 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3841 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3842 (match_operand:VMDI 2 "s_register_operand"
3843 "<scalar_mul_constraint>")
3844 (match_operand:SI 3 "immediate_operand" "i")]
3848 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3850 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3853 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI form.  Operand 1 is tied to the destination (constraint "0");
;; operand 2 is the vector input and operand 3 the <V_HALF>-mode vector
;; whose lane (operand 4) is used.  Only operands 0, 2, 3 and 4 appear in
;; the printed instruction.
3854 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3855 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3856 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3857 (match_operand:VMQI 2 "s_register_operand" "w")
3858 (match_operand:<V_HALF> 3 "s_register_operand"
3859 "<scalar_mul_constraint>")
3860 (match_operand:SI 4 "immediate_operand" "i")]
3865 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3867 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; VMDI form of the vqrdmlah/vqrdmlsh lane pattern.  Operand 1 is tied to
;; the destination (constraint "0"); operand 2 is the vector input and
;; operand 3 the vector whose lane (operand 4) is used.  All vector
;; operands are printed with %P.
3870 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3871 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3872 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3873 (match_operand:VMDI 2 "s_register_operand" "w")
3874 (match_operand:VMDI 3 "s_register_operand"
3875 "<scalar_mul_constraint>")
3876 (match_operand:SI 4 "immediate_operand" "i")]
3881 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3883 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; neon_vmla_lane<mode> for VMD modes.  Operand 1 is tied to the
;; destination (constraint "0"); the printed "vmla" names operand 2 and
;; lane operand 4 of operand 3.  The "type" attribute depends on
;; <Is_float_mode>.
3886 (define_insn "neon_vmla_lane<mode>"
3887 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3888 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3889 (match_operand:VMD 2 "s_register_operand" "w")
3890 (match_operand:VMD 3 "s_register_operand"
3891 "<scalar_mul_constraint>")
3892 (match_operand:SI 4 "immediate_operand" "i")]
3896 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3899 (if_then_else (match_test "<Is_float_mode>")
3900 (const_string "neon_fp_mla_s_scalar<q>")
3901 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmla_lane<mode> for VMQ modes.  Operand 1 is tied to the
;; destination (constraint "0"); the printed "vmla" names operand 2 and
;; lane operand 4 of the <V_HALF>-mode operand 3.  The "type" attribute
;; depends on <Is_float_mode>.
3904 (define_insn "neon_vmla_lane<mode>"
3905 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3906 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3907 (match_operand:VMQ 2 "s_register_operand" "w")
3908 (match_operand:<V_HALF> 3 "s_register_operand"
3909 "<scalar_mul_constraint>")
3910 (match_operand:SI 4 "immediate_operand" "i")]
3914 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3917 (if_then_else (match_test "<Is_float_mode>")
3918 (const_string "neon_fp_mla_s_scalar<q>")
3919 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlal<sup>_lane<mode> (VMDI modes).  The <V_widen> operand 1 is
;; tied to the destination (constraint "0"); the printed "vmlal" names
;; operand 2 and lane operand 4 of operand 3, with the signedness letter
;; taken from <sup>.
3922 (define_insn "neon_vmlal<sup>_lane<mode>"
3923 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3924 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3925 (match_operand:VMDI 2 "s_register_operand" "w")
3926 (match_operand:VMDI 3 "s_register_operand"
3927 "<scalar_mul_constraint>")
3928 (match_operand:SI 4 "immediate_operand" "i")]
3932 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3934 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlal_lane<mode> (VMDI modes): UNSPEC_VQDMLAL_LANE.  The
;; <V_widen> operand 1 is tied to the destination (constraint "0"); the
;; printed "vqdmlal" names operand 2 and lane operand 4 of operand 3.
3937 (define_insn "neon_vqdmlal_lane<mode>"
3938 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3939 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3940 (match_operand:VMDI 2 "s_register_operand" "w")
3941 (match_operand:VMDI 3 "s_register_operand"
3942 "<scalar_mul_constraint>")
3943 (match_operand:SI 4 "immediate_operand" "i")]
3944 UNSPEC_VQDMLAL_LANE))]
3947 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3949 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; neon_vmls_lane<mode> for VMD modes.  Operand 1 is tied to the
;; destination (constraint "0"); the printed "vmls" names operand 2 and
;; lane operand 4 of operand 3.  The "type" attribute depends on
;; <Is_float_mode>.
3952 (define_insn "neon_vmls_lane<mode>"
3953 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3954 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3955 (match_operand:VMD 2 "s_register_operand" "w")
3956 (match_operand:VMD 3 "s_register_operand"
3957 "<scalar_mul_constraint>")
3958 (match_operand:SI 4 "immediate_operand" "i")]
3962 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3965 (if_then_else (match_test "<Is_float_mode>")
3966 (const_string "neon_fp_mla_s_scalar<q>")
3967 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmls_lane<mode> for VMQ modes.  Operand 1 is tied to the
;; destination (constraint "0"); the printed "vmls" names operand 2 and
;; lane operand 4 of the <V_HALF>-mode operand 3.  The "type" attribute
;; depends on <Is_float_mode>.
3970 (define_insn "neon_vmls_lane<mode>"
3971 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3972 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3973 (match_operand:VMQ 2 "s_register_operand" "w")
3974 (match_operand:<V_HALF> 3 "s_register_operand"
3975 "<scalar_mul_constraint>")
3976 (match_operand:SI 4 "immediate_operand" "i")]
3980 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3983 (if_then_else (match_test "<Is_float_mode>")
3984 (const_string "neon_fp_mla_s_scalar<q>")
3985 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; neon_vmlsl<sup>_lane<mode> (VMDI modes).  The <V_widen> operand 1 is
;; tied to the destination (constraint "0"); the printed "vmlsl" names
;; operand 2 and lane operand 4 of operand 3, with the signedness letter
;; taken from <sup>.
3988 (define_insn "neon_vmlsl<sup>_lane<mode>"
3989 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3990 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3991 (match_operand:VMDI 2 "s_register_operand" "w")
3992 (match_operand:VMDI 3 "s_register_operand"
3993 "<scalar_mul_constraint>")
3994 (match_operand:SI 4 "immediate_operand" "i")]
3998 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4000 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; neon_vqdmlsl_lane<mode> (VMDI modes): UNSPEC_VQDMLSL_LANE.  The
;; <V_widen> operand 1 is tied to the destination (constraint "0"); the
;; printed "vqdmlsl" names operand 2 and lane operand 4 of operand 3.
4003 (define_insn "neon_vqdmlsl_lane<mode>"
4004 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4005 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4006 (match_operand:VMDI 2 "s_register_operand" "w")
4007 (match_operand:VMDI 3 "s_register_operand"
4008 "<scalar_mul_constraint>")
4009 (match_operand:SI 4 "immediate_operand" "i")]
4010 UNSPEC_VQDMLSL_LANE))]
4013 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4015 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4018 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4019 ; core register into a temp register, then use a scalar taken from that. This
4020 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4021 ; or extracted from another vector. The latter case it's currently better to
4022 ; use the "_lane" variant, and the former case can probably be implemented
4023 ; using vld1_lane, but that hasn't been done yet.
;; neon_vmul_n<mode> for VMD modes: writes the scalar operand 2 into lane 0
;; of a fresh <MODE>mode temporary with neon_vset_lane<mode>, then hands
;; that temporary to neon_vmul_lane<mode>.  The temporary is passed as both
;; the destination and the vector being updated, so its other lanes are
;; not given defined values here.
4025 (define_expand "neon_vmul_n<mode>"
4026 [(match_operand:VMD 0 "s_register_operand" "")
4027 (match_operand:VMD 1 "s_register_operand" "")
4028 (match_operand:<V_elem> 2 "s_register_operand" "")]
4031 rtx tmp = gen_reg_rtx (<MODE>mode);
4032 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4033 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmul_n<mode> for VMQ modes: writes the scalar operand 2 into lane 0
;; of a fresh <V_HALF>mode temporary with neon_vset_lane<V_half>, then
;; hands that temporary to neon_vmul_lane<mode>.  The temporary is passed
;; as both the destination and the vector being updated.
4038 (define_expand "neon_vmul_n<mode>"
4039 [(match_operand:VMQ 0 "s_register_operand" "")
4040 (match_operand:VMQ 1 "s_register_operand" "")
4041 (match_operand:<V_elem> 2 "s_register_operand" "")]
4044 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4045 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4046 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmul_n<mode> for VH modes, enabled under TARGET_NEON_FP16INST:
;; writes the scalar operand 2 into lane 0 of a fresh V4HFmode temporary
;; with neon_vset_lanev4hf, then hands that temporary to
;; neon_vmul_lane<mode>.  The temporary is always V4HF, whichever VH mode
;; is being expanded.
4051 (define_expand "neon_vmul_n<mode>"
4052 [(match_operand:VH 0 "s_register_operand")
4053 (match_operand:VH 1 "s_register_operand")
4054 (match_operand:<V_elem> 2 "s_register_operand")]
4055 "TARGET_NEON_FP16INST"
4057 rtx tmp = gen_reg_rtx (V4HFmode);
4058 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4059 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmulls_n<mode> (VMDI modes, <V_widen> result): writes the scalar
;; operand 2 into lane 0 of a fresh <MODE>mode temporary with
;; neon_vset_lane<mode>, then hands that temporary to
;; neon_vmulls_lane<mode>.
4064 (define_expand "neon_vmulls_n<mode>"
4065 [(match_operand:<V_widen> 0 "s_register_operand" "")
4066 (match_operand:VMDI 1 "s_register_operand" "")
4067 (match_operand:<V_elem> 2 "s_register_operand" "")]
4070 rtx tmp = gen_reg_rtx (<MODE>mode);
4071 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4072 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmullu_n<mode> (VMDI modes, <V_widen> result): writes the scalar
;; operand 2 into lane 0 of a fresh <MODE>mode temporary with
;; neon_vset_lane<mode>, then hands that temporary to
;; neon_vmullu_lane<mode>.
4077 (define_expand "neon_vmullu_n<mode>"
4078 [(match_operand:<V_widen> 0 "s_register_operand" "")
4079 (match_operand:VMDI 1 "s_register_operand" "")
4080 (match_operand:<V_elem> 2 "s_register_operand" "")]
4083 rtx tmp = gen_reg_rtx (<MODE>mode);
4084 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4085 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqdmull_n<mode> (VMDI modes, <V_widen> result): writes the scalar
;; operand 2 into lane 0 of a fresh <MODE>mode temporary with
;; neon_vset_lane<mode>, then hands that temporary to
;; neon_vqdmull_lane<mode>.
4090 (define_expand "neon_vqdmull_n<mode>"
4091 [(match_operand:<V_widen> 0 "s_register_operand" "")
4092 (match_operand:VMDI 1 "s_register_operand" "")
4093 (match_operand:<V_elem> 2 "s_register_operand" "")]
4096 rtx tmp = gen_reg_rtx (<MODE>mode);
4097 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4098 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqdmulh_n<mode> for VMDI modes: writes the scalar operand 2 into
;; lane 0 of a fresh <MODE>mode temporary with neon_vset_lane<mode>, then
;; hands that temporary to neon_vqdmulh_lane<mode>.
4103 (define_expand "neon_vqdmulh_n<mode>"
4104 [(match_operand:VMDI 0 "s_register_operand" "")
4105 (match_operand:VMDI 1 "s_register_operand" "")
4106 (match_operand:<V_elem> 2 "s_register_operand" "")]
4109 rtx tmp = gen_reg_rtx (<MODE>mode);
4110 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4111 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqrdmulh_n<mode> for VMDI modes: writes the scalar operand 2 into
;; lane 0 of a fresh <MODE>mode temporary with neon_vset_lane<mode>, then
;; hands that temporary to neon_vqrdmulh_lane<mode>.
4116 (define_expand "neon_vqrdmulh_n<mode>"
4117 [(match_operand:VMDI 0 "s_register_operand" "")
4118 (match_operand:VMDI 1 "s_register_operand" "")
4119 (match_operand:<V_elem> 2 "s_register_operand" "")]
4122 rtx tmp = gen_reg_rtx (<MODE>mode);
4123 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4124 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqdmulh_n<mode> for VMQI modes: writes the scalar operand 2 into
;; lane 0 of a fresh <V_HALF>mode temporary with neon_vset_lane<V_half>,
;; then hands that temporary to neon_vqdmulh_lane<mode>.
4129 (define_expand "neon_vqdmulh_n<mode>"
4130 [(match_operand:VMQI 0 "s_register_operand" "")
4131 (match_operand:VMQI 1 "s_register_operand" "")
4132 (match_operand:<V_elem> 2 "s_register_operand" "")]
4135 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4136 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4137 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; neon_vqrdmulh_n<mode> for VMQI modes: writes the scalar operand 2 into
;; lane 0 of a fresh <V_HALF>mode temporary with neon_vset_lane<V_half>,
;; then hands that temporary to neon_vqrdmulh_lane<mode>.
4142 (define_expand "neon_vqrdmulh_n<mode>"
4143 [(match_operand:VMQI 0 "s_register_operand" "")
4144 (match_operand:VMQI 1 "s_register_operand" "")
4145 (match_operand:<V_elem> 2 "s_register_operand" "")]
4148 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4149 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4150 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; neon_vmla_n<mode> for VMD modes: writes the scalar operand 3 into lane 0
;; of a fresh <MODE>mode temporary with neon_vset_lane<mode>, then calls
;; neon_vmla_lane<mode> with operands 0, 1 and 2.
4155 (define_expand "neon_vmla_n<mode>"
4156 [(match_operand:VMD 0 "s_register_operand" "")
4157 (match_operand:VMD 1 "s_register_operand" "")
4158 (match_operand:VMD 2 "s_register_operand" "")
4159 (match_operand:<V_elem> 3 "s_register_operand" "")]
4162 rtx tmp = gen_reg_rtx (<MODE>mode);
4163 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4164 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; neon_vmla_n<mode> for VMQ modes: writes the scalar operand 3 into lane 0
;; of a fresh <V_HALF>mode temporary with neon_vset_lane<V_half>, then
;; calls neon_vmla_lane<mode> with operands 0, 1 and 2.
4169 (define_expand "neon_vmla_n<mode>"
4170 [(match_operand:VMQ 0 "s_register_operand" "")
4171 (match_operand:VMQ 1 "s_register_operand" "")
4172 (match_operand:VMQ 2 "s_register_operand" "")
4173 (match_operand:<V_elem> 3 "s_register_operand" "")]
4176 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4177 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4178 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; neon_vmlals_n<mode> (VMDI multiplicand, <V_widen> accumulator and
;; result): writes the scalar operand 3 into lane 0 of a fresh <MODE>mode
;; temporary with neon_vset_lane<mode>, then calls neon_vmlals_lane<mode>
;; with operands 0, 1 and 2.
4183 (define_expand "neon_vmlals_n<mode>"
4184 [(match_operand:<V_widen> 0 "s_register_operand" "")
4185 (match_operand:<V_widen> 1 "s_register_operand" "")
4186 (match_operand:VMDI 2 "s_register_operand" "")
4187 (match_operand:<V_elem> 3 "s_register_operand" "")]
4190 rtx tmp = gen_reg_rtx (<MODE>mode);
4191 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4192 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the scalar form of vmlalu.  Same operand layout as
;; neon_vmlals_n<mode>: <V_widen> operands 0 and 1, VMDI operand 2, <V_elem>
;; scalar operand 3.  The scalar goes into a <MODE> temporary through
;; neon_vset_lane<mode>, then neon_vmlalu_lane<mode> is emitted (trailing
;; arguments not visible in this excerpt).
4197 (define_expand "neon_vmlalu_n<mode>"
4198 [(match_operand:<V_widen> 0 "s_register_operand" "")
4199 (match_operand:<V_widen> 1 "s_register_operand" "")
4200 (match_operand:VMDI 2 "s_register_operand" "")
4201 (match_operand:<V_elem> 3 "s_register_operand" "")]
4204 rtx tmp = gen_reg_rtx (<MODE>mode);
4205 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4206 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the scalar form of vqdmlal.  <V_widen> operands 0 and 1,
;; VMDI operand 2, <V_elem> scalar operand 3.  The scalar goes into a <MODE>
;; temporary through neon_vset_lane<mode>, then neon_vqdmlal_lane<mode> is
;; emitted (trailing arguments not visible in this excerpt).
4211 (define_expand "neon_vqdmlal_n<mode>"
4212 [(match_operand:<V_widen> 0 "s_register_operand" "")
4213 (match_operand:<V_widen> 1 "s_register_operand" "")
4214 (match_operand:VMDI 2 "s_register_operand" "")
4215 (match_operand:<V_elem> 3 "s_register_operand" "")]
4218 rtx tmp = gen_reg_rtx (<MODE>mode);
4219 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4220 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the scalar form of vmls on VMD modes.  The <V_elem> scalar
;; in operand 3 is placed into a <MODE> temporary with neon_vset_lane<mode>
;; (lane argument const0_rtx); neon_vmls_lane<mode> is then emitted on
;; operands 0-2 (trailing arguments not visible in this excerpt).
4225 (define_expand "neon_vmls_n<mode>"
4226 [(match_operand:VMD 0 "s_register_operand" "")
4227 (match_operand:VMD 1 "s_register_operand" "")
4228 (match_operand:VMD 2 "s_register_operand" "")
4229 (match_operand:<V_elem> 3 "s_register_operand" "")]
4232 rtx tmp = gen_reg_rtx (<MODE>mode);
4233 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4234 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; VMQ variant of the scalar vmls expander.  The temporary that receives
;; the scalar (operand 3) has mode <V_HALF> and is filled with
;; neon_vset_lane<V_half>; neon_vmls_lane<mode> is then emitted on
;; operands 0-2 (trailing arguments not visible in this excerpt).
4239 (define_expand "neon_vmls_n<mode>"
4240 [(match_operand:VMQ 0 "s_register_operand" "")
4241 (match_operand:VMQ 1 "s_register_operand" "")
4242 (match_operand:VMQ 2 "s_register_operand" "")
4243 (match_operand:<V_elem> 3 "s_register_operand" "")]
4246 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4247 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4248 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the scalar form of vmlsls.  <V_widen> operands 0 and 1,
;; VMDI operand 2, <V_elem> scalar operand 3.  The scalar goes into a <MODE>
;; temporary through neon_vset_lane<mode>, then neon_vmlsls_lane<mode> is
;; emitted (trailing arguments not visible in this excerpt).
4253 (define_expand "neon_vmlsls_n<mode>"
4254 [(match_operand:<V_widen> 0 "s_register_operand" "")
4255 (match_operand:<V_widen> 1 "s_register_operand" "")
4256 (match_operand:VMDI 2 "s_register_operand" "")
4257 (match_operand:<V_elem> 3 "s_register_operand" "")]
4260 rtx tmp = gen_reg_rtx (<MODE>mode);
4261 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4262 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the scalar form of vmlslu.  <V_widen> operands 0 and 1,
;; VMDI operand 2, <V_elem> scalar operand 3.  The scalar goes into a <MODE>
;; temporary through neon_vset_lane<mode>, then neon_vmlslu_lane<mode> is
;; emitted (trailing arguments not visible in this excerpt).
4267 (define_expand "neon_vmlslu_n<mode>"
4268 [(match_operand:<V_widen> 0 "s_register_operand" "")
4269 (match_operand:<V_widen> 1 "s_register_operand" "")
4270 (match_operand:VMDI 2 "s_register_operand" "")
4271 (match_operand:<V_elem> 3 "s_register_operand" "")]
4274 rtx tmp = gen_reg_rtx (<MODE>mode);
4275 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4276 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Expander for the scalar form of vqdmlsl.  <V_widen> operands 0 and 1,
;; VMDI operand 2, <V_elem> scalar operand 3.  The scalar goes into a <MODE>
;; temporary through neon_vset_lane<mode>, then neon_vqdmlsl_lane<mode> is
;; emitted (trailing arguments not visible in this excerpt).
4281 (define_expand "neon_vqdmlsl_n<mode>"
4282 [(match_operand:<V_widen> 0 "s_register_operand" "")
4283 (match_operand:<V_widen> 1 "s_register_operand" "")
4284 (match_operand:VMDI 2 "s_register_operand" "")
4285 (match_operand:<V_elem> 3 "s_register_operand" "")]
4288 rtx tmp = gen_reg_rtx (<MODE>mode);
4289 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4290 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on VDQX registers.  Operand 3 is an immediate that is passed to
;; arm_const_bounds with bounds 0 and GET_MODE_NUNITS (<MODE>mode) before the
;; vext.<V_sz_elem> template is returned.  The unspec name is on a line not
;; visible in this excerpt.
4295 (define_insn "neon_vext<mode>"
4296 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4297 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4298 (match_operand:VDQX 2 "s_register_operand" "w")
4299 (match_operand:SI 3 "immediate_operand" "i")]
4303 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4304 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4306 [(set_attr "type" "neon_ext<q>")]
;; vrev64.<V_sz_elem> on VDQ registers: one register input, one register
;; output, scheduling type neon_rev<q>.
4309 (define_insn "neon_vrev64<mode>"
4310 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4311 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4314 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4315 [(set_attr "type" "neon_rev<q>")]
;; vrev32.<V_sz_elem> on the VX mode iterator: one register input, one
;; register output, scheduling type neon_rev<q>.
4318 (define_insn "neon_vrev32<mode>"
4319 [(set (match_operand:VX 0 "s_register_operand" "=w")
4320 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4323 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4324 [(set_attr "type" "neon_rev<q>")]
;; vrev16.<V_sz_elem> on the VE mode iterator: one register input, one
;; register output, scheduling type neon_rev<q>.
4327 (define_insn "neon_vrev16<mode>"
4328 [(set (match_operand:VE 0 "s_register_operand" "=w")
4329 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4332 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4333 [(set_attr "type" "neon_rev<q>")]
4336 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4337 ; allocation. For an intrinsic of form:
4338 ; rD = vbsl_* (rS, rN, rM)
4339 ; We can use any of:
4340 ; vbsl rS, rN, rM (if D = S)
4341 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4342 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; In the pattern below rS, rN and rM are operands 1, 2 and 3.  The three
;; constraint alternatives tie operand 1, operand 3 and operand 2
;; respectively to the output (the "0" constraint), which selects the vbsl,
;; vbit and vbif templates in that order.
4344 (define_insn "neon_vbsl<mode>_internal"
4345 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4346 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4347 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4348 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4352 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4353 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4354 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4355 [(set_attr "type" "neon_bsl<q>")]
;; Expander for vbsl.  Operand 1 arrives in mode <V_cmp_result> while
;; operands 0, 2 and 3 are VDQX; the preparation code replaces operand 1
;; with its <MODE>-mode lowpart so that all operands share one mode.
4358 (define_expand "neon_vbsl<mode>"
4359 [(set (match_operand:VDQX 0 "s_register_operand" "")
4360 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4361 (match_operand:VDQX 2 "s_register_operand" "")
4362 (match_operand:VDQX 3 "s_register_operand" "")]
4366 /* We can't alias operands together if they have different modes. */
4367 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift of a VDQIX vector (operand 1) by a per-element count held in a
;; VDQIX register (operand 2).  The unspec list that supplies <shift_op> and
;; <sup> is on a line not visible in this excerpt.  Scheduling type is
;; neon_shift_imm<q>.
4371 (define_insn "neon_v<shift_op><sup><mode>"
4372 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4373 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4374 (match_operand:VDQIX 2 "s_register_operand" "w")]
4377 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4378 [(set_attr "type" "neon_shift_imm<q>")]
;; Same operand shape and output template as the preceding register-count
;; shift pattern, but with scheduling type neon_sat_shift_imm<q>.  The
;; unspec list that distinguishes the two patterns is on a line not visible
;; in this excerpt.
4382 (define_insn "neon_v<shift_op><sup><mode>"
4383 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4384 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4385 (match_operand:VDQIX 2 "s_register_operand" "w")]
4388 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4389 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift of a VDQIX vector by an immediate (operand 2).  The immediate is
;; passed to arm_const_bounds with bounds 1 and
;; neon_element_bits (<MODE>mode) + 1 before the template is returned.
4393 (define_insn "neon_v<shift_op><sup>_n<mode>"
4394 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4395 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4396 (match_operand:SI 2 "immediate_operand" "i")]
4400 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4401 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4403 [(set_attr "type" "neon_shift_imm<q>")]
4406 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: the VN input (operand 1) yields a
;; <V_narrow> result.  The immediate is passed to arm_const_bounds with
;; bounds 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4407 (define_insn "neon_v<shift_op>_n<mode>"
4408 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4409 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4410 (match_operand:SI 2 "immediate_operand" "i")]
4414 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4415 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4417 [(set_attr "type" "neon_shift_imm_narrow_q")]
4420 ;; vqshrn_n, vqrshrn_n
;; Narrowing shift by immediate with a <sup> type prefix in the mnemonic:
;; VN input, <V_narrow> result.  The immediate is passed to arm_const_bounds
;; with bounds 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4421 (define_insn "neon_v<shift_op><sup>_n<mode>"
4422 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4423 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4424 (match_operand:SI 2 "immediate_operand" "i")]
4428 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4429 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4431 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4434 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate printed with the <V_s_elem> element
;; suffix: VN input, <V_narrow> result.  The immediate is passed to
;; arm_const_bounds with bounds 1 and neon_element_bits (<MODE>mode) / 2 + 1.
4435 (define_insn "neon_v<shift_op>_n<mode>"
4436 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4437 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4438 (match_operand:SI 2 "immediate_operand" "i")]
4442 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4443 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4445 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate on VDQIX vectors.  The immediate (operand 2) is passed
;; to arm_const_bounds with bounds 0 and neon_element_bits (<MODE>mode).
4448 (define_insn "neon_vshl_n<mode>"
4449 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4450 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4451 (match_operand:SI 2 "immediate_operand" "i")]
4455 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4456 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4458 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate on VDQIX vectors, with the <sup> type prefix in the
;; mnemonic.  The immediate (operand 2) is passed to arm_const_bounds with
;; bounds 0 and neon_element_bits (<MODE>mode).
4461 (define_insn "neon_vqshl_<sup>_n<mode>"
4462 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4463 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4464 (match_operand:SI 2 "immediate_operand" "i")]
4468 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4469 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4471 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate on VDQIX vectors, printed with the <V_s_elem>
;; element suffix.  The immediate (operand 2) is passed to arm_const_bounds
;; with bounds 0 and neon_element_bits (<MODE>mode).
4474 (define_insn "neon_vqshlu_n<mode>"
4475 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4476 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4477 (match_operand:SI 2 "immediate_operand" "i")]
4481 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4482 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4484 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate: VW input (operand 1), <V_widen> result.
;; NOTE(review): the in-code comment below states 0 < imm <= size, yet the
;; lower bound handed to arm_const_bounds is 0.  The sibling shift patterns
;; pass (1, bits + 1) for 1..bits and (0, bits) for 0..bits-1, which suggests
;; the bounds are [low, high) and that this call also admits 0.  Confirm
;; which of the comment and the bound is intended.
4487 (define_insn "neon_vshll<sup>_n<mode>"
4488 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4489 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4490 (match_operand:SI 2 "immediate_operand" "i")]
4494 /* The boundaries are: 0 < imm <= size. */
4495 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4496 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4498 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate with accumulate (type neon_shift_acc<q>).  Operand 1
;; is tied to the output register ("0" constraint) and does not appear in
;; the template; operand 2 is the vector that is shifted and operand 3 the
;; immediate, passed to arm_const_bounds with bounds 1 and
;; neon_element_bits (<MODE>mode) + 1.
4502 (define_insn "neon_v<shift_op><sup>_n<mode>"
4503 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4504 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4505 (match_operand:VDQIX 2 "s_register_operand" "w")
4506 (match_operand:SI 3 "immediate_operand" "i")]
4510 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4511 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4513 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the output register ("0"
;; constraint); operand 2 is the second vector input and operand 3 the
;; immediate, passed to arm_const_bounds with bounds 1 and
;; neon_element_bits (<MODE>mode) + 1.
4516 (define_insn "neon_vsri_n<mode>"
4517 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4518 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4519 (match_operand:VDQIX 2 "s_register_operand" "w")
4520 (match_operand:SI 3 "immediate_operand" "i")]
4524 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4525 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4527 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the output register ("0"
;; constraint); operand 2 is the second vector input and operand 3 the
;; immediate, passed to arm_const_bounds with bounds 0 and
;; neon_element_bits (<MODE>mode).
4530 (define_insn "neon_vsli_n<mode>"
4531 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4532 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4533 (match_operand:VDQIX 2 "s_register_operand" "w")
4534 (match_operand:SI 3 "immediate_operand" "i")]
4538 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4539 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4541 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl.8 with a one-register table: operand 1 is the V8QI table printed
;; inside the braces, operand 2 the V8QI vector printed after it.
4544 (define_insn "neon_vtbl1v8qi"
4545 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4546 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4547 (match_operand:V8QI 2 "s_register_operand" "w")]
4550 "vtbl.8\t%P0, {%P1}, %P2"
4551 [(set_attr "type" "neon_tbl1")]
;; vtbl.8 with a two-register table held in a TI operand.  The output code
;; takes the hard register number of operand 1 and names the table
;; registers as V8QI registers at tabbase and tabbase + 2, then prints the
;; instruction through output_asm_insn.  The declaration of ops and the
;; returned template are on lines not visible in this excerpt.
4554 (define_insn "neon_vtbl2v8qi"
4555 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4556 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4557 (match_operand:V8QI 2 "s_register_operand" "w")]
4562 int tabbase = REGNO (operands[1]);
4564 ops[0] = operands[0];
4565 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4566 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4567 ops[3] = operands[2];
4568 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4572 [(set_attr "type" "neon_tbl2")]
;; vtbl.8 with a three-register table held in an EI operand.  The table
;; registers are named as V8QI registers at tabbase, tabbase + 2 and
;; tabbase + 4, where tabbase is the hard register number of operand 1.
4575 (define_insn "neon_vtbl3v8qi"
4576 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4577 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4578 (match_operand:V8QI 2 "s_register_operand" "w")]
4583 int tabbase = REGNO (operands[1]);
4585 ops[0] = operands[0];
4586 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4587 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4588 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4589 ops[4] = operands[2];
4590 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4594 [(set_attr "type" "neon_tbl3")]
;; vtbl.8 with a four-register table held in an OI operand.  The table
;; registers are named as V8QI registers at tabbase, +2, +4 and +6, where
;; tabbase is the hard register number of operand 1.
4597 (define_insn "neon_vtbl4v8qi"
4598 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4599 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4600 (match_operand:V8QI 2 "s_register_operand" "w")]
4605 int tabbase = REGNO (operands[1]);
4607 ops[0] = operands[0];
4608 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4609 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4610 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4611 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4612 ops[5] = operands[2];
4613 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4617 [(set_attr "type" "neon_tbl4")]
4620 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup, split once reload has completed.  The table
;; (operand 1) is viewed as TImode, and the output and operand 2 are split
;; into their V8QI low and high parts; one neon_vtbl2v8qi is emitted per
;; half, both using the full table.  The output is earlyclobber ("=&w").
;; The assignments of op0 and op2 are on lines not visible in this excerpt.
4621 (define_insn_and_split "neon_vtbl1v16qi"
4622 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4623 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4624 (match_operand:V16QI 2 "s_register_operand" "w")]
4628 "&& reload_completed"
4631 rtx op0, op1, op2, part0, part2;
4635 op1 = gen_lowpart (TImode, operands[1]);
4638 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4639 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4640 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4641 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4643 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4644 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4645 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4646 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4649 [(set_attr "type" "multiple")]
;; V16QI lookup in a table held in an OI operand, split once reload has
;; completed.  The output and operand 2 are split into V8QI low and high
;; parts and one gen_neon_vtbl2v8qi call is emitted per half.  The
;; assignments of op0, op1 and op2 are on lines not visible in this excerpt.
;; The output is earlyclobber ("=&w").
4652 (define_insn_and_split "neon_vtbl2v16qi"
4653 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4654 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4655 (match_operand:V16QI 2 "s_register_operand" "w")]
4659 "&& reload_completed"
4662 rtx op0, op1, op2, part0, part2;
4669 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4670 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4671 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4672 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4674 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4675 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4676 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4677 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4680 [(set_attr "type" "multiple")]
4683 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4684 ;; handle quad-word input modes, producing octa-word output modes. But
4685 ;; that requires us to add support for octa-word vector modes in moves.
4686 ;; That seems overkill for this one use in vec_perm.
;; The pattern combines two V16QI registers into one OI register and is
;; split after reload by calling neon_split_vcombine on the operands.
4687 (define_insn_and_split "neon_vcombinev16qi"
4688 [(set (match_operand:OI 0 "s_register_operand" "=w")
4689 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4690 (match_operand:V16QI 2 "s_register_operand" "w")]
4694 "&& reload_completed"
4697 neon_split_vcombine (operands);
4700 [(set_attr "type" "multiple")]
;; vtbx.8 with a one-register table.  Operand 1 is tied to the output
;; ("0" constraint) and is not printed; operand 2 is the V8QI table and
;; operand 3 the V8QI vector printed after it.
4703 (define_insn "neon_vtbx1v8qi"
4704 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4705 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4706 (match_operand:V8QI 2 "s_register_operand" "w")
4707 (match_operand:V8QI 3 "s_register_operand" "w")]
4710 "vtbx.8\t%P0, {%P2}, %P3"
4711 [(set_attr "type" "neon_tbl1")]
;; vtbx.8 with a two-register table held in a TI operand (operand 2).
;; Operand 1 is tied to the output.  The table registers are named as V8QI
;; registers at tabbase and tabbase + 2, where tabbase is the hard register
;; number of operand 2.
4714 (define_insn "neon_vtbx2v8qi"
4715 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4716 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4717 (match_operand:TI 2 "s_register_operand" "w")
4718 (match_operand:V8QI 3 "s_register_operand" "w")]
4723 int tabbase = REGNO (operands[2]);
4725 ops[0] = operands[0];
4726 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4727 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4728 ops[3] = operands[3];
4729 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4733 [(set_attr "type" "neon_tbl2")]
;; vtbx.8 with a three-register table held in an EI operand (operand 2).
;; Operand 1 is tied to the output.  The table registers are named as V8QI
;; registers at tabbase, tabbase + 2 and tabbase + 4.
4736 (define_insn "neon_vtbx3v8qi"
4737 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4738 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4739 (match_operand:EI 2 "s_register_operand" "w")
4740 (match_operand:V8QI 3 "s_register_operand" "w")]
4745 int tabbase = REGNO (operands[2]);
4747 ops[0] = operands[0];
4748 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4749 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4750 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4751 ops[4] = operands[3];
4752 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4756 [(set_attr "type" "neon_tbl3")]
;; vtbx.8 with a four-register table held in an OI operand (operand 2).
;; Operand 1 is tied to the output.  The table registers are named as V8QI
;; registers at tabbase, +2, +4 and +6.
4759 (define_insn "neon_vtbx4v8qi"
4760 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4761 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4762 (match_operand:OI 2 "s_register_operand" "w")
4763 (match_operand:V8QI 3 "s_register_operand" "w")]
4768 int tabbase = REGNO (operands[2]);
4770 ops[0] = operands[0];
4771 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4772 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4773 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4774 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4775 ops[5] = operands[3];
4776 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4780 [(set_attr "type" "neon_tbl4")]
;; Expander for vtrn producing two results.  Operands 0 and 3 are the two
;; outputs, both computed from inputs 1 and 2; the second set uses
;; UNSPEC_VTRN2.  The first unspec name is on a line not visible in this
;; excerpt.
4783 (define_expand "neon_vtrn<mode>_internal"
4785 [(set (match_operand:VDQWH 0 "s_register_operand")
4786 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4787 (match_operand:VDQWH 2 "s_register_operand")]
4789 (set (match_operand:VDQWH 3 "s_register_operand")
4790 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4795 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber, "=&w") and
;; the inputs are operands 1 and 3, tied to outputs 0 and 2 respectively.
;; The template therefore names only operands 0 and 2.
4796 (define_insn "*neon_vtrn<mode>_insn"
4797 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4798 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4799 (match_operand:VDQWH 3 "s_register_operand" "2")]
4801 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4802 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4805 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4806 [(set_attr "type" "neon_permute<q>")]
;; Expander for vzip producing two results.  Operands 0 and 3 are the two
;; outputs, both computed from inputs 1 and 2; the second set uses
;; UNSPEC_VZIP2.  The first unspec name is on a line not visible in this
;; excerpt.
4809 (define_expand "neon_vzip<mode>_internal"
4811 [(set (match_operand:VDQWH 0 "s_register_operand")
4812 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4813 (match_operand:VDQWH 2 "s_register_operand")]
4815 (set (match_operand:VDQWH 3 "s_register_operand")
4816 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4821 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber, "=&w") and
;; the inputs are operands 1 and 3, tied to outputs 0 and 2 respectively.
;; The template therefore names only operands 0 and 2.
4822 (define_insn "*neon_vzip<mode>_insn"
4823 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4824 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4825 (match_operand:VDQWH 3 "s_register_operand" "2")]
4827 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4828 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4831 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4832 [(set_attr "type" "neon_zip<q>")]
;; Expander for vuzp producing two results.  Operands 0 and 3 are the two
;; outputs, both computed from inputs 1 and 2; the second set uses
;; UNSPEC_VUZP2.  The first unspec name is on a line not visible in this
;; excerpt.
4835 (define_expand "neon_vuzp<mode>_internal"
4837 [(set (match_operand:VDQWH 0 "s_register_operand")
4838 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4839 (match_operand:VDQWH 2 "s_register_operand")]
4841 (set (match_operand:VDQWH 3 "s_register_operand" "")
4842 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4847 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber, "=&w") and
;; the inputs are operands 1 and 3, tied to outputs 0 and 2 respectively.
;; The template therefore names only operands 0 and 2.
4848 (define_insn "*neon_vuzp<mode>_insn"
4849 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4850 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4851 (match_operand:VDQWH 3 "s_register_operand" "2")]
4853 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4854 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4857 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4858 [(set_attr "type" "neon_zip<q>")]
;; Expander for loading a VDQX register (operand 0) from a
;; neon_struct_operand memory reference of the same mode (operand 1).  The
;; unspec name and the condition are on lines not visible in this excerpt.
4861 (define_expand "vec_load_lanes<mode><mode>"
4862 [(set (match_operand:VDQX 0 "s_register_operand")
4863 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; vld1 of a whole VDQX register from a neon_struct_operand memory
;; reference ("Um" constraint); scheduling type neon_load1_1reg<q>.
4867 (define_insn "neon_vld1<mode>"
4868 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4869 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4872 "vld1.<V_sz_elem>\t%h0, %A1"
4873 [(set_attr "type" "neon_load1_1reg<q>")]
4876 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4877 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order when the assembly is output, via NEON_ENDIAN_LANE_N below.
;; (The sentence above is cut short in this listing; the line completing it
;; is not visible.)
;;
;; Single-lane vld1 into a VDX register.  Operand 2 is tied to the output,
;; operand 3 is the lane immediate, which is converted and written back to
;; operands[3].  Two templates follow: a whole-register form and a lane
;; form.  The condition that chooses between them is on lines not visible
;; in this excerpt.
4879 (define_insn "neon_vld1_lane<mode>"
4880 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4881 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4882 (match_operand:VDX 2 "s_register_operand" "0")
4883 (match_operand:SI 3 "immediate_operand" "i")]
4887 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4888 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4889 operands[3] = GEN_INT (lane);
4891 return "vld1.<V_sz_elem>\t%P0, %A1";
4893 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4895 [(set_attr "type" "neon_load1_one_lane<q>")]
4898 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4899 ;; here on big endian targets.
;; VQX variant of the single-lane vld1.  After the lane is converted with
;; NEON_ENDIAN_LANE_N, lanes with lane >= max / 2 get an adjustment whose
;; body is on lines not visible in this excerpt.  The destination is then
;; rewritten as a <V_HALF>-mode register built from regno before one of the
;; two templates is returned.
4900 (define_insn "neon_vld1_lane<mode>"
4901 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4902 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4903 (match_operand:VQX 2 "s_register_operand" "0")
4904 (match_operand:SI 3 "immediate_operand" "i")]
4908 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4909 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4910 operands[3] = GEN_INT (lane);
4911 int regno = REGNO (operands[0]);
4912 if (lane >= max / 2)
4916 operands[3] = GEN_INT (lane);
4918 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4920 return "vld1.<V_sz_elem>\t%P0, %A1";
4922 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4924 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one <V_elem> element from memory and duplicate it across a VD_LANE
;; register (vec_duplicate), printed as vld1 with the all-lanes "[]" syntax.
4927 (define_insn "neon_vld1_dup<mode>"
4928 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4929 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4931 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4932 [(set_attr "type" "neon_load1_all_lanes<q>")]
4935 ;; Special case for DImode. Treat it exactly like a simple load.
;; Operand 0 is a DI register and operand 1 a DI neon_struct_operand; the
;; unspec name and condition are on lines not visible in this excerpt.
4936 (define_expand "neon_vld1_dupdi"
4937 [(set (match_operand:DI 0 "s_register_operand" "")
4938 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; VQ2 variant of vld1_dup: one <V_elem> element is loaded and duplicated
;; (vec_duplicate); the template names the %e0 and %f0 forms of the output,
;; each with the all-lanes "[]" syntax.
4944 (define_insn "neon_vld1_dup<mode>"
4945 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4946 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4949 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4951 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI duplicate-load, split once reload has completed: the DI value is
;; loaded into the low DImode part of the output with neon_vld1_dupdi, then
;; copied to the high DImode part with a move.  The "length" attribute is 8.
4954 (define_insn_and_split "neon_vld1_dupv2di"
4955 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4956 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4959 "&& reload_completed"
4962 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4963 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4964 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4967 [(set_attr "length" "8")
4968 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander for storing a VDQX register (operand 1) to a
;; neon_struct_operand memory reference of the same mode (operand 0).  The
;; unspec name and the condition are on lines not visible in this excerpt.
4971 (define_expand "vec_store_lanes<mode><mode>"
4972 [(set (match_operand:VDQX 0 "neon_struct_operand")
4973 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; vst1 of a whole VDQX register to a neon_struct_operand memory reference
;; ("Um" constraint); scheduling type neon_store1_1reg<q>.
4977 (define_insn "neon_vst1<mode>"
4978 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4979 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4982 "vst1.<V_sz_elem>\t%h1, %A0"
4983 [(set_attr "type" "neon_store1_1reg<q>")])
4985 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4986 ;; here on big endian targets.
;; Single-lane vst1 from a VDX register to a <V_elem> memory operand.  The
;; lane immediate (operand 2) is converted with NEON_ENDIAN_LANE_N and
;; written back to operands[2].  Two templates follow: a whole-register
;; form and a lane form.  The condition that chooses between them is on
;; lines not visible in this excerpt.
4987 (define_insn "neon_vst1_lane<mode>"
4988 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4990 [(match_operand:VDX 1 "s_register_operand" "w")
4991 (match_operand:SI 2 "immediate_operand" "i")]
4995 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4996 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4997 operands[2] = GEN_INT (lane);
4999 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5001 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5003 [(set_attr "type" "neon_store1_one_lane<q>")]
5006 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5007 ;; here on big endian targets.
;; VQX variant of the single-lane vst1.  Lanes with lane >= max / 2 get an
;; adjustment whose body is on lines not visible in this excerpt.  The lane
;; is then written back to operands[2] and the source is rewritten as a
;; <V_HALF>-mode register built from regno before one of the two templates
;; is returned.
5008 (define_insn "neon_vst1_lane<mode>"
5009 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5011 [(match_operand:VQX 1 "s_register_operand" "w")
5012 (match_operand:SI 2 "immediate_operand" "i")]
5016 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5017 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5018 int regno = REGNO (operands[1]);
5019 if (lane >= max / 2)
5024 operands[2] = GEN_INT (lane);
5025 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5027 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5029 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5031 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander for a TI structure load, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY operand.  The outer unspec name and the condition
;; are on lines not visible in this excerpt.
5034 (define_expand "vec_load_lanesti<mode>"
5035 [(set (match_operand:TI 0 "s_register_operand")
5036 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5037 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld2 into a TI register group, iterated over VDX.  When <V_sz_elem> is
;; 64 the output is "vld1.64" instead of vld2, and the type attribute
;; switches between neon_load1_2reg<q> and neon_load2_2reg<q> on the same
;; test.
5041 (define_insn "neon_vld2<mode>"
5042 [(set (match_operand:TI 0 "s_register_operand" "=w")
5043 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5044 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5048 if (<V_sz_elem> == 64)
5049 return "vld1.64\t%h0, %A1";
5051 return "vld2.<V_sz_elem>\t%h0, %A1";
5054 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5055 (const_string "neon_load1_2reg<q>")
5056 (const_string "neon_load2_2reg<q>")))]
;; Expander for an OI structure load, iterated over VQ2 through the dummy
;; UNSPEC_VSTRUCTDUMMY operand.  The outer unspec name and the condition
;; are on lines not visible in this excerpt.
5059 (define_expand "vec_load_lanesoi<mode>"
5060 [(set (match_operand:OI 0 "s_register_operand")
5061 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5062 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld2 into an OI register group, iterated over VQ2; scheduling type
;; neon_load2_2reg_q.
5066 (define_insn "neon_vld2<mode>"
5067 [(set (match_operand:OI 0 "s_register_operand" "=w")
5068 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5069 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5072 "vld2.<V_sz_elem>\t%h0, %A1"
5073 [(set_attr "type" "neon_load2_2reg_q")])
5075 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5076 ;; here on big endian targets.
;; Single-lane vld2 into a TI register group (operand 2 tied to the
;; output), iterated over VD_LANE.  The two destination registers are named
;; as DImode registers at regno and regno + 2, and the lane converted with
;; NEON_ENDIAN_LANE_N is printed for both.
5077 (define_insn "neon_vld2_lane<mode>"
5078 [(set (match_operand:TI 0 "s_register_operand" "=w")
5079 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5080 (match_operand:TI 2 "s_register_operand" "0")
5081 (match_operand:SI 3 "immediate_operand" "i")
5082 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5086 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5087 int regno = REGNO (operands[0]);
5089 ops[0] = gen_rtx_REG (DImode, regno);
5090 ops[1] = gen_rtx_REG (DImode, regno + 2);
5091 ops[2] = operands[1];
5092 ops[3] = GEN_INT (lane);
5093 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5096 [(set_attr "type" "neon_load2_one_lane<q>")]
5099 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5100 ;; here on big endian targets.
;; Single-lane vld2 into an OI register group (operand 2 tied to the
;; output), iterated over VQ_HS.  Lanes with lane >= max / 2 get an
;; adjustment whose body is on lines not visible in this excerpt.  The two
;; destination registers are named as DImode registers at regno and
;; regno + 4.
5101 (define_insn "neon_vld2_lane<mode>"
5102 [(set (match_operand:OI 0 "s_register_operand" "=w")
5103 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5104 (match_operand:OI 2 "s_register_operand" "0")
5105 (match_operand:SI 3 "immediate_operand" "i")
5106 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5110 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5111 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5112 int regno = REGNO (operands[0]);
5114 if (lane >= max / 2)
5119 ops[0] = gen_rtx_REG (DImode, regno);
5120 ops[1] = gen_rtx_REG (DImode, regno + 4);
5121 ops[2] = operands[1];
5122 ops[3] = GEN_INT (lane);
5123 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5126 [(set_attr "type" "neon_load2_one_lane<q>")]
;; vld2 with all-lanes duplication into a TI register group, iterated over
;; VDX.  Modes with more than one element use the vld2 "[]" form; otherwise
;; a plain vld1 is output.  The type attribute makes the same distinction
;; using <V_mode_nunits>.
5129 (define_insn "neon_vld2_dup<mode>"
5130 [(set (match_operand:TI 0 "s_register_operand" "=w")
5131 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5132 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5136 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5137 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5139 return "vld1.<V_sz_elem>\t%h0, %A1";
5142 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5143 (const_string "neon_load2_all_lanes<q>")
5144 (const_string "neon_load1_1reg<q>")))]
;; Expander for a TI structure store, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY operand.  The outer unspec name and the condition
;; are on lines not visible in this excerpt.
5147 (define_expand "vec_store_lanesti<mode>"
5148 [(set (match_operand:TI 0 "neon_struct_operand")
5149 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5150 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst2 of a TI register group, iterated over VDX.  When <V_sz_elem> is 64
;; the output is "vst1.64" instead of vst2.
;; NOTE(review): the non-64-bit arm of the type attribute is
;; neon_store2_one_lane<q> although this pattern stores whole registers;
;; the matching load pattern neon_vld2<mode> uses neon_load2_2reg<q>.
;; Confirm whether a whole-register store type was intended (this affects
;; scheduling only).
5154 (define_insn "neon_vst2<mode>"
5155 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5156 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5157 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5161 if (<V_sz_elem> == 64)
5162 return "vst1.64\t%h1, %A0";
5164 return "vst2.<V_sz_elem>\t%h1, %A0";
5167 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5168 (const_string "neon_store1_2reg<q>")
5169 (const_string "neon_store2_one_lane<q>")))]
;; Expander for an OI structure store, iterated over VQ2 through the dummy
;; UNSPEC_VSTRUCTDUMMY operand.  The outer unspec name and the condition
;; are on lines not visible in this excerpt.
5172 (define_expand "vec_store_lanesoi<mode>"
5173 [(set (match_operand:OI 0 "neon_struct_operand")
5174 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5175 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst2 of an OI register group, iterated over VQ2; scheduling type
;; neon_store2_4reg<q>.
5179 (define_insn "neon_vst2<mode>"
5180 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5181 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5182 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5185 "vst2.<V_sz_elem>\t%h1, %A0"
5186 [(set_attr "type" "neon_store2_4reg<q>")]
5189 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5190 ;; here on big endian targets.
;; Single-lane vst2 from a TI register group to a <V_two_elem> memory
;; operand, iterated over VD_LANE.  The two source registers are named as
;; DImode registers at regno and regno + 2, and the lane converted with
;; NEON_ENDIAN_LANE_N is printed for both.
5191 (define_insn "neon_vst2_lane<mode>"
5192 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5193 (unspec:<V_two_elem>
5194 [(match_operand:TI 1 "s_register_operand" "w")
5195 (match_operand:SI 2 "immediate_operand" "i")
5196 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5200 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5201 int regno = REGNO (operands[1]);
5203 ops[0] = operands[0];
5204 ops[1] = gen_rtx_REG (DImode, regno);
5205 ops[2] = gen_rtx_REG (DImode, regno + 2);
5206 ops[3] = GEN_INT (lane);
5207 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5210 [(set_attr "type" "neon_store2_one_lane<q>")]
5213 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5214 ;; here on big endian targets.
;; Single-lane vst2 from an OI register group to a <V_two_elem> memory
;; operand, iterated over VQ_HS.  Lanes with lane >= max / 2 get an
;; adjustment whose body is on lines not visible in this excerpt.  The two
;; source registers are named as DImode registers at regno and regno + 4.
5215 (define_insn "neon_vst2_lane<mode>"
5216 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5217 (unspec:<V_two_elem>
5218 [(match_operand:OI 1 "s_register_operand" "w")
5219 (match_operand:SI 2 "immediate_operand" "i")
5220 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5224 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5225 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5226 int regno = REGNO (operands[1]);
5228 if (lane >= max / 2)
5233 ops[0] = operands[0];
5234 ops[1] = gen_rtx_REG (DImode, regno);
5235 ops[2] = gen_rtx_REG (DImode, regno + 4);
5236 ops[3] = GEN_INT (lane);
5237 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5240 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander for an EI structure load, iterated over VDX through the dummy
;; UNSPEC_VSTRUCTDUMMY operand.  The outer unspec name and the condition
;; are on lines not visible in this excerpt.
5243 (define_expand "vec_load_lanesei<mode>"
5244 [(set (match_operand:EI 0 "s_register_operand")
5245 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5246 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vld3 into an EI register group, iterated over VDX.  When <V_sz_elem> is
;; 64 the output is "vld1.64" instead of vld3, and the type attribute
;; switches between neon_load1_3reg<q> and neon_load3_3reg<q> on the same
;; test.
5250 (define_insn "neon_vld3<mode>"
5251 [(set (match_operand:EI 0 "s_register_operand" "=w")
5252 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5253 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5257 if (<V_sz_elem> == 64)
5258 return "vld1.64\t%h0, %A1";
5260 return "vld3.<V_sz_elem>\t%h0, %A1";
5263 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5264 (const_string "neon_load1_3reg<q>")
5265 (const_string "neon_load3_3reg<q>")))]
;; vec_load_lanes expander for CI-mode structures over the VQ2 modes; it simply
;; forwards both operands to the neon_vld3<mode> generator.
5268 (define_expand "vec_load_lanesci<mode>"
5269 [(match_operand:CI 0 "s_register_operand")
5270 (match_operand:CI 1 "neon_struct_operand")
5271 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5274 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Expander for vld3 on the VQ2 modes.  The CI destination is filled by two
;; EI-mode loads: neon_vld3qa reads at offset 0 of the memory operand and
;; neon_vld3qb reads at offset GET_MODE_SIZE (EImode), taking operands[0] as an
;; additional input as well as its output.
5278 (define_expand "neon_vld3<mode>"
5279 [(match_operand:CI 0 "s_register_operand")
5280 (match_operand:CI 1 "neon_struct_operand")
5281 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5286 mem = adjust_address (operands[1], EImode, 0);
5287 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5288 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5289 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First of the two loads used for a quad-register vld3: reads EI memory
;; operand 1 into the DImode registers at REGNO, REGNO + 4 and REGNO + 8 of CI
;; operand 0.
5293 (define_insn "neon_vld3qa<mode>"
5294 [(set (match_operand:CI 0 "s_register_operand" "=w")
5295 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5296 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5300 int regno = REGNO (operands[0]);
5302 ops[0] = gen_rtx_REG (DImode, regno);
5303 ops[1] = gen_rtx_REG (DImode, regno + 4);
5304 ops[2] = gen_rtx_REG (DImode, regno + 8);
5305 ops[3] = operands[1];
5306 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5309 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second of the two loads used for a quad-register vld3: reads EI memory
;; operand 1 into the DImode registers at REGNO + 2, REGNO + 6 and REGNO + 10
;; of CI operand 0.  Operand 2 is tied to operand 0 (constraint "0"), so the
;; insn reads the same register group that it writes.
5312 (define_insn "neon_vld3qb<mode>"
5313 [(set (match_operand:CI 0 "s_register_operand" "=w")
5314 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5315 (match_operand:CI 2 "s_register_operand" "0")
5316 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5320 int regno = REGNO (operands[0]);
5322 ops[0] = gen_rtx_REG (DImode, regno + 2);
5323 ops[1] = gen_rtx_REG (DImode, regno + 6);
5324 ops[2] = gen_rtx_REG (DImode, regno + 10);
5325 ops[3] = operands[1];
5326 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5329 [(set_attr "type" "neon_load3_3reg<q>")]
;; Single-lane vld3 for the VD_LANE modes: loads one lane of each of the three
;; DImode registers at REGNO, REGNO + 2 and REGNO + 4 of EI operand 0 from
;; memory operand 1.  Operand 2 is tied to operand 0 (constraint "0").  The
;; address is printed with plain %3 here, not with the %A modifier used by the
;; whole-structure patterns.
5332 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5333 ;; here on big endian targets.
5334 (define_insn "neon_vld3_lane<mode>"
5335 [(set (match_operand:EI 0 "s_register_operand" "=w")
5336 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5337 (match_operand:EI 2 "s_register_operand" "0")
5338 (match_operand:SI 3 "immediate_operand" "i")
5339 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5343 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5344 int regno = REGNO (operands[0]);
5346 ops[0] = gen_rtx_REG (DImode, regno);
5347 ops[1] = gen_rtx_REG (DImode, regno + 2);
5348 ops[2] = gen_rtx_REG (DImode, regno + 4);
5349 ops[3] = operands[1];
5350 ops[4] = GEN_INT (lane);
5351 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5355 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Single-lane vld3 for the VQ_HS modes: the destination is CI operand 0 (tied
;; to operand 2) and the three registers are addressed as DImode at REGNO,
;; REGNO + 4 and REGNO + 8.
;; NOTE(review): the statements controlled by `lane >= max / 2' are not visible
;; in this listing; presumably they redirect the access to the other half of
;; each quad register -- confirm against the complete pattern.
5358 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5359 ;; here on big endian targets.
5360 (define_insn "neon_vld3_lane<mode>"
5361 [(set (match_operand:CI 0 "s_register_operand" "=w")
5362 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5363 (match_operand:CI 2 "s_register_operand" "0")
5364 (match_operand:SI 3 "immediate_operand" "i")
5365 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5369 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5370 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5371 int regno = REGNO (operands[0]);
5373 if (lane >= max / 2)
5378 ops[0] = gen_rtx_REG (DImode, regno);
5379 ops[1] = gen_rtx_REG (DImode, regno + 4);
5380 ops[2] = gen_rtx_REG (DImode, regno + 8);
5381 ops[3] = operands[1];
5382 ops[4] = GEN_INT (lane);
5383 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5387 [(set_attr "type" "neon_load3_one_lane<q>")]
;; vld3 "all lanes" form for the VDX modes.  When the mode has more than one
;; element the three DImode registers at REGNO, REGNO + 2 and REGNO + 4 are
;; loaded with the `[]' (all-lanes) syntax; single-element modes fall back to a
;; plain vld1 of the whole register list.  The type attribute makes the same
;; distinction using <V_mode_nunits>.
5390 (define_insn "neon_vld3_dup<mode>"
5391 [(set (match_operand:EI 0 "s_register_operand" "=w")
5392 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5393 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5397 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5399 int regno = REGNO (operands[0]);
5401 ops[0] = gen_rtx_REG (DImode, regno);
5402 ops[1] = gen_rtx_REG (DImode, regno + 2);
5403 ops[2] = gen_rtx_REG (DImode, regno + 4);
5404 ops[3] = operands[1];
5405 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5409 return "vld1.<V_sz_elem>\t%h0, %A1";
5412 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5413 (const_string "neon_load3_all_lanes<q>")
5414 (const_string "neon_load1_1reg<q>")))])
;; vec_store_lanes expander for EI-mode structures over the VDX modes: sets
;; structure memory operand 0 from an unspec wrapping register operand 1.
5416 (define_expand "vec_store_lanesei<mode>"
5417 [(set (match_operand:EI 0 "neon_struct_operand")
5418 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5419 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Whole-structure vst3 for the VDX modes: stores EI register operand 1 to
;; memory operand 0.  Modes with 64-bit elements are emitted as vst1.64 and
;; typed neon_store1_3reg<q>; every other element size is emitted as vst3.
;; The vst3 case is typed neon_store3_3reg<q>, the same scheduling type used by
;; the other whole-structure vst3 patterns (neon_vst3qa/neon_vst3qb), rather
;; than the single-lane store type.
5423 (define_insn "neon_vst3<mode>"
5424 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5425 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5426 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5430 if (<V_sz_elem> == 64)
5431 return "vst1.64\t%h1, %A0";
5433 return "vst3.<V_sz_elem>\t%h1, %A0";
5436 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5437 (const_string "neon_store1_3reg<q>")
5438 (const_string "neon_store3_3reg<q>")))])
;; vec_store_lanes expander for CI-mode structures over the VQ2 modes; it
;; simply forwards both operands to the neon_vst3<mode> generator.
5440 (define_expand "vec_store_lanesci<mode>"
5441 [(match_operand:CI 0 "neon_struct_operand")
5442 (match_operand:CI 1 "s_register_operand")
5443 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5446 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Expander for vst3 on the VQ2 modes.  The CI source is written out by two
;; EI-mode stores: neon_vst3qa writes at offset 0 of the memory operand and
;; neon_vst3qb writes at offset GET_MODE_SIZE (EImode).
5450 (define_expand "neon_vst3<mode>"
5451 [(match_operand:CI 0 "neon_struct_operand")
5452 (match_operand:CI 1 "s_register_operand")
5453 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5458 mem = adjust_address (operands[0], EImode, 0);
5459 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5460 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5461 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First of the two stores used for a quad-register vst3: writes the DImode
;; registers at REGNO, REGNO + 4 and REGNO + 8 of CI operand 1 to EI memory
;; operand 0.
5465 (define_insn "neon_vst3qa<mode>"
5466 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5467 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5468 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5472 int regno = REGNO (operands[1]);
5474 ops[0] = operands[0];
5475 ops[1] = gen_rtx_REG (DImode, regno);
5476 ops[2] = gen_rtx_REG (DImode, regno + 4);
5477 ops[3] = gen_rtx_REG (DImode, regno + 8);
5478 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5481 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second of the two stores used for a quad-register vst3: writes the DImode
;; registers at REGNO + 2, REGNO + 6 and REGNO + 10 of CI operand 1 to EI
;; memory operand 0.
5484 (define_insn "neon_vst3qb<mode>"
5485 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5486 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5487 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5491 int regno = REGNO (operands[1]);
5493 ops[0] = operands[0];
5494 ops[1] = gen_rtx_REG (DImode, regno + 2);
5495 ops[2] = gen_rtx_REG (DImode, regno + 6);
5496 ops[3] = gen_rtx_REG (DImode, regno + 10);
5497 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5500 [(set_attr "type" "neon_store3_3reg<q>")]
;; Single-lane vst3 for the VD_LANE modes: stores one lane of each of the three
;; DImode registers at REGNO, REGNO + 2 and REGNO + 4 of EI operand 1 to memory
;; operand 0.  The address is printed with plain %0 here, not with the %A
;; modifier used by the whole-structure patterns.
5503 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5504 ;; here on big endian targets.
5505 (define_insn "neon_vst3_lane<mode>"
5506 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5507 (unspec:<V_three_elem>
5508 [(match_operand:EI 1 "s_register_operand" "w")
5509 (match_operand:SI 2 "immediate_operand" "i")
5510 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5514 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5515 int regno = REGNO (operands[1]);
5517 ops[0] = operands[0];
5518 ops[1] = gen_rtx_REG (DImode, regno);
5519 ops[2] = gen_rtx_REG (DImode, regno + 2);
5520 ops[3] = gen_rtx_REG (DImode, regno + 4);
5521 ops[4] = GEN_INT (lane);
5522 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5526 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Single-lane vst3 for the VQ_HS modes: the source is CI operand 1 and the
;; three registers are addressed as DImode at REGNO, REGNO + 4 and REGNO + 8.
;; NOTE(review): the statements controlled by `lane >= max / 2' are not visible
;; in this listing; presumably they redirect the access to the other half of
;; each quad register -- confirm against the complete pattern.
5529 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5530 ;; here on big endian targets.
5531 (define_insn "neon_vst3_lane<mode>"
5532 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5533 (unspec:<V_three_elem>
5534 [(match_operand:CI 1 "s_register_operand" "w")
5535 (match_operand:SI 2 "immediate_operand" "i")
5536 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5540 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5541 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5542 int regno = REGNO (operands[1]);
5544 if (lane >= max / 2)
5549 ops[0] = operands[0];
5550 ops[1] = gen_rtx_REG (DImode, regno);
5551 ops[2] = gen_rtx_REG (DImode, regno + 4);
5552 ops[3] = gen_rtx_REG (DImode, regno + 8);
5553 ops[4] = GEN_INT (lane);
5554 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5558 [(set_attr "type" "neon_store3_one_lane<q>")]
;; vec_load_lanes expander for OI-mode structures over the VDX modes: sets
;; register operand 0 from an unspec wrapping structure memory operand 1.
5561 (define_expand "vec_load_lanesoi<mode>"
5562 [(set (match_operand:OI 0 "s_register_operand")
5563 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5564 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Whole-structure vld4 for the VDX modes: loads OI register operand 0 from
;; memory operand 1.  Modes with 64-bit elements are emitted as vld1.64 and
;; typed neon_load1_4reg<q>; every other element size is emitted as vld4 and
;; typed neon_load4_4reg<q>.
5568 (define_insn "neon_vld4<mode>"
5569 [(set (match_operand:OI 0 "s_register_operand" "=w")
5570 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5571 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5575 if (<V_sz_elem> == 64)
5576 return "vld1.64\t%h0, %A1";
5578 return "vld4.<V_sz_elem>\t%h0, %A1";
5581 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5582 (const_string "neon_load1_4reg<q>")
5583 (const_string "neon_load4_4reg<q>")))]
;; vec_load_lanes expander for XI-mode structures over the VQ2 modes; it simply
;; forwards both operands to the neon_vld4<mode> generator.
5586 (define_expand "vec_load_lanesxi<mode>"
5587 [(match_operand:XI 0 "s_register_operand")
5588 (match_operand:XI 1 "neon_struct_operand")
5589 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5592 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Expander for vld4 on the VQ2 modes.  The XI destination is filled by two
;; OI-mode loads: neon_vld4qa reads at offset 0 of the memory operand and
;; neon_vld4qb reads at offset GET_MODE_SIZE (OImode), taking operands[0] as an
;; additional input as well as its output.
5596 (define_expand "neon_vld4<mode>"
5597 [(match_operand:XI 0 "s_register_operand")
5598 (match_operand:XI 1 "neon_struct_operand")
5599 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5604 mem = adjust_address (operands[1], OImode, 0);
5605 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5606 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5607 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First of the two loads used for a quad-register vld4: reads OI memory
;; operand 1 into the DImode registers at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12 of XI operand 0.
5611 (define_insn "neon_vld4qa<mode>"
5612 [(set (match_operand:XI 0 "s_register_operand" "=w")
5613 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5614 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5618 int regno = REGNO (operands[0]);
5620 ops[0] = gen_rtx_REG (DImode, regno);
5621 ops[1] = gen_rtx_REG (DImode, regno + 4);
5622 ops[2] = gen_rtx_REG (DImode, regno + 8);
5623 ops[3] = gen_rtx_REG (DImode, regno + 12);
5624 ops[4] = operands[1];
5625 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5628 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second of the two loads used for a quad-register vld4: reads OI memory
;; operand 1 into the DImode registers at REGNO + 2, REGNO + 6, REGNO + 10 and
;; REGNO + 14 of XI operand 0.  Operand 2 is tied to operand 0 (constraint
;; "0"), so the insn reads the same register group that it writes.
5631 (define_insn "neon_vld4qb<mode>"
5632 [(set (match_operand:XI 0 "s_register_operand" "=w")
5633 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5634 (match_operand:XI 2 "s_register_operand" "0")
5635 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5639 int regno = REGNO (operands[0]);
5641 ops[0] = gen_rtx_REG (DImode, regno + 2);
5642 ops[1] = gen_rtx_REG (DImode, regno + 6);
5643 ops[2] = gen_rtx_REG (DImode, regno + 10);
5644 ops[3] = gen_rtx_REG (DImode, regno + 14);
5645 ops[4] = operands[1];
5646 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5649 [(set_attr "type" "neon_load4_4reg<q>")]
;; Single-lane vld4 for the VD_LANE modes: loads one lane of each of the four
;; DImode registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of OI
;; operand 0 from memory operand 1.  Operand 2 is tied to operand 0
;; (constraint "0").
5652 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5653 ;; here on big endian targets.
5654 (define_insn "neon_vld4_lane<mode>"
5655 [(set (match_operand:OI 0 "s_register_operand" "=w")
5656 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5657 (match_operand:OI 2 "s_register_operand" "0")
5658 (match_operand:SI 3 "immediate_operand" "i")
5659 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5663 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5664 int regno = REGNO (operands[0]);
5666 ops[0] = gen_rtx_REG (DImode, regno);
5667 ops[1] = gen_rtx_REG (DImode, regno + 2);
5668 ops[2] = gen_rtx_REG (DImode, regno + 4);
5669 ops[3] = gen_rtx_REG (DImode, regno + 6);
5670 ops[4] = operands[1];
5671 ops[5] = GEN_INT (lane);
5672 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5676 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Single-lane vld4 for the VQ_HS modes: the destination is XI operand 0 (tied
;; to operand 2) and the four registers are addressed as DImode at REGNO,
;; REGNO + 4, REGNO + 8 and REGNO + 12.
;; NOTE(review): the statements controlled by `lane >= max / 2' are not visible
;; in this listing; presumably they redirect the access to the other half of
;; each quad register -- confirm against the complete pattern.
5679 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5680 ;; here on big endian targets.
5681 (define_insn "neon_vld4_lane<mode>"
5682 [(set (match_operand:XI 0 "s_register_operand" "=w")
5683 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5684 (match_operand:XI 2 "s_register_operand" "0")
5685 (match_operand:SI 3 "immediate_operand" "i")
5686 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5690 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5691 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5692 int regno = REGNO (operands[0]);
5694 if (lane >= max / 2)
5699 ops[0] = gen_rtx_REG (DImode, regno);
5700 ops[1] = gen_rtx_REG (DImode, regno + 4);
5701 ops[2] = gen_rtx_REG (DImode, regno + 8);
5702 ops[3] = gen_rtx_REG (DImode, regno + 12);
5703 ops[4] = operands[1];
5704 ops[5] = GEN_INT (lane);
5705 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5709 [(set_attr "type" "neon_load4_one_lane<q>")]
;; vld4 "all lanes" form for the VDX modes.  When the mode has more than one
;; element the four DImode registers at REGNO, REGNO + 2, REGNO + 4 and
;; REGNO + 6 are loaded with the `[]' (all-lanes) syntax; single-element modes
;; fall back to a plain vld1 of the whole register list.  The type attribute
;; makes the same distinction using <V_mode_nunits>.
5712 (define_insn "neon_vld4_dup<mode>"
5713 [(set (match_operand:OI 0 "s_register_operand" "=w")
5714 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5715 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5719 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5721 int regno = REGNO (operands[0]);
5723 ops[0] = gen_rtx_REG (DImode, regno);
5724 ops[1] = gen_rtx_REG (DImode, regno + 2);
5725 ops[2] = gen_rtx_REG (DImode, regno + 4);
5726 ops[3] = gen_rtx_REG (DImode, regno + 6);
5727 ops[4] = operands[1];
5728 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5733 return "vld1.<V_sz_elem>\t%h0, %A1";
5736 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5737 (const_string "neon_load4_all_lanes<q>")
5738 (const_string "neon_load1_1reg<q>")))]
;; vec_store_lanes expander for OI-mode structures over the VDX modes: sets
;; structure memory operand 0 from an unspec wrapping register operand 1.
5741 (define_expand "vec_store_lanesoi<mode>"
5742 [(set (match_operand:OI 0 "neon_struct_operand")
5743 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5744 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Whole-structure vst4 for the VDX modes: stores OI register operand 1 to
;; memory operand 0.  Modes with 64-bit elements are emitted as vst1.64 and
;; typed neon_store1_4reg<q>; every other element size is emitted as vst4 and
;; typed neon_store4_4reg<q>.
5748 (define_insn "neon_vst4<mode>"
5749 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5750 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5751 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5755 if (<V_sz_elem> == 64)
5756 return "vst1.64\t%h1, %A0";
5758 return "vst4.<V_sz_elem>\t%h1, %A0";
5761 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5762 (const_string "neon_store1_4reg<q>")
5763 (const_string "neon_store4_4reg<q>")))]
;; vec_store_lanes expander for XI-mode structures over the VQ2 modes; it
;; simply forwards both operands to the neon_vst4<mode> generator.
5766 (define_expand "vec_store_lanesxi<mode>"
5767 [(match_operand:XI 0 "neon_struct_operand")
5768 (match_operand:XI 1 "s_register_operand")
5769 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5772 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Expander for vst4 on the VQ2 modes.  The XI source is written out by two
;; OI-mode stores: neon_vst4qa writes at offset 0 of the memory operand and
;; neon_vst4qb writes at offset GET_MODE_SIZE (OImode).
5776 (define_expand "neon_vst4<mode>"
5777 [(match_operand:XI 0 "neon_struct_operand")
5778 (match_operand:XI 1 "s_register_operand")
5779 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5784 mem = adjust_address (operands[0], OImode, 0);
5785 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5786 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5787 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First of the two stores used for a quad-register vst4: writes the DImode
;; registers at REGNO, REGNO + 4, REGNO + 8 and REGNO + 12 of XI operand 1 to
;; OI memory operand 0.
5791 (define_insn "neon_vst4qa<mode>"
5792 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5793 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5794 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5798 int regno = REGNO (operands[1]);
5800 ops[0] = operands[0];
5801 ops[1] = gen_rtx_REG (DImode, regno);
5802 ops[2] = gen_rtx_REG (DImode, regno + 4);
5803 ops[3] = gen_rtx_REG (DImode, regno + 8);
5804 ops[4] = gen_rtx_REG (DImode, regno + 12);
5805 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5808 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second of the two stores used for a quad-register vst4: writes the DImode
;; registers at REGNO + 2, REGNO + 6, REGNO + 10 and REGNO + 14 of XI
;; operand 1 to OI memory operand 0.
5811 (define_insn "neon_vst4qb<mode>"
5812 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5813 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5814 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5818 int regno = REGNO (operands[1]);
5820 ops[0] = operands[0];
5821 ops[1] = gen_rtx_REG (DImode, regno + 2);
5822 ops[2] = gen_rtx_REG (DImode, regno + 6);
5823 ops[3] = gen_rtx_REG (DImode, regno + 10);
5824 ops[4] = gen_rtx_REG (DImode, regno + 14);
5825 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5828 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane vst4 for the VD_LANE modes: stores one lane of each of the four
;; DImode registers at REGNO, REGNO + 2, REGNO + 4 and REGNO + 6 of OI
;; operand 1 to memory operand 0.
5831 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5832 ;; here on big endian targets.
5833 (define_insn "neon_vst4_lane<mode>"
5834 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5835 (unspec:<V_four_elem>
5836 [(match_operand:OI 1 "s_register_operand" "w")
5837 (match_operand:SI 2 "immediate_operand" "i")
5838 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5842 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5843 int regno = REGNO (operands[1]);
5845 ops[0] = operands[0];
5846 ops[1] = gen_rtx_REG (DImode, regno);
5847 ops[2] = gen_rtx_REG (DImode, regno + 2);
5848 ops[3] = gen_rtx_REG (DImode, regno + 4);
5849 ops[4] = gen_rtx_REG (DImode, regno + 6);
5850 ops[5] = GEN_INT (lane);
5851 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5855 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane vst4 for the VQ_HS modes: the source is XI operand 1 and the
;; four registers are addressed as DImode at REGNO, REGNO + 4, REGNO + 8 and
;; REGNO + 12.  The insn is typed neon_store4_one_lane<q>, the same scheduling
;; type as the VD_LANE form of this pattern, because it stores a single lane
;; rather than four whole registers.
;; NOTE(review): the statements controlled by `lane >= max / 2' are not visible
;; in this listing; presumably they redirect the access to the other half of
;; each quad register -- confirm against the complete pattern.
5858 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5859 ;; here on big endian targets.
5860 (define_insn "neon_vst4_lane<mode>"
5861 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5862 (unspec:<V_four_elem>
5863 [(match_operand:XI 1 "s_register_operand" "w")
5864 (match_operand:SI 2 "immediate_operand" "i")
5865 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5869 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5870 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5871 int regno = REGNO (operands[1]);
5873 if (lane >= max / 2)
5878 ops[0] = operands[0];
5879 ops[1] = gen_rtx_REG (DImode, regno);
5880 ops[2] = gen_rtx_REG (DImode, regno + 4);
5881 ops[3] = gen_rtx_REG (DImode, regno + 8);
5882 ops[4] = gen_rtx_REG (DImode, regno + 12);
5883 ops[5] = GEN_INT (lane);
5884 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5888 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widening unpack of the low half of a VU vector: extends (SE iterator) the
;; vec_select of operand 1 chosen by a vect_par_constant_low parallel and emits
;; vmovl, printing the source with the %e modifier.  Only enabled for
;; little-endian (!BYTES_BIG_ENDIAN).
5891 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5892 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5893 (SE:<V_unpack> (vec_select:<V_HALF>
5894 (match_operand:VU 1 "register_operand" "w")
5895 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5896 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5897 "vmovl.<US><V_sz_elem> %q0, %e1"
5898 [(set_attr "type" "neon_shift_imm_long")]
;; Widening unpack of the high half of a VU vector: extends (SE iterator) the
;; vec_select of operand 1 chosen by a vect_par_constant_high parallel and
;; emits vmovl, printing the source with the %f modifier.  Only enabled for
;; little-endian (!BYTES_BIG_ENDIAN).
5901 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5902 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5903 (SE:<V_unpack> (vec_select:<V_HALF>
5904 (match_operand:VU 1 "register_operand" "w")
5905 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5906 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5907 "vmovl.<US><V_sz_elem> %q0, %f1"
5908 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the high-half unpack on the VU modes (little-endian only).
;; Builds a PARALLEL of the lane indices <V_mode_nunits>/2 ..
;; <V_mode_nunits> - 1 and hands it to neon_vec_unpack<US>_hi_<mode>.
5911 (define_expand "vec_unpack<US>_hi_<mode>"
5912 [(match_operand:<V_unpack> 0 "register_operand" "")
5913 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5914 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5916 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5919 for (i = 0; i < (<V_mode_nunits>/2); i++)
5920 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5922 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5923 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for the low-half unpack on the VU modes (little-endian only).
;; Builds a PARALLEL of the lane indices 0 .. <V_mode_nunits>/2 - 1 and hands
;; it to neon_vec_unpack<US>_lo_<mode>.
5930 (define_expand "vec_unpack<US>_lo_<mode>"
5931 [(match_operand:<V_unpack> 0 "register_operand" "")
5932 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5933 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5935 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5938 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5939 RTVEC_ELT (v, i) = GEN_INT (i);
5940 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5941 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of two VU vectors: each input is a
;; vec_select that is then extended (SE iterator), and the product is emitted
;; as vmull with both sources printed using the %e modifier.  Only enabled for
;; little-endian (!BYTES_BIG_ENDIAN).
5948 (define_insn "neon_vec_<US>mult_lo_<mode>"
5949 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5950 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5951 (match_operand:VU 1 "register_operand" "w")
5952 (match_operand:VU 2 "vect_par_constant_low" "")))
5953 (SE:<V_unpack> (vec_select:<V_HALF>
5954 (match_operand:VU 3 "register_operand" "w")
5956 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5957 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5958 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the low-half widening multiply on the VU modes (little-endian
;; only).  Builds a PARALLEL of the lane indices 0 .. <V_mode_nunits>/2 - 1 and
;; hands it to neon_vec_<US>mult_lo_<mode>.
5961 (define_expand "vec_widen_<US>mult_lo_<mode>"
5962 [(match_operand:<V_unpack> 0 "register_operand" "")
5963 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5964 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5965 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5967 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5970 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5971 RTVEC_ELT (v, i) = GEN_INT (i);
5972 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5974 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves of two VU vectors: each input is a
;; vec_select that is then extended (SE iterator), and the product is emitted
;; as vmull with both sources printed using the %f modifier.  Only enabled for
;; little-endian (!BYTES_BIG_ENDIAN).
5982 (define_insn "neon_vec_<US>mult_hi_<mode>"
5983 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5984 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5985 (match_operand:VU 1 "register_operand" "w")
5986 (match_operand:VU 2 "vect_par_constant_high" "")))
5987 (SE:<V_unpack> (vec_select:<V_HALF>
5988 (match_operand:VU 3 "register_operand" "w")
5990 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5991 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5992 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the high-half widening multiply on the VU modes (little-endian
;; only).  Builds a PARALLEL of the lane indices <V_mode_nunits>/2 ..
;; <V_mode_nunits> - 1 and hands it to neon_vec_<US>mult_hi_<mode>.
5995 (define_expand "vec_widen_<US>mult_hi_<mode>"
5996 [(match_operand:<V_unpack> 0 "register_operand" "")
5997 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5998 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5999 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6001 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6004 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6005 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6006 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6008 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening left shift for the VW modes: the RTL is an extension (SE iterator)
;; of an ashift of operand 1 by the constant shift amount in operand 2, emitted
;; as vshll.
6017 (define_insn "neon_vec_<US>shiftl_<mode>"
6018 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6019 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6020 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6023 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6025 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the low-half widening shift on the VU modes (little-endian
;; only).  Takes the <V_HALF>mode subreg of operand 1 at byte offset 0 and
;; passes it to neon_vec_<US>shiftl_<V_half>.
6028 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6029 [(match_operand:<V_unpack> 0 "register_operand" "")
6030 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6031 (match_operand:SI 2 "immediate_operand" "i")]
6032 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6034 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6035 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the high-half widening shift on the VU modes (little-endian
;; only).  Takes the <V_HALF>mode subreg of operand 1 at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) and passes it to neon_vec_<US>shiftl_<V_half>.
6041 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6042 [(match_operand:<V_unpack> 0 "register_operand" "")
6043 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6044 (match_operand:SI 2 "immediate_operand" "i")]
6045 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6047 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6048 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6049 GET_MODE_SIZE (<V_HALF>mode)),
;; Extends (SE iterator) a whole VDI vector to <V_widen> with a single vmovl.
6055 ;; Vectorize for non-neon-quad case
6056 (define_insn "neon_unpack<US>_<mode>"
6057 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6058 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6060 "vmovl.<US><V_sz_elem> %q0, %P1"
6061 [(set_attr "type" "neon_move")]
;; Low-half unpack for the VDI modes: widens the whole input into a fresh
;; <V_widen>mode temporary with neon_unpack<US>_<mode>, then extracts the low
;; part of that temporary with neon_vget_low<V_widen_l>.
6064 (define_expand "vec_unpack<US>_lo_<mode>"
6065 [(match_operand:<V_double_width> 0 "register_operand" "")
6066 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6069 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6070 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6071 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; High-half unpack for the VDI modes: widens the whole input into a fresh
;; <V_widen>mode temporary with neon_unpack<US>_<mode>, then extracts the high
;; part of that temporary with neon_vget_high<V_widen_l>.
6077 (define_expand "vec_unpack<US>_hi_<mode>"
6078 [(match_operand:<V_double_width> 0 "register_operand" "")
6079 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6082 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6083 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6084 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI vectors: the operands are extended to
;; <V_widen> and multiplied, emitted as a single vmull.
6090 (define_insn "neon_vec_<US>mult_<mode>"
6091 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6092 (mult:<V_widen> (SE:<V_widen>
6093 (match_operand:VDI 1 "register_operand" "w"))
6095 (match_operand:VDI 2 "register_operand" "w"))))]
6097 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6098 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; High-half widening multiply for the VDI modes: computes the full widened
;; product into a <V_widen>mode temporary with neon_vec_<US>mult_<mode>, then
;; extracts its high part with neon_vget_high<V_widen_l>.
6101 (define_expand "vec_widen_<US>mult_hi_<mode>"
6102 [(match_operand:<V_double_width> 0 "register_operand" "")
6103 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6104 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6107 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6108 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6109 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Low-half widening multiply for the VDI modes: computes the full widened
;; product into a <V_widen>mode temporary with neon_vec_<US>mult_<mode>, then
;; extracts its low part with neon_vget_low<V_widen_l>.
6116 (define_expand "vec_widen_<US>mult_lo_<mode>"
6117 [(match_operand:<V_double_width> 0 "register_operand" "")
6118 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6119 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6122 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6123 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6124 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; High-half widening shift for the VDI modes: computes the full widened shift
;; into a <V_widen>mode temporary with neon_vec_<US>shiftl_<mode>, then
;; extracts its high part with neon_vget_high<V_widen_l>.
6131 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6132 [(match_operand:<V_double_width> 0 "register_operand" "")
6133 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6134 (match_operand:SI 2 "immediate_operand" "i")]
6137 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6138 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6139 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Low-half widening shift for the VDI modes: computes the full widened shift
;; into a <V_widen>mode temporary with neon_vec_<US>shiftl_<mode>, then
;; extracts its low part with neon_vget_low<V_widen_l>.
6145 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6146 [(match_operand:<V_double_width> 0 "register_operand" "")
6147 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6148 (match_operand:SI 2 "immediate_operand" "i")]
6151 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6152 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6153 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Packs two VN vectors into one <V_narrow_pack> vector by truncating each and
;; concatenating the results.  Emitted as two vmovn instructions, the first
;; writing %e0 and the second %f0, hence length 8 and type "multiple".  The
;; output is early-clobber ("=&w"), so it is not allocated on top of an input
;; that the second vmovn still has to read.
6159 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6160 ; because the ordering of vector elements in Q registers is different from what
6161 ; the semantics of the instructions require.
6163 (define_insn "vec_pack_trunc_<mode>"
6164 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6165 (vec_concat:<V_narrow_pack>
6166 (truncate:<V_narrow>
6167 (match_operand:VN 1 "register_operand" "w"))
6168 (truncate:<V_narrow>
6169 (match_operand:VN 2 "register_operand" "w"))))]
6170 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6171 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6172 [(set_attr "type" "multiple")
6173 (set_attr "length" "8")]
;; Truncates one VN vector to <V_narrow> with a single vmovn.  Only enabled for
;; little-endian (!BYTES_BIG_ENDIAN).
6176 ;; For the non-quad case.
6177 (define_insn "neon_vec_pack_trunc_<mode>"
6178 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6179 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6180 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6181 "vmovn.i<V_sz_elem>\t%P0, %q1"
6182 [(set_attr "type" "neon_move_narrow_q")]
;; vec_pack_trunc expander for the VSHFT modes (little-endian only).  The two
;; inputs are placed in the low and high halves of a <V_DOUBLE>mode temporary
;; with move_lo_quad/move_hi_quad, and that temporary is then narrowed by
;; neon_vec_pack_trunc_<V_double>.
6185 (define_expand "vec_pack_trunc_<mode>"
6186 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6187 (match_operand:VSHFT 1 "register_operand" "")
6188 (match_operand:VSHFT 2 "register_operand")]
6189 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6191 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6193 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6194 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6195 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Matches abs (a - b) written with generic RTL on the VDQ modes and emits
;; vabd.  For floating-point modes the pattern is only enabled under
;; flag_unsafe_math_optimizations; the type attribute selects the FP or
;; integer scheduling class from <Is_float_mode>.
6199 (define_insn "neon_vabd<mode>_2"
6200 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6201 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
6202 (match_operand:VDQ 2 "s_register_operand" "w"))))]
6203 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6204 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6206 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6207 (const_string "neon_fp_abd_s<q>")
6208 (const_string "neon_abd<q>")))]
;; Variant of the vabd pattern where the value under abs is an unspec of the
;; two operands instead of a minus; it emits vabd with the <V_if_elem> suffix.
;; Same enabling condition and type selection as neon_vabd<mode>_2.
;; NOTE(review): the unspec code is on a line not visible in this listing.
6211 (define_insn "neon_vabd<mode>_3"
6212 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
6213 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
6214 (match_operand:VDQ 2 "s_register_operand" "w")]
6216 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
6217 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6219 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
6220 (const_string "neon_fp_abd_s<q>")
6221 (const_string "neon_abd<q>")))]
;; SI -> DI sign extension into a VFP register, after reload: duplicate the SI
;; source across the V2SI view of the destination register, then shift the DI
;; value right arithmetically by 32.
6224 ;; Copy from core-to-neon regs, then extend, not vice-versa
6227 [(set (match_operand:DI 0 "s_register_operand" "")
6228 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6229 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6230 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6231 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6233 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI sign extension into a VFP register, after reload: duplicate the HI
;; source across the V4HI view of the destination register, then shift the DI
;; value right arithmetically by 48.
6237 [(set (match_operand:DI 0 "s_register_operand" "")
6238 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6239 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6240 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6241 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6243 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI sign extension into a VFP register, after reload: duplicate the QI
;; source across the V8QI view of the destination register, then shift the DI
;; value right arithmetically by 56.
6247 [(set (match_operand:DI 0 "s_register_operand" "")
6248 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6249 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6250 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6251 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6253 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; SI -> DI zero extension into a VFP register, after reload: duplicate the SI
;; source across the V2SI view of the destination register, then shift the DI
;; value right logically by 32.
6257 [(set (match_operand:DI 0 "s_register_operand" "")
6258 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6259 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6260 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6261 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6263 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; HI -> DI zero extension into a VFP register, after reload: duplicate the HI
;; source across the V4HI view of the destination register, then shift the DI
;; value right logically by 48.
6267 [(set (match_operand:DI 0 "s_register_operand" "")
6268 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6269 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6270 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6271 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6273 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QI -> DI zero extension into a VFP register, after reload: duplicate the QI
;; source across the V8QI view of the destination register, then shift the DI
;; value right logically by 56.
6277 [(set (match_operand:DI 0 "s_register_operand" "")
6278 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6279 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6280 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6281 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6283 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));