1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The enumerated values are the mnemonics themselves; "vadd" is the default.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Doubleword (VDX) vector move.  Alternative 2 is a constant move: the
;; constant is canonicalized by neon_immediate_valid_for_move, which also
;; yields the element width used to build a "vmov.i<width>" template
;; (with a "vmov.f32" form as the other visible return).  Alternatives
;; 4/5 transfer between a D register and a core-register pair; the
;; remaining alternatives defer to output_move_neon/output_move_double.
;; NOTE(review): the insn condition and some braces are missing from
;; this extraction.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Quadword (VQXMOV) vector move, parallel to the VDX pattern above but
;; using %q (Q-register) operand modifiers and two-instruction core
;; register transfers (alternatives 4/5 emit a pair of vmov's for the
;; low and high halves).  NOTE(review): the insn condition and some
;; braces are missing from this extraction.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Move expanders.  Each one forces a non-register source into a
;; register when the destination is not a register (so at most one
;; memory operand survives), but only while pseudos can still be
;; created.  NOTE(review): closing braces/conditions are missing from
;; this extraction.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Structure-mode (VSTRUCT) variant of the same legitimization.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; V4HF move: requires TARGET_FP16 in addition to TARGET_NEON.
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
;; V8HF move: quadword counterpart of movv4hf.
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
;; Structure-mode (VSTRUCT) move between NEON registers and memory
;; ("Ut" constraint).  Memory alternatives go through output_move_neon;
;; length is computed by arm_attr_length_move_neon.  NOTE(review): the
;; insn condition and case 0 of the switch are missing from this
;; extraction.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; Post-reload splits that decompose register-to-register moves of the
;; large structure modes (EI/OI/CI/XI) into TImode/DImode piece moves,
;; using neon_disambiguate_copy to order the pieces so no source piece
;; is clobbered before it is read.  NOTE(review): the "(define_split"
;; header lines are missing from this extraction.
;; EI = one TImode piece plus one DImode piece.
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
;; OI = two TImode pieces.
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
;; CI = three TImode pieces.
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
;; XI = four TImode pieces.
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector accesses (little-endian only, and only
;; when unaligned access is enabled).  Forces operand 1 to a register
;; when neither operand is one, picks the memory operand, and
;; legitimizes its address for vld1/vst1 addressing.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store/load insns: vst1/vld1 with the element size taken
;; from the mode (<V_sz_elem>).  Doubleword (VDX) forms use %P (D reg),
;; quadword (VQX) forms use %q (Q reg).
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
;; Doubleword misaligned load.
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
;; Quadword misaligned store.
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
;; Quadword misaligned load.
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
;; Lane-insert patterns.  Operand 2 is a one-hot mask; ffs(...)-1
;; recovers the lane index, which is lane-swapped on big-endian.
;; Alternative 0 loads the lane from memory (vld1 to one lane),
;; alternative 1 moves it from a core register (vmov to lane).
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Quadword variant: the lane index is reduced modulo half the vector
;; and operand 0 is rewritten as the D-register half containing the
;; lane (regno + hi), so the same single-lane templates can be used.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; V2DI variant: each 64-bit lane is a whole D register, addressed by
;; rewriting operand 0 to DImode at regno + 2*lane.
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; vec_set expander: converts the plain lane number in operand 2 into
;; the one-hot mask expected by the *_internal patterns above.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
;; Lane-extract patterns, mirroring the vec_set forms: alternative 0
;; stores the lane straight to memory (vst1 one lane), alternative 1
;; moves it to a core register.  Lane indices are endian-swapped on
;; big-endian.
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Quadword variant: operand 1 is narrowed to the D-register half that
;; holds the requested lane, as in vec_set<mode>_internal for VQ2.
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; V2DI variant: selects the D register holding the 64-bit lane.
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Vector initialization: all the work is done in the backend helper.
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
;; Vector add.  Float modes are only allowed with
;; -funsafe-math-optimizations (NEON FP arithmetic is not fully
;; IEEE-conformant); the type attribute picks FP vs integer add.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
;; 64-bit add with both NEON (vadd.i64) and core-register alternatives;
;; the "arch" attribute steers alternative choice depending on whether
;; NEON should be used for 64-bit operations on the current tune.
;; NOTE(review): the core-register cases of the switch are missing from
;; this extraction.
508 (define_insn "adddi3_neon"
509 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
510 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
511 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
512 (clobber (reg:CC CC_REGNUM))]
515 switch (which_alternative)
517 case 0: /* fall through */
518 case 3: return "vadd.i64\t%P0, %P1, %P2";
524 default: gcc_unreachable ();
527 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
528 multiple,multiple,multiple")
529 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
530 (set_attr "length" "*,8,8,*,8,8,8")
531 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtract; same float-mode gating and type selection as the
;; add pattern above.
534 (define_insn "*sub<mode>3_neon"
535 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
536 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
537 (match_operand:VDQ 2 "s_register_operand" "w")))]
538 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
539 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
541 (if_then_else (match_test "<Is_float_mode>")
542 (const_string "neon_fp_addsub_s<q>")
543 (const_string "neon_sub<q>")))]
;; 64-bit subtract: vsub.i64 in NEON registers (alternatives 0/4) or a
;; subs/sbc pair in core registers (alternatives 1-3).
546 (define_insn "subdi3_neon"
547 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
548 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
549 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
550 (clobber (reg:CC CC_REGNUM))]
553 switch (which_alternative)
555 case 0: /* fall through */
556 case 4: return "vsub.i64\t%P0, %P1, %P2";
557 case 1: /* fall through */
558 case 2: /* fall through */
559 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
560 default: gcc_unreachable ();
563 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
564 (set_attr "conds" "*,clob,clob,clob,*")
565 (set_attr "length" "*,8,8,8,*")
566 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply, multiply-accumulate (vmla) and multiply-subtract
;; (vmls).  Float modes require -funsafe-math-optimizations; in the
;; mla/mls patterns operand 1 is the tied accumulator ("0").
569 (define_insn "*mul<mode>3_neon"
570 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
571 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
572 (match_operand:VDQW 2 "s_register_operand" "w")))]
573 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
574 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
576 (if_then_else (match_test "<Is_float_mode>")
577 (const_string "neon_fp_mul_s<q>")
578 (const_string "neon_mul_<V_elem_ch><q>")))]
;; dest = dest + src2 * src3 (vmla).
581 (define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
583 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
584 (match_operand:VDQW 3 "s_register_operand" "w"))
585 (match_operand:VDQW 1 "s_register_operand" "0")))]
586 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
587 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
589 (if_then_else (match_test "<Is_float_mode>")
590 (const_string "neon_fp_mla_s<q>")
591 (const_string "neon_mla_<V_elem_ch><q>")))]
;; dest = dest - src2 * src3 (vmls).
594 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
597 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
598 (match_operand:VDQW 3 "s_register_operand" "w"))))]
599 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
600 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
602 (if_then_else (match_test "<Is_float_mode>")
603 (const_string "neon_fp_mla_s<q>")
604 (const_string "neon_mla_<V_elem_ch><q>")))]
607 ;; Fused multiply-accumulate
608 ;; We define each insn twice here:
609 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
610 ;; to be able to use when converting to FMA.
611 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-accumulate (vfma) gated on flag_unsafe_math_optimizations
;; so the auto-contraction path can use it; the _intrinsic twin below has
;; no such gate and backs the user-visible intrinsics.
612 (define_insn "fma<VCVTF:mode>4"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
618 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic form: no fast-math requirement.
622 (define_insn "fma<VCVTF:mode>4_intrinsic"
623 [(set (match_operand:VCVTF 0 "register_operand" "=w")
624 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
625 (match_operand:VCVTF 2 "register_operand" "w")
626 (match_operand:VCVTF 3 "register_operand" "0")))]
627 "TARGET_NEON && TARGET_FMA"
628 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract: fma with the first factor negated (vfms).
632 (define_insn "*fmsub<VCVTF:mode>4"
633 [(set (match_operand:VCVTF 0 "register_operand" "=w")
634 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
635 (match_operand:VCVTF 2 "register_operand" "w")
636 (match_operand:VCVTF 3 "register_operand" "0")))]
637 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
638 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
639 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic form of vfms: no fast-math requirement.
642 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
643 [(set (match_operand:VCVTF 0 "register_operand" "=w")
644 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
645 (match_operand:VCVTF 2 "register_operand" "w")
646 (match_operand:VCVTF 3 "register_operand" "0")))]
647 "TARGET_NEON && TARGET_FMA"
648 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
649 [(set_attr "type" "neon_fp_mla_s<q>")]
;; ARMv8 rounding (vrint<variant>) and round-and-convert
;; (vcvt<variant>.<su>32.f32) patterns; the rounding variant comes from
;; the NEON_VRINT/NEON_VCVT iterator's nvrint_variant attribute.
652 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
653 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
654 (unspec:VCVTF [(match_operand:VCVTF 1
655 "s_register_operand" "w")]
657 "TARGET_NEON && TARGET_FPU_ARMV8"
658 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
659 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversion with a directed rounding mode; FIXUORS
;; selects the signed/unsigned flavor (<su>).
662 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
663 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
664 (FIXUORS:<V_cmp_result> (unspec:VCVTF
665 [(match_operand:VCVTF 1 "register_operand" "w")]
667 "TARGET_NEON && TARGET_FPU_ARMV8"
668 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
669 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
670 (set_attr "predicable" "no")]
;; Bitwise OR: register form (vorr) or immediate form via
;; neon_output_logic_immediate when operand 2 matches "Dl".
673 (define_insn "ior<mode>3"
674 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
675 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
676 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
679 switch (which_alternative)
681 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
682 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
683 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
684 default: gcc_unreachable ();
687 [(set_attr "type" "neon_logic<q>")]
690 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
691 ;; vorr. We support the pseudo-instruction vand instead, because that
692 ;; corresponds to the canonical form the middle-end expects to use for
693 ;; immediate bitwise-ANDs.
695 (define_insn "and<mode>3"
696 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
697 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
698 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
701 switch (which_alternative)
703 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
704 case 1: return neon_output_logic_immediate ("vand", &operands[2],
705 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
706 default: gcc_unreachable ();
709 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT: note operand 2 is the complemented input (vorn d, n, m
;; computes n | ~m).
712 (define_insn "orn<mode>3_neon"
713 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
714 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
715 (match_operand:VDQ 1 "s_register_operand" "w")))]
717 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
718 [(set_attr "type" "neon_logic<q>")]
721 ;; TODO: investigate whether we should disable
722 ;; this and bicdi3_neon for the A8 in line with the other
;; 64-bit OR-NOT: kept whole for NEON registers, split for core
;; registers either into two SImode orn operations (ARM) or into a
;; one's-complement followed by iordi3 (the non-lowpart path).
;; NOTE(review): the split condition and some output lines are missing
;; from this extraction.
724 (define_insn_and_split "orndi3_neon"
725 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
726 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
727 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
735 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
736 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
737 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
742 operands[3] = gen_highpart (SImode, operands[0]);
743 operands[0] = gen_lowpart (SImode, operands[0]);
744 operands[4] = gen_highpart (SImode, operands[2]);
745 operands[2] = gen_lowpart (SImode, operands[2]);
746 operands[5] = gen_highpart (SImode, operands[1]);
747 operands[1] = gen_lowpart (SImode, operands[1]);
751 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
752 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
756 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
757 (set_attr "length" "*,16,8,8")
758 (set_attr "arch" "any,a,t2,t2")]
;; Bit-clear: vbic d, n, m computes n & ~m (operand 2 is complemented).
761 (define_insn "bic<mode>3_neon"
762 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
763 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
764 (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767 [(set_attr "type" "neon_logic<q>")]
770 ;; Compare to *anddi_notdi_di.
;; 64-bit bit-clear with NEON and core-register alternatives.
;; NOTE(review): the output templates are missing from this extraction.
771 (define_insn "bicdi3_neon"
772 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
773 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
774 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
780 [(set_attr "type" "neon_logic,multiple,multiple")
781 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR (veor).
784 (define_insn "xor<mode>3"
785 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
786 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
787 (match_operand:VDQ 2 "s_register_operand" "w")))]
789 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
790 [(set_attr "type" "neon_logic<q>")]
;; Bitwise NOT (vmvn).
793 (define_insn "one_cmpl<mode>2"
794 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
795 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
797 "vmvn\t%<V_reg>0, %<V_reg>1"
798 [(set_attr "type" "neon_move<q>")]
;; Absolute value (vabs); FP vs integer type chosen by the mode.
801 (define_insn "abs<mode>2"
802 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
803 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
805 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
807 (if_then_else (match_test "<Is_float_mode>")
808 (const_string "neon_fp_abs_s<q>")
809 (const_string "neon_abs<q>")))]
;; Negation (vneg); FP vs integer type chosen by the mode.
812 (define_insn "neg<mode>2"
813 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
814 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
816 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
818 (if_then_else (match_test "<Is_float_mode>")
819 (const_string "neon_fp_neg_s<q>")
820 (const_string "neon_neg<q>")))]
;; 64-bit negate: kept as a single insn until after reload, then split
;; below either for VFP/NEON registers (0 - x via a zeroed scratch) or
;; for core registers (a plain negdi).  NOTE(review): the output
;; template of the insn is missing from this extraction.
823 (define_insn "negdi2_neon"
824 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
825 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
826 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
827 (clobber (reg:CC CC_REGNUM))]
830 [(set_attr "length" "8")
831 (set_attr "type" "multiple")]
834 ; Split negdi2_neon for vfp registers
;; Rewrites neg as (0 - x); the scratch falls back to operand 0 when no
;; separate scratch register was allocated.
;; NOTE(review): the "(define_split" header line is missing from this
;; extraction.
836 [(set (match_operand:DI 0 "s_register_operand" "")
837 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
838 (clobber (match_scratch:DI 2 ""))
839 (clobber (reg:CC CC_REGNUM))]
840 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
841 [(set (match_dup 2) (const_int 0))
842 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
843 (clobber (reg:CC CC_REGNUM))])]
845 if (!REG_P (operands[2]))
846 operands[2] = operands[0];
850 ; Split negdi2_neon for core registers
;; Drops the scratch and re-emits a plain negdi clobbering CC.
;; NOTE(review): the "(define_split" header line is missing from this
;; extraction.
852 [(set (match_operand:DI 0 "s_register_operand" "")
853 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
854 (clobber (match_scratch:DI 2 ""))
855 (clobber (reg:CC CC_REGNUM))]
856 "TARGET_32BIT && reload_completed
857 && arm_general_register_operand (operands[0], DImode)"
858 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
859 (clobber (reg:CC CC_REGNUM))])]
;; Min/max patterns: unsigned integer forms use <V_u_elem>, signed and
;; FP forms use <V_s_elem>; VDQW (which includes float modes) picks the
;; FP type attribute where appropriate.
863 (define_insn "*umin<mode>3_neon"
864 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
865 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
866 (match_operand:VDQIW 2 "s_register_operand" "w")))]
868 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum.
872 (define_insn "*umax<mode>3_neon"
873 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
874 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
875 (match_operand:VDQIW 2 "s_register_operand" "w")))]
877 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
878 [(set_attr "type" "neon_minmax<q>")]
;; Signed/FP minimum.
881 (define_insn "*smin<mode>3_neon"
882 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
883 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
884 (match_operand:VDQW 2 "s_register_operand" "w")))]
886 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
888 (if_then_else (match_test "<Is_float_mode>")
889 (const_string "neon_fp_minmax_s<q>")
890 (const_string "neon_minmax<q>")))]
;; Signed/FP maximum.
893 (define_insn "*smax<mode>3_neon"
894 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
895 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
896 (match_operand:VDQW 2 "s_register_operand" "w")))]
898 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
900 (if_then_else (match_test "<Is_float_mode>")
901 (const_string "neon_fp_minmax_s<q>")
902 (const_string "neon_minmax<q>")))]
905 ; TODO: V2DI shifts are current disabled because there are bugs in the
906 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Left shift: register-count form (vshl) or immediate form built by
;; neon_output_shift_immediate when operand 2 matches "Dn".
909 (define_insn "vashl<mode>3"
910 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
911 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
912 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
915 switch (which_alternative)
917 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
918 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
920 VALID_NEON_QREG_MODE (<MODE>mode),
922 default: gcc_unreachable ();
925 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by immediate (vshr, signed elements).
928 (define_insn "vashr<mode>3_imm"
929 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
930 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
931 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
934 return neon_output_shift_immediate ("vshr", 's', &operands[2],
935 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
938 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by immediate (vshr, unsigned elements).
941 (define_insn "vlshr<mode>3_imm"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
943 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
944 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
947 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
948 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
951 [(set_attr "type" "neon_shift_imm<q>")]
954 ; Used for implementing logical shift-right, which is a left-shift by a negative
955 ; amount, with signed operands. This is essentially the same as ashl<mode>3
956 ; above, but using an unspec in case GCC tries anything tricky with negative
;; vshl with a possibly-negative count, kept behind an unspec so the
;; middle-end cannot reason about the negative-shift semantics.
;; Signed-element variant (used for arithmetic right shift by register).
959 (define_insn "ashl<mode>3_signed"
960 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
961 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
962 (match_operand:VDQI 2 "s_register_operand" "w")]
963 UNSPEC_ASHIFT_SIGNED))]
965 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
966 [(set_attr "type" "neon_shift_reg<q>")]
969 ; Used for implementing logical shift-right, which is a left-shift by a negative
970 ; amount, with unsigned operands.
972 (define_insn "ashl<mode>3_unsigned"
973 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
974 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
975 (match_operand:VDQI 2 "s_register_operand" "w")]
976 UNSPEC_ASHIFT_UNSIGNED))]
978 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
979 [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right shift expander: register counts become a negated
;; left shift via ashl<mode>3_signed; immediates go to vashr<mode>3_imm.
982 (define_expand "vashr<mode>3"
983 [(set (match_operand:VDQIW 0 "s_register_operand" "")
984 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
985 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
988 if (s_register_operand (operands[2], <MODE>mode))
990 rtx neg = gen_reg_rtx (<MODE>mode);
991 emit_insn (gen_neg<mode>2 (neg, operands[2]));
992 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
995 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Logical right shift expander: same shape using the unsigned variant.
999 (define_expand "vlshr<mode>3"
1000 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1001 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1002 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1005 if (s_register_operand (operands[2], <MODE>mode))
1007 rtx neg = gen_reg_rtx (<MODE>mode);
1008 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1009 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1012 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1018 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1019 ;; leaving the upper half uninitialized. This is OK since the shift
1020 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1021 ;; data flow analysis however, we pretend the full register is set
;; Materialise a 32-bit shift count in the low half of a NEON D register,
;; either from memory (VLD1.32 into lane 0) or from a core register
;; (VMOV.32 into lane 0).  See the comment above: the upper half is left
;; undefined, which is fine because VSHL only reads the bottom byte.
1023 (define_insn "neon_load_count"
1024 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1025 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1026 UNSPEC_LOAD_COUNT))]
1029 vld1.32\t{%P0[0]}, %A1
1030 vmov.32\t%P0[0], %1"
1031 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; Post-reload DImode left shift entirely in NEON registers, no core-reg
;; clobbers.  Alternative 0: immediate count (restricted to 0..63 by the
;; insn condition); alternative 1: count held in a D register (as set up
;; by neon_load_count).
1034 (define_insn "ashldi3_neon_noclobber"
1035 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1036 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1037 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1038 "TARGET_NEON && reload_completed
1039 && (!CONST_INT_P (operands[2])
1040 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1042 vshl.u64\t%P0, %P1, %2
1043 vshl.u64\t%P0, %P1, %P2"
1044 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1047 (define_insn_and_split "ashldi3_neon"
1048 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1049 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1050 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1051 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1052 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1053 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1054 (clobber (reg:CC_C CC_REGNUM))]
1057 "TARGET_NEON && reload_completed"
1061 if (IS_VFP_REGNUM (REGNO (operands[0])))
1063 if (CONST_INT_P (operands[2]))
1065 if (INTVAL (operands[2]) < 1)
1067 emit_insn (gen_movdi (operands[0], operands[1]));
1070 else if (INTVAL (operands[2]) > 63)
1071 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1075 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1076 operands[2] = operands[5];
1079 /* Ditch the unnecessary clobbers. */
1080 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1085 if (operands[2] == CONST1_RTX (SImode)
1086 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1087 || REGNO (operands[0]) == REGNO (operands[1])))
1088 /* This clobbers CC. */
1089 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1091 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1092 operands[2], operands[3], operands[4]);
1096 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1097 (set_attr "opt" "*,*,speed,speed,*,*")
1098 (set_attr "type" "multiple")]
1101 ; The shift amount needs to be negated for right-shifts
;; VSHL.S64 by register: a negated count yields an arithmetic right shift
;; (see comment above).  Unspec-wrapped so the negative amount survives.
1102 (define_insn "signed_shift_di3_neon"
1103 [(set (match_operand:DI 0 "s_register_operand" "=w")
1104 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1105 (match_operand:DI 2 "s_register_operand" " w")]
1106 UNSPEC_ASHIFT_SIGNED))]
1107 "TARGET_NEON && reload_completed"
1108 "vshl.s64\t%P0, %P1, %P2"
1109 [(set_attr "type" "neon_shift_reg")]
1112 ; The shift amount needs to be negated for right-shifts
;; VSHL.U64 by register: a negated count yields a logical right shift.
1113 (define_insn "unsigned_shift_di3_neon"
1114 [(set (match_operand:DI 0 "s_register_operand" "=w")
1115 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1116 (match_operand:DI 2 "s_register_operand" " w")]
1117 UNSPEC_ASHIFT_UNSIGNED))]
1118 "TARGET_NEON && reload_completed"
1119 "vshl.u64\t%P0, %P1, %P2"
1120 [(set_attr "type" "neon_shift_reg")]
;; Post-reload DImode arithmetic right shift by immediate, NEON-only.
;; VSHR.S64 encodes immediates 1..64, hence the range in the condition
;; (count 0 is handled elsewhere as a plain move).
1123 (define_insn "ashrdi3_neon_imm_noclobber"
1124 [(set (match_operand:DI 0 "s_register_operand" "=w")
1125 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1126 (match_operand:DI 2 "const_int_operand" " i")))]
1127 "TARGET_NEON && reload_completed
1128 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1129 "vshr.s64\t%P0, %P1, %2"
1130 [(set_attr "type" "neon_shift_imm")]
;; Post-reload DImode logical right shift by immediate, NEON-only.
;; VSHR.U64 encodes immediates 1..64, matching the insn condition.
1133 (define_insn "lshrdi3_neon_imm_noclobber"
1134 [(set (match_operand:DI 0 "s_register_operand" "=w")
1135 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1136 (match_operand:DI 2 "const_int_operand" " i")))]
1137 "TARGET_NEON && reload_completed
1138 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1139 "vshr.u64\t%P0, %P1, %2"
1140 [(set_attr "type" "neon_shift_imm")]
1145 (define_insn_and_split "<shift>di3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1147 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1148 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1149 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1150 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1152 (clobber (reg:CC CC_REGNUM))]
1155 "TARGET_NEON && reload_completed"
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1161 if (CONST_INT_P (operands[2]))
1163 if (INTVAL (operands[2]) < 1)
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1168 else if (INTVAL (operands[2]) > 64)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1171 /* Ditch the unnecessary clobbers. */
1172 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1178 /* We must use a negative left-shift. */
1179 emit_insn (gen_negsi2 (operands[3], operands[2]));
1180 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1181 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1187 if (operands[2] == CONST1_RTX (SImode)
1188 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1189 || REGNO (operands[0]) == REGNO (operands[1])))
1190 /* This clobbers CC. */
1191 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1193 /* This clobbers CC (ASHIFTRT by register only). */
1194 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1195 operands[2], operands[3], operands[4]);
1200 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1201 (set_attr "opt" "*,*,speed,speed,*,*")
1202 (set_attr "type" "multiple")]
1205 ;; Widening operations
1207 (define_expand "widen_ssum<mode>3"
1208 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1209 (plus:<V_double_width>
1210 (sign_extend:<V_double_width>
1211 (match_operand:VQI 1 "s_register_operand" ""))
1212 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1215 machine_mode mode = GET_MODE (operands[1]);
1218 p1 = arm_simd_vect_par_cnst_half (mode, false);
1219 p2 = arm_simd_vect_par_cnst_half (mode, true);
1221 if (operands[0] != operands[2])
1222 emit_move_insn (operands[0], operands[2]);
1224 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1228 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1236 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1237 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1239 (sign_extend:<VW:V_widen>
1241 (match_operand:VQI 1 "s_register_operand" "%w")
1242 (match_operand:VQI 2 "vect_par_constant_low" "")))
1243 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1246 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1247 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1249 [(set_attr "type" "neon_add_widen")])
1251 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1252 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1254 (sign_extend:<VW:V_widen>
1255 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1256 (match_operand:VQI 2 "vect_par_constant_high" "")))
1257 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1260 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1261 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1263 [(set_attr "type" "neon_add_widen")])
;; Signed widening accumulate over a whole D register: VADDW.S sign-extends
;; each element of operand 1 and adds it to the wide accumulator operand 2.
1265 (define_insn "widen_ssum<mode>3"
1266 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1268 (sign_extend:<V_widen>
1269 (match_operand:VW 1 "s_register_operand" "%w"))
1270 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1272 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1273 [(set_attr "type" "neon_add_widen")]
1276 (define_expand "widen_usum<mode>3"
1277 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1278 (plus:<V_double_width>
1279 (zero_extend:<V_double_width>
1280 (match_operand:VQI 1 "s_register_operand" ""))
1281 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1284 machine_mode mode = GET_MODE (operands[1]);
1287 p1 = arm_simd_vect_par_cnst_half (mode, false);
1288 p2 = arm_simd_vect_par_cnst_half (mode, true);
1290 if (operands[0] != operands[2])
1291 emit_move_insn (operands[0], operands[2]);
1293 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1297 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1305 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1306 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1308 (zero_extend:<VW:V_widen>
1310 (match_operand:VQI 1 "s_register_operand" "%w")
1311 (match_operand:VQI 2 "vect_par_constant_low" "")))
1312 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1315 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1316 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1318 [(set_attr "type" "neon_add_widen")])
1320 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1321 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1323 (zero_extend:<VW:V_widen>
1324 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1325 (match_operand:VQI 2 "vect_par_constant_high" "")))
1326 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1329 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1330 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1332 [(set_attr "type" "neon_add_widen")])
;; Unsigned widening accumulate over a whole D register: VADDW.U
;; zero-extends each element of operand 1 and adds it to the wide
;; accumulator operand 2.  Unsigned twin of widen_ssum<mode>3 above.
1334 (define_insn "widen_usum<mode>3"
1335 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1336 (plus:<V_widen> (zero_extend:<V_widen>
1337 (match_operand:VW 1 "s_register_operand" "%w"))
1338 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1340 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1341 [(set_attr "type" "neon_add_widen")]
1344 ;; Helpers for quad-word reduction operations
1346 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1347 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1348 ; N/2-element vector.
;; Combine (add/smin/smax... per <code>) the low V2SI half of a V4SI vector
;; with its high half; result is a V2SI.  First step of the Q-register
;; reduction expanders below.
1350 (define_insn "quad_halves_<code>v4si"
1351 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1353 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1354 (parallel [(const_int 0) (const_int 1)]))
1355 (vec_select:V2SI (match_dup 1)
1356 (parallel [(const_int 2) (const_int 3)]))))]
1358 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1359 [(set_attr "vqh_mnem" "<VQH_mnem>")
1360 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Float variant of quad_halves for V4SF.  Gated on
;; flag_unsafe_math_optimizations because NEON FP flushes denormals to zero
;; (see the note further down in this file).
1363 (define_insn "quad_halves_<code>v4sf"
1364 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1366 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1367 (parallel [(const_int 0) (const_int 1)]))
1368 (vec_select:V2SF (match_dup 1)
1369 (parallel [(const_int 2) (const_int 3)]))))]
1370 "TARGET_NEON && flag_unsafe_math_optimizations"
1371 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1372 [(set_attr "vqh_mnem" "<VQH_mnem>")
1373 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant: combine the low V4HI half with the high half.
;; NOTE(review): output constraint is "+w" here (and in the v16qi variant)
;; while the v4si/v4sf variants use "=w"; the output is not otherwise read,
;; so "=w" looks intended — confirm before changing.
1376 (define_insn "quad_halves_<code>v8hi"
1377 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1379 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1380 (parallel [(const_int 0) (const_int 1)
1381 (const_int 2) (const_int 3)]))
1382 (vec_select:V4HI (match_dup 1)
1383 (parallel [(const_int 4) (const_int 5)
1384 (const_int 6) (const_int 7)]))))]
1386 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1387 [(set_attr "vqh_mnem" "<VQH_mnem>")
1388 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant: combine the low V8QI half with the high half.
;; NOTE(review): "+w" output constraint — see the matching note that applies
;; to the v8hi variant ("=w" in the v4si/v4sf variants); confirm intent.
1391 (define_insn "quad_halves_<code>v16qi"
1392 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1394 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1395 (parallel [(const_int 0) (const_int 1)
1396 (const_int 2) (const_int 3)
1397 (const_int 4) (const_int 5)
1398 (const_int 6) (const_int 7)]))
1399 (vec_select:V8QI (match_dup 1)
1400 (parallel [(const_int 8) (const_int 9)
1401 (const_int 10) (const_int 11)
1402 (const_int 12) (const_int 13)
1403 (const_int 14) (const_int 15)]))))]
1405 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1406 [(set_attr "vqh_mnem" "<VQH_mnem>")
1407 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1410 (define_expand "move_hi_quad_<mode>"
1411 [(match_operand:ANY128 0 "s_register_operand" "")
1412 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1415 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1416 GET_MODE_SIZE (<V_HALF>mode)),
1421 (define_expand "move_lo_quad_<mode>"
1422 [(match_operand:ANY128 0 "s_register_operand" "")
1423 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1426 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1432 ;; Reduction operations
1434 (define_expand "reduc_plus_scal_<mode>"
1435 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1436 (match_operand:VD 1 "s_register_operand" "")]
1437 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1439 rtx vec = gen_reg_rtx (<MODE>mode);
1440 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1441 &gen_neon_vpadd_internal<mode>);
1442 /* The same result is actually computed into every element. */
1443 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1447 (define_expand "reduc_plus_scal_<mode>"
1448 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1449 (match_operand:VQ 1 "s_register_operand" "")]
1450 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1451 && !BYTES_BIG_ENDIAN"
1453 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1455 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1456 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1461 (define_expand "reduc_plus_scal_v2di"
1462 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1463 (match_operand:V2DI 1 "s_register_operand" "")]
1464 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1466 rtx vec = gen_reg_rtx (V2DImode);
1468 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1469 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
;; V2DI plus-reduction step: VADD.I64 adds the two 64-bit halves of the Q
;; register; the scalar result lands in the low D half (%e0).
;; Little-endian only, matching its reduc_plus_scal_v2di user above.
1474 (define_insn "arm_reduc_plus_internal_v2di"
1475 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1476 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1478 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1479 "vadd.i64\t%e0, %e1, %f1"
1480 [(set_attr "type" "neon_add_q")]
1483 (define_expand "reduc_smin_scal_<mode>"
1484 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1485 (match_operand:VD 1 "s_register_operand" "")]
1486 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1488 rtx vec = gen_reg_rtx (<MODE>mode);
1490 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1491 &gen_neon_vpsmin<mode>);
1492 /* The result is computed into every element of the vector. */
1493 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1497 (define_expand "reduc_smin_scal_<mode>"
1498 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1499 (match_operand:VQ 1 "s_register_operand" "")]
1500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1501 && !BYTES_BIG_ENDIAN"
1503 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1505 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1506 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1511 (define_expand "reduc_smax_scal_<mode>"
1512 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1513 (match_operand:VD 1 "s_register_operand" "")]
1514 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1516 rtx vec = gen_reg_rtx (<MODE>mode);
1517 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1518 &gen_neon_vpsmax<mode>);
1519 /* The result is computed into every element of the vector. */
1520 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1524 (define_expand "reduc_smax_scal_<mode>"
1525 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1526 (match_operand:VQ 1 "s_register_operand" "")]
1527 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1528 && !BYTES_BIG_ENDIAN"
1530 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1532 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1533 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1538 (define_expand "reduc_umin_scal_<mode>"
1539 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1540 (match_operand:VDI 1 "s_register_operand" "")]
1543 rtx vec = gen_reg_rtx (<MODE>mode);
1544 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1545 &gen_neon_vpumin<mode>);
1546 /* The result is computed into every element of the vector. */
1547 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1551 (define_expand "reduc_umin_scal_<mode>"
1552 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1553 (match_operand:VQI 1 "s_register_operand" "")]
1554 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1556 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1558 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1559 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1564 (define_expand "reduc_umax_scal_<mode>"
1565 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1566 (match_operand:VDI 1 "s_register_operand" "")]
1569 rtx vec = gen_reg_rtx (<MODE>mode);
1570 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1571 &gen_neon_vpumax<mode>);
1572 /* The result is computed into every element of the vector. */
1573 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
1577 (define_expand "reduc_umax_scal_<mode>"
1578 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1579 (match_operand:VQI 1 "s_register_operand" "")]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1582 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1584 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1585 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1590 (define_insn "neon_vpadd_internal<mode>"
1591 [(set (match_operand:VD 0 "s_register_operand" "=w")
1592 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1593 (match_operand:VD 2 "s_register_operand" "w")]
1596 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1597 ;; Assume this schedules like vadd.
1599 (if_then_else (match_test "<Is_float_mode>")
1600 (const_string "neon_fp_reduc_add_s<q>")
1601 (const_string "neon_reduc_add<q>")))]
1604 (define_insn "neon_vpsmin<mode>"
1605 [(set (match_operand:VD 0 "s_register_operand" "=w")
1606 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1607 (match_operand:VD 2 "s_register_operand" "w")]
1610 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1612 (if_then_else (match_test "<Is_float_mode>")
1613 (const_string "neon_fp_reduc_minmax_s<q>")
1614 (const_string "neon_reduc_minmax<q>")))]
1617 (define_insn "neon_vpsmax<mode>"
1618 [(set (match_operand:VD 0 "s_register_operand" "=w")
1619 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1620 (match_operand:VD 2 "s_register_operand" "w")]
1623 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1625 (if_then_else (match_test "<Is_float_mode>")
1626 (const_string "neon_fp_reduc_minmax_s<q>")
1627 (const_string "neon_reduc_minmax<q>")))]
1630 (define_insn "neon_vpumin<mode>"
1631 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1632 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1633 (match_operand:VDI 2 "s_register_operand" "w")]
1636 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1637 [(set_attr "type" "neon_reduc_minmax<q>")]
1640 (define_insn "neon_vpumax<mode>"
1641 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1642 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1643 (match_operand:VDI 2 "s_register_operand" "w")]
1646 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1647 [(set_attr "type" "neon_reduc_minmax<q>")]
1650 ;; Saturating arithmetic
1652 ; NOTE: Neon supports many more saturating variants of instructions than the
1653 ; following, but these are all GCC currently understands.
1654 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1655 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating vector add on D registers (VQADD.S): results clamp to
;; the signed element range instead of wrapping.
1658 (define_insn "*ss_add<mode>_neon"
1659 [(set (match_operand:VD 0 "s_register_operand" "=w")
1660 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1661 (match_operand:VD 2 "s_register_operand" "w")))]
1663 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1664 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating vector add on D registers (VQADD.U): results clamp
;; to the unsigned element range.
1667 (define_insn "*us_add<mode>_neon"
1668 [(set (match_operand:VD 0 "s_register_operand" "=w")
1669 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1670 (match_operand:VD 2 "s_register_operand" "w")))]
1672 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1673 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating vector subtract on D registers (VQSUB.S).
1676 (define_insn "*ss_sub<mode>_neon"
1677 [(set (match_operand:VD 0 "s_register_operand" "=w")
1678 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1679 (match_operand:VD 2 "s_register_operand" "w")))]
1681 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1682 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating vector subtract on D registers (VQSUB.U): results
;; clamp at zero instead of wrapping around.
1685 (define_insn "*us_sub<mode>_neon"
1686 [(set (match_operand:VD 0 "s_register_operand" "=w")
1687 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1688 (match_operand:VD 2 "s_register_operand" "w")))]
1690 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1691 [(set_attr "type" "neon_qsub<q>")]
1694 ;; Conditional instructions. These are comparisons with conditional moves for
1695 ;; vectors. They perform the assignment:
1697 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1699 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1702 (define_expand "vcond<mode><mode>"
1703 [(set (match_operand:VDQW 0 "s_register_operand" "")
1705 (match_operator 3 "comparison_operator"
1706 [(match_operand:VDQW 4 "s_register_operand" "")
1707 (match_operand:VDQW 5 "nonmemory_operand" "")])
1708 (match_operand:VDQW 1 "s_register_operand" "")
1709 (match_operand:VDQW 2 "s_register_operand" "")))]
1710 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1713 int use_zero_form = 0;
1714 int swap_bsl_operands = 0;
1715 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1716 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1718 rtx (*base_comparison) (rtx, rtx, rtx);
1719 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1721 switch (GET_CODE (operands[3]))
1728 if (operands[5] == CONST0_RTX (<MODE>mode))
1735 if (!REG_P (operands[5]))
1736 operands[5] = force_reg (<MODE>mode, operands[5]);
1739 switch (GET_CODE (operands[3]))
1749 base_comparison = gen_neon_vcge<mode>;
1750 complimentary_comparison = gen_neon_vcgt<mode>;
1758 base_comparison = gen_neon_vcgt<mode>;
1759 complimentary_comparison = gen_neon_vcge<mode>;
1764 base_comparison = gen_neon_vceq<mode>;
1765 complimentary_comparison = gen_neon_vceq<mode>;
1771 switch (GET_CODE (operands[3]))
1778 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1779 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1785 Note that there also exist direct comparison against 0 forms,
1786 so catch those as a special case. */
1790 switch (GET_CODE (operands[3]))
1793 base_comparison = gen_neon_vclt<mode>;
1796 base_comparison = gen_neon_vcle<mode>;
1799 /* Do nothing, other zero form cases already have the correct
1806 emit_insn (base_comparison (mask, operands[4], operands[5]));
1808 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1815 /* Vector compare returns false for lanes which are unordered, so if we use
1816 the inverse of the comparison we actually want to emit, then
1817 swap the operands to BSL, we will end up with the correct result.
1818 Note that a NE NaN and NaN NE b are true for all a, b.
1820 Our transformations are:
1825 a NE b -> !(a EQ b) */
1828 emit_insn (base_comparison (mask, operands[4], operands[5]));
1830 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1832 swap_bsl_operands = 1;
1835 /* We check (a > b || b > a). combining these comparisons give us
1836 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1837 will then give us (a == b || a UNORDERED b) as intended. */
1839 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1840 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1841 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1842 swap_bsl_operands = 1;
1845 /* Operands are ORDERED iff (a > b || b >= a).
1846 Swapping the operands to BSL will give the UNORDERED case. */
1847 swap_bsl_operands = 1;
1850 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1851 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1852 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1858 if (swap_bsl_operands)
1859 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1862 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1867 (define_expand "vcondu<mode><mode>"
1868 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1870 (match_operator 3 "arm_comparison_operator"
1871 [(match_operand:VDQIW 4 "s_register_operand" "")
1872 (match_operand:VDQIW 5 "s_register_operand" "")])
1873 (match_operand:VDQIW 1 "s_register_operand" "")
1874 (match_operand:VDQIW 2 "s_register_operand" "")))]
1878 int inverse = 0, immediate_zero = 0;
1880 mask = gen_reg_rtx (<V_cmp_result>mode);
1882 if (operands[5] == CONST0_RTX (<MODE>mode))
1884 else if (!REG_P (operands[5]))
1885 operands[5] = force_reg (<MODE>mode, operands[5]);
1887 switch (GET_CODE (operands[3]))
1890 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1894 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1898 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1903 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1905 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1910 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1912 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1916 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1925 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1928 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1934 ;; Patterns for builtins.
1936 ; good for plain vadd, vaddq.
1938 (define_expand "neon_vadd<mode>"
1939 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1940 (match_operand:VCVTF 1 "s_register_operand" "w")
1941 (match_operand:VCVTF 2 "s_register_operand" "w")]
1944 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1945 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1947 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1952 ; Note that NEON operations don't support the full IEEE 754 standard: in
1953 ; particular, denormal values are flushed to zero. This means that GCC cannot
1954 ; use those instructions for autovectorization, etc. unless
1955 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1956 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1957 ; header) must work in either case: if -funsafe-math-optimizations is given,
1958 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1959 ; expand to unspecs (which may potentially limit the extent to which they might
1960 ; be optimized by generic code).
1962 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1964 (define_insn "neon_vadd<mode>_unspec"
1965 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1966 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1967 (match_operand:VCVTF 2 "s_register_operand" "w")]
1970 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1972 (if_then_else (match_test "<Is_float_mode>")
1973 (const_string "neon_fp_addsub_s<q>")
1974 (const_string "neon_add<q>")))]
1977 (define_insn "neon_vaddl<sup><mode>"
1978 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1979 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1980 (match_operand:VDI 2 "s_register_operand" "w")]
1983 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1984 [(set_attr "type" "neon_add_long")]
1987 (define_insn "neon_vaddw<sup><mode>"
1988 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1989 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1990 (match_operand:VDI 2 "s_register_operand" "w")]
1993 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1994 [(set_attr "type" "neon_add_widen")]
1999 (define_insn "neon_v<r>hadd<sup><mode>"
2000 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2001 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2002 (match_operand:VDQIW 2 "s_register_operand" "w")]
2005 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2006 [(set_attr "type" "neon_add_halve_q")]
2009 (define_insn "neon_vqadd<sup><mode>"
2010 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2011 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2012 (match_operand:VDQIX 2 "s_register_operand" "w")]
2015 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2016 [(set_attr "type" "neon_qadd<q>")]
2019 (define_insn "neon_v<r>addhn<mode>"
2020 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2021 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2022 (match_operand:VN 2 "s_register_operand" "w")]
2025 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2026 [(set_attr "type" "neon_add_halve_narrow_q")]
2029 ;; Polynomial and Float multiplication.
2030 (define_insn "neon_vmul<pf><mode>"
2031 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2032 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2033 (match_operand:VPF 2 "s_register_operand" "w")]
2036 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2038 (if_then_else (match_test "<Is_float_mode>")
2039 (const_string "neon_fp_mul_s<q>")
2040 (const_string "neon_mul_<V_elem_ch><q>")))]
2043 (define_expand "neon_vmla<mode>"
2044 [(match_operand:VDQW 0 "s_register_operand" "=w")
2045 (match_operand:VDQW 1 "s_register_operand" "0")
2046 (match_operand:VDQW 2 "s_register_operand" "w")
2047 (match_operand:VDQW 3 "s_register_operand" "w")]
2050 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2051 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2052 operands[2], operands[3]));
2054 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2055 operands[2], operands[3]));
2059 (define_expand "neon_vfma<VCVTF:mode>"
2060 [(match_operand:VCVTF 0 "s_register_operand")
2061 (match_operand:VCVTF 1 "s_register_operand")
2062 (match_operand:VCVTF 2 "s_register_operand")
2063 (match_operand:VCVTF 3 "s_register_operand")]
2064 "TARGET_NEON && TARGET_FMA"
2066 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2071 (define_expand "neon_vfms<VCVTF:mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand")
2073 (match_operand:VCVTF 1 "s_register_operand")
2074 (match_operand:VCVTF 2 "s_register_operand")
2075 (match_operand:VCVTF 3 "s_register_operand")]
2076 "TARGET_NEON && TARGET_FMA"
2078 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2083 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2085 (define_insn "neon_vmla<mode>_unspec"
2086 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2087 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2088 (match_operand:VDQW 2 "s_register_operand" "w")
2089 (match_operand:VDQW 3 "s_register_operand" "w")]
2092 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2094 (if_then_else (match_test "<Is_float_mode>")
2095 (const_string "neon_fp_mla_s<q>")
2096 (const_string "neon_mla_<V_elem_ch><q>")))]
;; NOTE(review): extraction-damaged region — insn conditions and closing
;; parens are missing between the numbered lines; code left byte-identical.
;; Widening multiply-accumulate: VW (narrow) operands 2/3, widened
;; accumulator in operand 1 ("0" ties it to the output).
2099 (define_insn "neon_vmlal<sup><mode>"
2100 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2101 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2102 (match_operand:VW 2 "s_register_operand" "w")
2103 (match_operand:VW 3 "s_register_operand" "w")]
2106 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2107 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Multiply-subtract expander; mirrors neon_vmla<mode>: fused pattern for
;; integer/unsafe-math, UNSPEC fallback otherwise.
2110 (define_expand "neon_vmls<mode>"
2111 [(match_operand:VDQW 0 "s_register_operand" "=w")
2112 (match_operand:VDQW 1 "s_register_operand" "0")
2113 (match_operand:VDQW 2 "s_register_operand" "w")
2114 (match_operand:VDQW 3 "s_register_operand" "w")]
2117 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2118 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2119 operands[1], operands[2], operands[3]));
2121 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2122 operands[2], operands[3]));
2126 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2128 (define_insn "neon_vmls<mode>_unspec"
2129 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2130 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2131 (match_operand:VDQW 2 "s_register_operand" "w")
2132 (match_operand:VDQW 3 "s_register_operand" "w")]
2135 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2137 (if_then_else (match_test "<Is_float_mode>")
2138 (const_string "neon_fp_mla_s<q>")
2139 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-subtract, counterpart of neon_vmlal.
2142 (define_insn "neon_vmlsl<sup><mode>"
2143 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2144 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2145 (match_operand:VW 2 "s_register_operand" "w")
2146 (match_operand:VW 3 "s_register_operand" "w")]
2149 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2150 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; NOTE(review): extraction-damaged region — insn conditions/UNSPEC tags and
;; closing parens are missing between the numbered lines; code left
;; byte-identical.  Saturating doubling multiply family below.
2153 ;; vqdmulh, vqrdmulh
2154 (define_insn "neon_vq<r>dmulh<mode>"
2155 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2156 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2157 (match_operand:VMDQI 2 "s_register_operand" "w")]
2160 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2161 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
;; Rounding doubling multiply accumulate/subtract (ARMv8.1-A extension);
;; <neon_rdma_as> selects the "a"/"s" mnemonic suffix.
2164 ;; vqrdmlah, vqrdmlsh
2165 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2166 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2167 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2168 (match_operand:VMDQI 2 "s_register_operand" "w")
2169 (match_operand:VMDQI 3 "s_register_operand" "w")]
2172 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2173 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Widening saturating doubling multiply-accumulate.
2176 (define_insn "neon_vqdmlal<mode>"
2177 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2178 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2179 (match_operand:VMDI 2 "s_register_operand" "w")
2180 (match_operand:VMDI 3 "s_register_operand" "w")]
2183 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2184 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Widening saturating doubling multiply-subtract.
2187 (define_insn "neon_vqdmlsl<mode>"
2188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2189 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2190 (match_operand:VMDI 2 "s_register_operand" "w")
2191 (match_operand:VMDI 3 "s_register_operand" "w")]
2194 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2195 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; Widening multiply (signed/unsigned via <sup>).
2198 (define_insn "neon_vmull<sup><mode>"
2199 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2200 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2201 (match_operand:VW 2 "s_register_operand" "w")]
2204 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2205 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Widening saturating doubling multiply.
2208 (define_insn "neon_vqdmull<mode>"
2209 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2210 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2211 (match_operand:VMDI 2 "s_register_operand" "w")]
2214 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2215 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; NOTE(review): extraction-damaged region — conditions, else branches and
;; closing parens are missing between the numbered lines; code left
;; byte-identical.  Vector subtraction family below.
;; Subtraction expander: generic sub<mode>3 unless the mode is FP without
;; -funsafe-math-optimizations, in which case the UNSPEC form is used.
2218 (define_expand "neon_vsub<mode>"
2219 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2220 (match_operand:VCVTF 1 "s_register_operand" "w")
2221 (match_operand:VCVTF 2 "s_register_operand" "w")]
2224 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2225 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2227 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2232 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2234 (define_insn "neon_vsub<mode>_unspec"
2235 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2236 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2237 (match_operand:VCVTF 2 "s_register_operand" "w")]
2240 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2242 (if_then_else (match_test "<Is_float_mode>")
2243 (const_string "neon_fp_addsub_s<q>")
2244 (const_string "neon_sub<q>")))]
;; Widening subtract: long (both operands narrow) ...
2247 (define_insn "neon_vsubl<sup><mode>"
2248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2249 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2250 (match_operand:VDI 2 "s_register_operand" "w")]
2253 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2254 [(set_attr "type" "neon_sub_long")]
;; ... and wide (first operand already widened).
2257 (define_insn "neon_vsubw<sup><mode>"
2258 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2259 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2260 (match_operand:VDI 2 "s_register_operand" "w")]
2263 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2264 [(set_attr "type" "neon_sub_widen")]
;; Saturating subtract.
2267 (define_insn "neon_vqsub<sup><mode>"
2268 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2269 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2270 (match_operand:VDQIX 2 "s_register_operand" "w")]
2273 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2274 [(set_attr "type" "neon_qsub<q>")]
;; Halving subtract.
2277 (define_insn "neon_vhsub<sup><mode>"
2278 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2279 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2280 (match_operand:VDQIW 2 "s_register_operand" "w")]
2283 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2284 [(set_attr "type" "neon_sub_halve<q>")]
;; (Rounding) subtract returning the narrowed high half.
2287 (define_insn "neon_v<r>subhn<mode>"
2288 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2289 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2290 (match_operand:VN 2 "s_register_operand" "w")]
2293 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2294 [(set_attr "type" "neon_sub_halve_narrow_q")]
;; NOTE(review): extraction-damaged region — whole statement branches,
;; conditions and closing parens are missing between the numbered lines
;; (e.g. the operand lists passed to the emitted insns); code left
;; byte-identical.  Vector comparison family below.
2297 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2298 ;; without unsafe math optimizations.
2299 (define_expand "neon_vc<cmp_op><mode>"
2300 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2302 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2303 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2306 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2308 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2309 && !flag_unsafe_math_optimizations)
2311 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2312 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2313 whereas this expander iterates over the integer modes as well,
2314 but we will never expand to UNSPECs for the integer comparisons. */
2318 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2323 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2332 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; RTL comparison insn; alternative 1 compares against #0 (Dz constraint).
;; Only valid when not (FP mode without unsafe-math) — see condition.
2339 (define_insn "neon_vc<cmp_op><mode>_insn"
2340 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2342 (COMPARISONS:<V_cmp_result>
2343 (match_operand:VDQW 1 "s_register_operand" "w,w")
2344 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2345 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2346 && !flag_unsafe_math_optimizations)"
2349 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2351 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2352 ? "f" : "<cmp_type>",
2353 which_alternative == 0
2354 ? "%<V_reg>2" : "#0")
2355 output_asm_insn (pattern, operands);
2359 (if_then_else (match_operand 2 "zero_operand")
2360 (const_string "neon_compare_zero<q>")
2361 (const_string "neon_compare<q>")))]
;; UNSPEC FP comparison insn used by the expander above when exact FP
;; semantics must be kept.
2364 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2365 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2366 (unspec:<V_cmp_result>
2367 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2368 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2373 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2375 which_alternative == 0
2376 ? "%<V_reg>2" : "#0");
2377 output_asm_insn (pattern, operands);
2380 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unsigned integer compare (GTU/GEU).
2383 (define_insn "neon_vc<cmp_op>u<mode>"
2384 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2386 (GTUGEU:<V_cmp_result>
2387 (match_operand:VDQIW 1 "s_register_operand" "w")
2388 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2390 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2391 [(set_attr "type" "neon_compare<q>")]
;; Absolute compare (|a| <op> |b|): RTL form if unsafe-math, else UNSPEC.
2394 (define_expand "neon_vca<cmp_op><mode>"
2395 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2397 (GTGE:<V_cmp_result>
2398 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2399 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2402 if (flag_unsafe_math_optimizations)
2403 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2406 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2413 (define_insn "neon_vca<cmp_op><mode>_insn"
2414 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2416 (GTGE:<V_cmp_result>
2417 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2418 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2419 "TARGET_NEON && flag_unsafe_math_optimizations"
2420 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2421 [(set_attr "type" "neon_fp_compare_s<q>")]
2424 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2425 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2426 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2427 (match_operand:VCVTF 2 "s_register_operand" "w")]
2430 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2431 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Bit test: result lane all-ones where (a & b) != 0.
2434 (define_insn "neon_vtst<mode>"
2435 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2436 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2437 (match_operand:VDQIW 2 "s_register_operand" "w")]
2440 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2441 [(set_attr "type" "neon_tst<q>")]
;; NOTE(review): extraction-damaged region — UNSPEC tags, conditions and
;; closing parens are missing between the numbered lines; code left
;; byte-identical.  Absolute-difference and min/max family below.
;; Integer absolute difference.
2444 (define_insn "neon_vabd<sup><mode>"
2445 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2446 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2447 (match_operand:VDQIW 2 "s_register_operand" "w")]
2450 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2451 [(set_attr "type" "neon_abd<q>")]
;; Floating-point absolute difference.
2454 (define_insn "neon_vabdf<mode>"
2455 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2456 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2457 (match_operand:VCVTF 2 "s_register_operand" "w")]
2460 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2461 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Widening absolute difference.
2464 (define_insn "neon_vabdl<sup><mode>"
2465 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2466 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2467 (match_operand:VW 2 "s_register_operand" "w")]
2470 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2471 [(set_attr "type" "neon_abd_long")]
;; Absolute difference and accumulate: note the accumulator is operand 1
;; ("0", tied to the output) and sits outside the UNSPEC as a plain plus.
2474 (define_insn "neon_vaba<sup><mode>"
2475 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2476 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2477 (match_operand:VDQIW 3 "s_register_operand" "w")]
2479 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2481 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2482 [(set_attr "type" "neon_arith_acc<q>")]
;; Widening variant of vaba.
2485 (define_insn "neon_vabal<sup><mode>"
2486 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2487 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2488 (match_operand:VW 3 "s_register_operand" "w")]
2490 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2492 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2493 [(set_attr "type" "neon_arith_acc<q>")]
;; Integer min/max (<maxmin> iterator selects the mnemonic).
2496 (define_insn "neon_v<maxmin><sup><mode>"
2497 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2498 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2499 (match_operand:VDQIW 2 "s_register_operand" "w")]
2502 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2503 [(set_attr "type" "neon_minmax<q>")]
;; Floating-point min/max.
2506 (define_insn "neon_v<maxmin>f<mode>"
2507 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2508 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2509 (match_operand:VCVTF 2 "s_register_operand" "w")]
2512 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2513 [(set_attr "type" "neon_fp_minmax_s<q>")]
2516 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
2517 (define_insn "<fmaxmin><mode>3"
2518 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2519 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2520 (match_operand:VCVTF 2 "s_register_operand" "w")]
2522 "TARGET_NEON && TARGET_FPU_ARMV8"
2523 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2524 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; NOTE(review): extraction-damaged region — conditions, DONE; and closing
;; parens are missing between the numbered lines; code left byte-identical.
;; Pairwise and reciprocal-step operations below (D-register only for the
;; pairwise forms, per the VD/VDI iterators).
2527 (define_expand "neon_vpadd<mode>"
2528 [(match_operand:VD 0 "s_register_operand" "=w")
2529 (match_operand:VD 1 "s_register_operand" "w")
2530 (match_operand:VD 2 "s_register_operand" "w")]
2533 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; Pairwise add long: result lanes are double-width sums of adjacent pairs.
2538 (define_insn "neon_vpaddl<sup><mode>"
2539 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2540 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2543 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2544 [(set_attr "type" "neon_reduc_add_long")]
;; Pairwise add and accumulate long (accumulator tied via "0").
2547 (define_insn "neon_vpadal<sup><mode>"
2548 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2549 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2550 (match_operand:VDQIW 2 "s_register_operand" "w")]
2553 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2554 [(set_attr "type" "neon_reduc_add_acc")]
;; Pairwise integer min/max.
2557 (define_insn "neon_vp<maxmin><sup><mode>"
2558 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2559 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2560 (match_operand:VDI 2 "s_register_operand" "w")]
2563 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2564 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise floating-point min/max.
2567 (define_insn "neon_vp<maxmin>f<mode>"
2568 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2569 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2570 (match_operand:VCVTF 2 "s_register_operand" "w")]
2573 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2574 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; Newton-Raphson reciprocal step.
2577 (define_insn "neon_vrecps<mode>"
2578 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2579 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2580 (match_operand:VCVTF 2 "s_register_operand" "w")]
2583 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2584 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Newton-Raphson reciprocal square-root step.
2587 (define_insn "neon_vrsqrts<mode>"
2588 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2589 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2590 (match_operand:VCVTF 2 "s_register_operand" "w")]
2593 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2594 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; NOTE(review): extraction-damaged region — conditions, DONE; and closing
;; parens are missing between the numbered lines; code left byte-identical.
;; Unary operations below; several expanders simply forward to the generic
;; optab pattern of the same operation.
2597 (define_expand "neon_vabs<mode>"
2598 [(match_operand:VDQW 0 "s_register_operand" "")
2599 (match_operand:VDQW 1 "s_register_operand" "")]
2602 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; Saturating absolute value.
2606 (define_insn "neon_vqabs<mode>"
2607 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2608 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2611 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2612 [(set_attr "type" "neon_qabs<q>")]
;; Byte swap within each element, implemented with vrev.8.
2615 (define_insn "neon_bswap<mode>"
2616 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2617 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2619 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2620 [(set_attr "type" "neon_rev<q>")]
2623 (define_expand "neon_vneg<mode>"
2624 [(match_operand:VDQW 0 "s_register_operand" "")
2625 (match_operand:VDQW 1 "s_register_operand" "")]
2628 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
;; copysign: builds an all-0x80000000 sign-bit mask vector and selects the
;; sign bits from operand 1 into operand 2's magnitudes via vbsl.
2632 (define_expand "neon_copysignf<mode>"
2633 [(match_operand:VCVTF 0 "register_operand")
2634 (match_operand:VCVTF 1 "register_operand")
2635 (match_operand:VCVTF 2 "register_operand")]
2639 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2640 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2641 rtvec v = rtvec_alloc (n_elt);
2643 /* Create bitmask for vector select. */
2644 for (i = 0; i < n_elt; ++i)
2645 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2647 emit_move_insn (v_bitmask,
2648 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2649 emit_move_insn (operands[0], operands[2]);
2650 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2651 <VCVTF:V_cmp_result>mode, 0);
2652 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; Saturating negate.
2659 (define_insn "neon_vqneg<mode>"
2660 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2661 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2664 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2665 [(set_attr "type" "neon_qneg<q>")]
;; Count leading sign bits.
2668 (define_insn "neon_vcls<mode>"
2669 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2670 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2673 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2674 [(set_attr "type" "neon_cls<q>")]
;; Count leading zeros — standard optab name, reused by neon_vclz below.
2677 (define_insn "clz<mode>2"
2678 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2679 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2681 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2682 [(set_attr "type" "neon_cnt<q>")]
2685 (define_expand "neon_vclz<mode>"
2686 [(match_operand:VDQIW 0 "s_register_operand" "")
2687 (match_operand:VDQIW 1 "s_register_operand" "")]
2690 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Population count — standard optab name, reused by neon_vcnt below.
2694 (define_insn "popcount<mode>2"
2695 [(set (match_operand:VE 0 "s_register_operand" "=w")
2696 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2698 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2699 [(set_attr "type" "neon_cnt<q>")]
2702 (define_expand "neon_vcnt<mode>"
2703 [(match_operand:VE 0 "s_register_operand" "=w")
2704 (match_operand:VE 1 "s_register_operand" "w")]
2707 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; Reciprocal estimate.
2711 (define_insn "neon_vrecpe<mode>"
2712 [(set (match_operand:V32 0 "s_register_operand" "=w")
2713 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2716 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2717 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; Reciprocal square-root estimate.
2720 (define_insn "neon_vrsqrte<mode>"
2721 [(set (match_operand:V32 0 "s_register_operand" "=w")
2722 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2725 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2726 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Bitwise NOT via the generic one's-complement pattern.
2729 (define_expand "neon_vmvn<mode>"
2730 [(match_operand:VDQIW 0 "s_register_operand" "")
2731 (match_operand:VDQIW 1 "s_register_operand" "")]
2734 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; NOTE(review): extraction-damaged region — the sign_extend/zero_extend
;; wrappers, conditions, braces and closing parens are missing between the
;; numbered lines; code left byte-identical.  Lane-extraction insns below.
;; D-register lane extract with sign extension; on big-endian the lane
;; number is flipped because NEON element order differs from array order.
2738 (define_insn "neon_vget_lane<mode>_sext_internal"
2739 [(set (match_operand:SI 0 "s_register_operand" "=r")
2741 (vec_select:<V_elem>
2742 (match_operand:VD 1 "s_register_operand" "w")
2743 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2746 if (BYTES_BIG_ENDIAN)
2748 int elt = INTVAL (operands[2]);
2749 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2750 operands[2] = GEN_INT (elt);
2752 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2754 [(set_attr "type" "neon_to_gp")]
;; D-register lane extract with zero extension.
2757 (define_insn "neon_vget_lane<mode>_zext_internal"
2758 [(set (match_operand:SI 0 "s_register_operand" "=r")
2760 (vec_select:<V_elem>
2761 (match_operand:VD 1 "s_register_operand" "w")
2762 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2765 if (BYTES_BIG_ENDIAN)
2767 int elt = INTVAL (operands[2]);
2768 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2769 operands[2] = GEN_INT (elt);
2771 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2773 [(set_attr "type" "neon_to_gp")]
;; Q-register variant: picks the correct D half of the Q register
;; (regno + 2 * (elt / halfelts)) and adjusts the lane within that half.
2776 (define_insn "neon_vget_lane<mode>_sext_internal"
2777 [(set (match_operand:SI 0 "s_register_operand" "=r")
2779 (vec_select:<V_elem>
2780 (match_operand:VQ2 1 "s_register_operand" "w")
2781 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2785 int regno = REGNO (operands[1]);
2786 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2787 unsigned int elt = INTVAL (operands[2]);
2788 unsigned int elt_adj = elt % halfelts;
2790 if (BYTES_BIG_ENDIAN)
2791 elt_adj = halfelts - 1 - elt_adj;
2793 ops[0] = operands[0];
2794 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2795 ops[2] = GEN_INT (elt_adj);
2796 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2800 [(set_attr "type" "neon_to_gp_q")]
;; Q-register zero-extending variant of the above.
2803 (define_insn "neon_vget_lane<mode>_zext_internal"
2804 [(set (match_operand:SI 0 "s_register_operand" "=r")
2806 (vec_select:<V_elem>
2807 (match_operand:VQ2 1 "s_register_operand" "w")
2808 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2812 int regno = REGNO (operands[1]);
2813 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2814 unsigned int elt = INTVAL (operands[2]);
2815 unsigned int elt_adj = elt % halfelts;
2817 if (BYTES_BIG_ENDIAN)
2818 elt_adj = halfelts - 1 - elt_adj;
2820 ops[0] = operands[0];
2821 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2822 ops[2] = GEN_INT (elt_adj);
2823 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2827 [(set_attr "type" "neon_to_gp_q")]
;; NOTE(review): extraction-damaged region — conditions, else branches,
;; DONE; and closing parens are missing between the numbered lines; code
;; left byte-identical.  Lane-extraction expanders below.
;; vget_lane intrinsic: converts the vldm-order lane number to array order
;; on big-endian (XOR against reg_nelts-1 swaps within each 64-bit chunk),
;; then uses vec_extract for 32-bit elements or the sext internal otherwise.
2830 (define_expand "neon_vget_lane<mode>"
2831 [(match_operand:<V_ext> 0 "s_register_operand" "")
2832 (match_operand:VDQW 1 "s_register_operand" "")
2833 (match_operand:SI 2 "immediate_operand" "")]
2836 if (BYTES_BIG_ENDIAN)
2838 /* The intrinsics are defined in terms of a model where the
2839 element ordering in memory is vldm order, whereas the generic
2840 RTL is defined in terms of a model where the element ordering
2841 in memory is array order. Convert the lane number to conform
2843 unsigned int elt = INTVAL (operands[2]);
2844 unsigned int reg_nelts
2845 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2846 elt ^= reg_nelts - 1;
2847 operands[2] = GEN_INT (elt);
2850 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2851 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2853 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Unsigned variant: same lane conversion, zext internal for sub-word
;; elements.
2859 (define_expand "neon_vget_laneu<mode>"
2860 [(match_operand:<V_ext> 0 "s_register_operand" "")
2861 (match_operand:VDQIW 1 "s_register_operand" "")
2862 (match_operand:SI 2 "immediate_operand" "")]
2865 if (BYTES_BIG_ENDIAN)
2867 /* The intrinsics are defined in terms of a model where the
2868 element ordering in memory is vldm order, whereas the generic
2869 RTL is defined in terms of a model where the element ordering
2870 in memory is array order. Convert the lane number to conform
2872 unsigned int elt = INTVAL (operands[2]);
2873 unsigned int reg_nelts
2874 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2875 elt ^= reg_nelts - 1;
2876 operands[2] = GEN_INT (elt);
2879 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2880 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2882 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode is a single 64-bit "lane": extraction is just a move.
2888 (define_expand "neon_vget_lanedi"
2889 [(match_operand:DI 0 "s_register_operand" "=r")
2890 (match_operand:DI 1 "s_register_operand" "w")
2891 (match_operand:SI 2 "immediate_operand" "")]
2894 emit_move_insn (operands[0], operands[1]);
;; V2DI: pick low or high 64-bit half; only lanes 0 and 1 are legal.
2898 (define_expand "neon_vget_lanev2di"
2899 [(match_operand:DI 0 "s_register_operand" "")
2900 (match_operand:V2DI 1 "s_register_operand" "")
2901 (match_operand:SI 2 "immediate_operand" "")]
2906 if (BYTES_BIG_ENDIAN)
2908 /* The intrinsics are defined in terms of a model where the
2909 element ordering in memory is vldm order, whereas the generic
2910 RTL is defined in terms of a model where the element ordering
2911 in memory is array order. Convert the lane number to conform
2913 unsigned int elt = INTVAL (operands[2]);
2914 unsigned int reg_nelts = 2;
2915 elt ^= reg_nelts - 1;
2916 operands[2] = GEN_INT (elt);
2919 lane = INTVAL (operands[2]);
2920 gcc_assert ((lane ==0) || (lane == 1));
2921 emit_move_insn (operands[0], lane == 0
2922 ? gen_lowpart (DImode, operands[1])
2923 : gen_highpart (DImode, operands[1]));
;; NOTE(review): extraction-damaged region — conditions, DONE; and closing
;; parens are missing between the numbered lines; code left byte-identical.
;; vset_lane: big-endian lane-number conversion as in vget_lane, then the
;; generic vec_set pattern with a one-hot lane mask (1 << elt).
2927 (define_expand "neon_vset_lane<mode>"
2928 [(match_operand:VDQ 0 "s_register_operand" "=w")
2929 (match_operand:<V_elem> 1 "s_register_operand" "r")
2930 (match_operand:VDQ 2 "s_register_operand" "0")
2931 (match_operand:SI 3 "immediate_operand" "i")]
2934 unsigned int elt = INTVAL (operands[3]);
2936 if (BYTES_BIG_ENDIAN)
2938 unsigned int reg_nelts
2939 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2940 elt ^= reg_nelts - 1;
2943 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2944 GEN_INT (1 << elt), operands[2]));
2948 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
2950 (define_expand "neon_vset_lanedi"
2951 [(match_operand:DI 0 "s_register_operand" "=w")
2952 (match_operand:DI 1 "s_register_operand" "r")
2953 (match_operand:DI 2 "s_register_operand" "0")
2954 (match_operand:SI 3 "immediate_operand" "i")]
2957 emit_move_insn (operands[0], operands[1]);
;; vcreate: reinterpret a 64-bit scalar as the requested vector mode.
2961 (define_expand "neon_vcreate<mode>"
2962 [(match_operand:VD_RE 0 "s_register_operand" "")
2963 (match_operand:DI 1 "general_operand" "")]
2966 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2967 emit_move_insn (operands[0], src);
;; NOTE(review): extraction-damaged region — conditions, asm templates for
;; the HF variants and closing parens are missing between the numbered
;; lines; code left byte-identical.  Duplicate-scalar family below.
;; Broadcast a GP register into all lanes (8/16-bit element modes).
2971 (define_insn "neon_vdup_n<mode>"
2972 [(set (match_operand:VX 0 "s_register_operand" "=w")
2973 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2975 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2976 [(set_attr "type" "neon_from_gp<q>")]
;; Half-float broadcast, D register.
2979 (define_insn "neon_vdup_nv4hf"
2980 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2981 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
2984 [(set_attr "type" "neon_from_gp")]
;; Half-float broadcast, Q register.
2987 (define_insn "neon_vdup_nv8hf"
2988 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
2989 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
2992 [(set_attr "type" "neon_from_gp_q")]
;; 32-bit element broadcast: two alternatives, GP source ("r") or VFP
;; scalar source ("t", printed with %y).
2995 (define_insn "neon_vdup_n<mode>"
2996 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2997 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3000 vdup.<V_sz_elem>\t%<V_reg>0, %1
3001 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3002 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode "broadcast" is a plain move.
3005 (define_expand "neon_vdup_ndi"
3006 [(match_operand:DI 0 "s_register_operand" "=w")
3007 (match_operand:DI 1 "s_register_operand" "r")]
3010 emit_move_insn (operands[0], operands[1]);
;; V2DI broadcast needs two moves (one per 64-bit half), hence length 8.
3015 (define_insn "neon_vdup_nv2di"
3016 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3017 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3020 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3021 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3022 [(set_attr "length" "8")
3023 (set_attr "type" "multiple")]
;; NOTE(review): extraction-damaged region — conditions, if/else structure
;; around the two return statements and closing parens are missing between
;; the numbered lines; code left byte-identical.  Duplicate-lane family.
;; Broadcast one lane of a D register into all lanes of the destination;
;; big-endian reverses the source lane index first.
3026 (define_insn "neon_vdup_lane<mode>_internal"
3027 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3029 (vec_select:<V_elem>
3030 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3031 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3034 if (BYTES_BIG_ENDIAN)
3036 int elt = INTVAL (operands[2]);
3037 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3038 operands[2] = GEN_INT (elt);
3041 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3043 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3045 [(set_attr "type" "neon_dup<q>")]
;; Half-float variant, gated on FP16 support.
3048 (define_insn "neon_vdup_lane<mode>_internal"
3049 [(set (match_operand:VH 0 "s_register_operand" "=w")
3051 (vec_select:<V_elem>
3052 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3053 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3054 "TARGET_NEON && TARGET_FP16"
3056 if (BYTES_BIG_ENDIAN)
3058 int elt = INTVAL (operands[2]);
3059 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3060 operands[2] = GEN_INT (elt);
3063 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3065 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3067 [(set_attr "type" "neon_dup<q>")]
;; Expander: converts vldm-order lane to array order on big-endian (same
;; XOR trick as vget_lane), then emits the internal insn.
3070 (define_expand "neon_vdup_lane<mode>"
3071 [(match_operand:VDQW 0 "s_register_operand" "=w")
3072 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3073 (match_operand:SI 2 "immediate_operand" "i")]
3076 if (BYTES_BIG_ENDIAN)
3078 unsigned int elt = INTVAL (operands[2]);
3079 unsigned int reg_nelts
3080 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3081 elt ^= reg_nelts - 1;
3082 operands[2] = GEN_INT (elt);
3084 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; FP16 expander counterpart.
3089 (define_expand "neon_vdup_lane<mode>"
3090 [(match_operand:VH 0 "s_register_operand")
3091 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3092 (match_operand:SI 2 "immediate_operand")]
3093 "TARGET_NEON && TARGET_FP16"
3095 if (BYTES_BIG_ENDIAN)
3097 unsigned int elt = INTVAL (operands[2]);
3098 unsigned int reg_nelts
3099 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3100 elt ^= reg_nelts - 1;
3101 operands[2] = GEN_INT (elt);
3103 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3108 ; Scalar index is ignored, since only zero is valid here.
3109 (define_expand "neon_vdup_lanedi"
3110 [(match_operand:DI 0 "s_register_operand" "=w")
3111 (match_operand:DI 1 "s_register_operand" "w")
3112 (match_operand:SI 2 "immediate_operand" "i")]
3115 emit_move_insn (operands[0], operands[1]);
3119 ; Likewise for v2di, as the DImode second operand has only a single element.
3120 (define_expand "neon_vdup_lanev2di"
3121 [(match_operand:V2DI 0 "s_register_operand" "=w")
3122 (match_operand:DI 1 "s_register_operand" "w")
3123 (match_operand:SI 2 "immediate_operand" "i")]
3126 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
;; NOTE(review): extraction-damaged region — conditions, split patterns and
;; closing parens are missing between the numbered lines; code left
;; byte-identical.
3130 ; Disabled before reload because we don't want combine doing something silly,
3131 ; but used by the post-reload expansion of neon_vcombine.
3132 (define_insn "*neon_vswp<mode>"
3133 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3134 (match_operand:VDQX 1 "s_register_operand" "+w"))
3135 (set (match_dup 1) (match_dup 0))]
3136 "TARGET_NEON && reload_completed"
3137 "vswp\t%<V_reg>0, %<V_reg>1"
3138 [(set_attr "type" "neon_permute<q>")]
3141 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3143 ;; FIXME: A different implementation of this builtin could make it much
3144 ;; more likely that we wouldn't actually need to output anything (we could make
3145 ;; it so that the reg allocator puts things in the right places magically
3146 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; Concatenate two D vectors into a Q vector; split after reload into the
;; necessary moves by neon_split_vcombine.
3148 (define_insn_and_split "neon_vcombine<mode>"
3149 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3150 (vec_concat:<V_DOUBLE>
3151 (match_operand:VDX 1 "s_register_operand" "w")
3152 (match_operand:VDX 2 "s_register_operand" "w")))]
3155 "&& reload_completed"
3158 neon_split_vcombine (operands);
3161 [(set_attr "type" "multiple")]
;; NOTE(review): extraction-damaged region — conditions, DONE; and closing
;; parens are missing between the numbered lines; code left byte-identical.
;; Extract the high half of a Q vector via a subreg at byte offset
;; GET_MODE_SIZE (<V_HALF>mode).
3164 (define_expand "neon_vget_high<mode>"
3165 [(match_operand:<V_HALF> 0 "s_register_operand")
3166 (match_operand:VQX 1 "s_register_operand")]
3169 emit_move_insn (operands[0],
3170 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3171 GET_MODE_SIZE (<V_HALF>mode)));
;; Low half: same subreg at offset 0 (offset line not visible here).
3175 (define_expand "neon_vget_low<mode>"
3176 [(match_operand:<V_HALF> 0 "s_register_operand")
3177 (match_operand:VQX 1 "s_register_operand")]
3180 emit_move_insn (operands[0],
3181 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed int -> float conversion (standard name); disabled when
;; -frounding-math is in effect because VCVT ignores the dynamic rounding mode.
3186 (define_insn "float<mode><V_cvtto>2"
3187 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3188 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3189 "TARGET_NEON && !flag_rounding_math"
3190 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3191 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned int -> float conversion, same rounding-math restriction.
3194 (define_insn "floatuns<mode><V_cvtto>2"
3195 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3196 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3197 "TARGET_NEON && !flag_rounding_math"
3198 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3199 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Float -> signed int, truncating (standard fix_trunc name).
3202 (define_insn "fix_trunc<mode><V_cvtto>2"
3203 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3204 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3206 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3207 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Float -> unsigned int, truncating.
3210 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3211 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3212 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3214 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3215 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic vcvt: float -> signed/unsigned int (<sup> selects s/u).
3218 (define_insn "neon_vcvt<sup><mode>"
3219 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3220 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3223 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3224 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic vcvt: signed/unsigned int -> float.
3227 (define_insn "neon_vcvt<sup><mode>"
3228 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3229 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3232 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3233 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widen four half-precision floats (D reg) to four single-precision (Q reg);
;; requires the FP16 extension.
3236 (define_insn "neon_vcvtv4sfv4hf"
3237 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3238 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3240 "TARGET_NEON && TARGET_FP16"
3241 "vcvt.f32.f16\t%q0, %P1"
3242 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrow four single-precision floats (Q reg) to four half-precision (D reg).
3245 (define_insn "neon_vcvtv4hfv4sf"
3246 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3247 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3249 "TARGET_NEON && TARGET_FP16"
3250 "vcvt.f16.f32\t%P0, %q1"
3251 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; Fixed-point vcvt_n: float -> int with an immediate number of fraction bits;
;; neon_const_bounds enforces 1 <= #fracbits <= 32 (upper bound exclusive).
3254 (define_insn "neon_vcvt<sup>_n<mode>"
3255 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3256 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3257 (match_operand:SI 2 "immediate_operand" "i")]
3261 neon_const_bounds (operands[2], 1, 33);
3262 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3264 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Fixed-point vcvt_n in the other direction: int -> float, same bounds.
3267 (define_insn "neon_vcvt<sup>_n<mode>"
3268 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3269 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3270 (match_operand:SI 2 "immediate_operand" "i")]
3274 neon_const_bounds (operands[2], 1, 33);
3275 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3277 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Narrow each element to half width, truncating (Q reg -> D reg).
3280 (define_insn "neon_vmovn<mode>"
3281 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3282 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3285 "vmovn.<V_if_elem>\t%P0, %q1"
3286 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrow; <sup> selects the signed/unsigned form.
3289 (define_insn "neon_vqmovn<sup><mode>"
3290 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3291 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3294 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3295 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Saturating narrow from signed input to unsigned result.
3298 (define_insn "neon_vqmovun<mode>"
3299 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3300 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3303 "vqmovun.<V_s_elem>\t%P0, %q1"
3304 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widen each element to double width (D reg -> Q reg), sign- or zero-extending
;; according to <sup>.
3307 (define_insn "neon_vmovl<sup><mode>"
3308 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3309 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3312 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3313 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply a D-reg vector by one lane of another D-reg vector; operand 2 is
;; restricted to the registers usable as a scalar (<scalar_mul_constraint>),
;; operand 3 is the lane index.  Type attribute picks the FP or integer
;; scalar-multiply class depending on the element mode.
3316 (define_insn "neon_vmul_lane<mode>"
3317 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3318 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3319 (match_operand:VMD 2 "s_register_operand"
3320 "<scalar_mul_constraint>")
3321 (match_operand:SI 3 "immediate_operand" "i")]
3325 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3328 (if_then_else (match_test "<Is_float_mode>")
3329 (const_string "neon_fp_mul_s_scalar<q>")
3330 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Q-reg variant: the scalar comes from the half-width (D) mode.
3333 (define_insn "neon_vmul_lane<mode>"
3334 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3335 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3336 (match_operand:<V_HALF> 2 "s_register_operand"
3337 "<scalar_mul_constraint>")
3338 (match_operand:SI 3 "immediate_operand" "i")]
3342 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3345 (if_then_else (match_test "<Is_float_mode>")
3346 (const_string "neon_fp_mul_s_scalar<q>")
3347 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Widening multiply by lane (result is double-width), signed/unsigned via <sup>.
3350 (define_insn "neon_vmull<sup>_lane<mode>"
3351 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3352 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3353 (match_operand:VMDI 2 "s_register_operand"
3354 "<scalar_mul_constraint>")
3355 (match_operand:SI 3 "immediate_operand" "i")]
3359 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3361 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply by lane.
3364 (define_insn "neon_vqdmull_lane<mode>"
3365 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3366 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3367 (match_operand:VMDI 2 "s_register_operand"
3368 "<scalar_mul_constraint>")
3369 (match_operand:SI 3 "immediate_operand" "i")]
3370 UNSPEC_VQDMULL_LANE))]
3373 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3375 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; Saturating (rounding, via <r>) doubling multiply-high by lane, Q-reg form.
3378 (define_insn "neon_vq<r>dmulh_lane<mode>"
3379 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3380 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3381 (match_operand:<V_HALF> 2 "s_register_operand"
3382 "<scalar_mul_constraint>")
3383 (match_operand:SI 3 "immediate_operand" "i")]
3387 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3389 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; Same operation, D-reg form.
3392 (define_insn "neon_vq<r>dmulh_lane<mode>"
3393 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3394 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3395 (match_operand:VMDI 2 "s_register_operand"
3396 "<scalar_mul_constraint>")
3397 (match_operand:SI 3 "immediate_operand" "i")]
3401 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3403 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3406 ;; vqrdmlah_lane, vqrdmlsh_lane
;; Rounding saturating doubling multiply-accumulate (vqrdmlah) or -subtract
;; (vqrdmlsh) by lane; VQRDMLH_AS:neon_rdma_as selects the "a"/"s" letter.
;; Operand 1 is the accumulator, tied to the destination ("0").  Q-reg form.
3407 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3408 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3409 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3410 (match_operand:VMQI 2 "s_register_operand" "w")
3411 (match_operand:<V_HALF> 3 "s_register_operand"
3412 "<scalar_mul_constraint>")
3413 (match_operand:SI 4 "immediate_operand" "i")]
3418 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3420 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; Same operation, D-reg form.
3423 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3424 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3425 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3426 (match_operand:VMDI 2 "s_register_operand" "w")
3427 (match_operand:VMDI 3 "s_register_operand"
3428 "<scalar_mul_constraint>")
3429 (match_operand:SI 4 "immediate_operand" "i")]
3434 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3436 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate by lane, D-reg form: dest/accumulator is operand 1
;; (tied "0"), operand 2 the vector, operand 3 the scalar source, operand 4
;; the lane index.  Type attribute splits FP vs integer classes.
3439 (define_insn "neon_vmla_lane<mode>"
3440 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3441 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3442 (match_operand:VMD 2 "s_register_operand" "w")
3443 (match_operand:VMD 3 "s_register_operand"
3444 "<scalar_mul_constraint>")
3445 (match_operand:SI 4 "immediate_operand" "i")]
3449 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3452 (if_then_else (match_test "<Is_float_mode>")
3453 (const_string "neon_fp_mla_s_scalar<q>")
3454 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by lane, Q-reg form (scalar taken from the half mode).
3457 (define_insn "neon_vmla_lane<mode>"
3458 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3459 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3460 (match_operand:VMQ 2 "s_register_operand" "w")
3461 (match_operand:<V_HALF> 3 "s_register_operand"
3462 "<scalar_mul_constraint>")
3463 (match_operand:SI 4 "immediate_operand" "i")]
3467 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3470 (if_then_else (match_test "<Is_float_mode>")
3471 (const_string "neon_fp_mla_s_scalar<q>")
3472 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by lane (accumulator is double-width).
3475 (define_insn "neon_vmlal<sup>_lane<mode>"
3476 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3477 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3478 (match_operand:VMDI 2 "s_register_operand" "w")
3479 (match_operand:VMDI 3 "s_register_operand"
3480 "<scalar_mul_constraint>")
3481 (match_operand:SI 4 "immediate_operand" "i")]
3485 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3487 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-accumulate by lane.
3490 (define_insn "neon_vqdmlal_lane<mode>"
3491 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3492 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3493 (match_operand:VMDI 2 "s_register_operand" "w")
3494 (match_operand:VMDI 3 "s_register_operand"
3495 "<scalar_mul_constraint>")
3496 (match_operand:SI 4 "immediate_operand" "i")]
3497 UNSPEC_VQDMLAL_LANE))]
3500 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3502 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by lane, D-reg form.
3505 (define_insn "neon_vmls_lane<mode>"
3506 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3507 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3508 (match_operand:VMD 2 "s_register_operand" "w")
3509 (match_operand:VMD 3 "s_register_operand"
3510 "<scalar_mul_constraint>")
3511 (match_operand:SI 4 "immediate_operand" "i")]
3515 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3518 (if_then_else (match_test "<Is_float_mode>")
3519 (const_string "neon_fp_mla_s_scalar<q>")
3520 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by lane, Q-reg form.
3523 (define_insn "neon_vmls_lane<mode>"
3524 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3525 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3526 (match_operand:VMQ 2 "s_register_operand" "w")
3527 (match_operand:<V_HALF> 3 "s_register_operand"
3528 "<scalar_mul_constraint>")
3529 (match_operand:SI 4 "immediate_operand" "i")]
3533 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3536 (if_then_else (match_test "<Is_float_mode>")
3537 (const_string "neon_fp_mla_s_scalar<q>")
3538 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by lane.
3541 (define_insn "neon_vmlsl<sup>_lane<mode>"
3542 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3543 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3544 (match_operand:VMDI 2 "s_register_operand" "w")
3545 (match_operand:VMDI 3 "s_register_operand"
3546 "<scalar_mul_constraint>")
3547 (match_operand:SI 4 "immediate_operand" "i")]
3551 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3553 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-subtract by lane.
3556 (define_insn "neon_vqdmlsl_lane<mode>"
3557 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3558 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3559 (match_operand:VMDI 2 "s_register_operand" "w")
3560 (match_operand:VMDI 3 "s_register_operand"
3561 "<scalar_mul_constraint>")
3562 (match_operand:SI 4 "immediate_operand" "i")]
3563 UNSPEC_VQDMLSL_LANE))]
3566 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3568 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3571 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3572 ; core register into a temp register, then use a scalar taken from that. This
3573 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3574 ; or extracted from another vector. The latter case it's currently better to
3575 ; use the "_lane" variant, and the former case can probably be implemented
3576 ; using vld1_lane, but that hasn't been done yet.
;; All "_n" expanders below share one shape: broadcast the scalar operand into
;; lane 0 of a fresh temp vector (vset_lane) and reuse the "_lane" pattern.
;; vmul_n, D-reg form.
3578 (define_expand "neon_vmul_n<mode>"
3579 [(match_operand:VMD 0 "s_register_operand" "")
3580 (match_operand:VMD 1 "s_register_operand" "")
3581 (match_operand:<V_elem> 2 "s_register_operand" "")]
3584 rtx tmp = gen_reg_rtx (<MODE>mode);
3585 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3586 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul_n, Q-reg form: the temp scalar vector lives in the half (D) mode.
3591 (define_expand "neon_vmul_n<mode>"
3592 [(match_operand:VMQ 0 "s_register_operand" "")
3593 (match_operand:VMQ 1 "s_register_operand" "")
3594 (match_operand:<V_elem> 2 "s_register_operand" "")]
3597 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3598 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3599 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Signed widening multiply by scalar.
3604 (define_expand "neon_vmulls_n<mode>"
3605 [(match_operand:<V_widen> 0 "s_register_operand" "")
3606 (match_operand:VMDI 1 "s_register_operand" "")
3607 (match_operand:<V_elem> 2 "s_register_operand" "")]
3610 rtx tmp = gen_reg_rtx (<MODE>mode);
3611 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3612 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Unsigned widening multiply by scalar.
3617 (define_expand "neon_vmullu_n<mode>"
3618 [(match_operand:<V_widen> 0 "s_register_operand" "")
3619 (match_operand:VMDI 1 "s_register_operand" "")
3620 (match_operand:<V_elem> 2 "s_register_operand" "")]
3623 rtx tmp = gen_reg_rtx (<MODE>mode);
3624 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3625 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling widening multiply by scalar.
3630 (define_expand "neon_vqdmull_n<mode>"
3631 [(match_operand:<V_widen> 0 "s_register_operand" "")
3632 (match_operand:VMDI 1 "s_register_operand" "")
3633 (match_operand:<V_elem> 2 "s_register_operand" "")]
3636 rtx tmp = gen_reg_rtx (<MODE>mode);
3637 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3638 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling multiply-high by scalar, D-reg form.
3643 (define_expand "neon_vqdmulh_n<mode>"
3644 [(match_operand:VMDI 0 "s_register_operand" "")
3645 (match_operand:VMDI 1 "s_register_operand" "")
3646 (match_operand:<V_elem> 2 "s_register_operand" "")]
3649 rtx tmp = gen_reg_rtx (<MODE>mode);
3650 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3651 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Rounding variant, D-reg form.
3656 (define_expand "neon_vqrdmulh_n<mode>"
3657 [(match_operand:VMDI 0 "s_register_operand" "")
3658 (match_operand:VMDI 1 "s_register_operand" "")
3659 (match_operand:<V_elem> 2 "s_register_operand" "")]
3662 rtx tmp = gen_reg_rtx (<MODE>mode);
3663 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3664 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling multiply-high by scalar, Q-reg form (temp in half mode).
3669 (define_expand "neon_vqdmulh_n<mode>"
3670 [(match_operand:VMQI 0 "s_register_operand" "")
3671 (match_operand:VMQI 1 "s_register_operand" "")
3672 (match_operand:<V_elem> 2 "s_register_operand" "")]
3675 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3676 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3677 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Rounding variant, Q-reg form.
3682 (define_expand "neon_vqrdmulh_n<mode>"
3683 [(match_operand:VMQI 0 "s_register_operand" "")
3684 (match_operand:VMQI 1 "s_register_operand" "")
3685 (match_operand:<V_elem> 2 "s_register_operand" "")]
3688 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3689 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3690 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; "_n" multiply-accumulate/subtract expanders: broadcast the scalar
;; (operand 3) into lane 0 of a temp vector, then reuse the "_lane" insn.
;; vmla_n, D-reg form.
3695 (define_expand "neon_vmla_n<mode>"
3696 [(match_operand:VMD 0 "s_register_operand" "")
3697 (match_operand:VMD 1 "s_register_operand" "")
3698 (match_operand:VMD 2 "s_register_operand" "")
3699 (match_operand:<V_elem> 3 "s_register_operand" "")]
3702 rtx tmp = gen_reg_rtx (<MODE>mode);
3703 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3704 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmla_n, Q-reg form (temp in half mode).
3709 (define_expand "neon_vmla_n<mode>"
3710 [(match_operand:VMQ 0 "s_register_operand" "")
3711 (match_operand:VMQ 1 "s_register_operand" "")
3712 (match_operand:VMQ 2 "s_register_operand" "")
3713 (match_operand:<V_elem> 3 "s_register_operand" "")]
3716 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3717 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3718 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Signed widening multiply-accumulate by scalar.
3723 (define_expand "neon_vmlals_n<mode>"
3724 [(match_operand:<V_widen> 0 "s_register_operand" "")
3725 (match_operand:<V_widen> 1 "s_register_operand" "")
3726 (match_operand:VMDI 2 "s_register_operand" "")
3727 (match_operand:<V_elem> 3 "s_register_operand" "")]
3730 rtx tmp = gen_reg_rtx (<MODE>mode);
3731 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3732 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Unsigned widening multiply-accumulate by scalar.
3737 (define_expand "neon_vmlalu_n<mode>"
3738 [(match_operand:<V_widen> 0 "s_register_operand" "")
3739 (match_operand:<V_widen> 1 "s_register_operand" "")
3740 (match_operand:VMDI 2 "s_register_operand" "")
3741 (match_operand:<V_elem> 3 "s_register_operand" "")]
3744 rtx tmp = gen_reg_rtx (<MODE>mode);
3745 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3746 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-accumulate by scalar.
3751 (define_expand "neon_vqdmlal_n<mode>"
3752 [(match_operand:<V_widen> 0 "s_register_operand" "")
3753 (match_operand:<V_widen> 1 "s_register_operand" "")
3754 (match_operand:VMDI 2 "s_register_operand" "")
3755 (match_operand:<V_elem> 3 "s_register_operand" "")]
3758 rtx tmp = gen_reg_rtx (<MODE>mode);
3759 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3760 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n, D-reg form.
3765 (define_expand "neon_vmls_n<mode>"
3766 [(match_operand:VMD 0 "s_register_operand" "")
3767 (match_operand:VMD 1 "s_register_operand" "")
3768 (match_operand:VMD 2 "s_register_operand" "")
3769 (match_operand:<V_elem> 3 "s_register_operand" "")]
3772 rtx tmp = gen_reg_rtx (<MODE>mode);
3773 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3774 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n, Q-reg form (temp in half mode).
3779 (define_expand "neon_vmls_n<mode>"
3780 [(match_operand:VMQ 0 "s_register_operand" "")
3781 (match_operand:VMQ 1 "s_register_operand" "")
3782 (match_operand:VMQ 2 "s_register_operand" "")
3783 (match_operand:<V_elem> 3 "s_register_operand" "")]
3786 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3787 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3788 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Signed widening multiply-subtract by scalar.
3793 (define_expand "neon_vmlsls_n<mode>"
3794 [(match_operand:<V_widen> 0 "s_register_operand" "")
3795 (match_operand:<V_widen> 1 "s_register_operand" "")
3796 (match_operand:VMDI 2 "s_register_operand" "")
3797 (match_operand:<V_elem> 3 "s_register_operand" "")]
3800 rtx tmp = gen_reg_rtx (<MODE>mode);
3801 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3802 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; Unsigned widening multiply-subtract by scalar.
3807 (define_expand "neon_vmlslu_n<mode>"
3808 [(match_operand:<V_widen> 0 "s_register_operand" "")
3809 (match_operand:<V_widen> 1 "s_register_operand" "")
3810 (match_operand:VMDI 2 "s_register_operand" "")
3811 (match_operand:<V_elem> 3 "s_register_operand" "")]
3814 rtx tmp = gen_reg_rtx (<MODE>mode);
3815 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3816 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-subtract by scalar.
3821 (define_expand "neon_vqdmlsl_n<mode>"
3822 [(match_operand:<V_widen> 0 "s_register_operand" "")
3823 (match_operand:<V_widen> 1 "s_register_operand" "")
3824 (match_operand:VMDI 2 "s_register_operand" "")
3825 (match_operand:<V_elem> 3 "s_register_operand" "")]
3828 rtx tmp = gen_reg_rtx (<MODE>mode);
3829 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3830 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; Extract a vector from a pair of concatenated vectors starting at byte
;; position given by operand 3; bound-checked against the element count.
3835 (define_insn "neon_vext<mode>"
3836 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3837 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3838 (match_operand:VDQX 2 "s_register_operand" "w")
3839 (match_operand:SI 3 "immediate_operand" "i")]
3843 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3844 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3846 [(set_attr "type" "neon_ext<q>")]
;; Reverse elements within each 64-bit doubleword.
3849 (define_insn "neon_vrev64<mode>"
3850 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3851 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3854 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3855 [(set_attr "type" "neon_rev<q>")]
;; Reverse elements within each 32-bit word.
3858 (define_insn "neon_vrev32<mode>"
3859 [(set (match_operand:VX 0 "s_register_operand" "=w")
3860 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3863 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3864 [(set_attr "type" "neon_rev<q>")]
;; Reverse elements within each 16-bit halfword (byte elements only).
3867 (define_insn "neon_vrev16<mode>"
3868 [(set (match_operand:VE 0 "s_register_operand" "=w")
3869 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3872 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3873 [(set_attr "type" "neon_rev<q>")]
3876 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3877 ; allocation. For an intrinsic of form:
3878 ; rD = vbsl_* (rS, rN, rM)
3879 ; We can use any of:
3880 ; vbsl rS, rN, rM (if D = S)
3881 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3882 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select: three constraint alternatives tie the destination to the
;; selector, the second, or the third input, picking vbsl/vbit/vbif
;; respectively (see the table in the comment above).
3884 (define_insn "neon_vbsl<mode>_internal"
3885 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3886 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3887 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3888 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3892 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3893 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3894 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
;; User-visible expander: the selector (operand 1) arrives in the comparison
;; result mode, so convert it to the data mode before handing off to the
;; internal insn above.
3895 [(set_attr "type" "neon_bsl<q>")]
3898 (define_expand "neon_vbsl<mode>"
3899 [(set (match_operand:VDQX 0 "s_register_operand" "")
3900 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3901 (match_operand:VDQX 2 "s_register_operand" "")
3902 (match_operand:VDQX 3 "s_register_operand" "")]
3906 /* We can't alias operands together if they have different modes. */
3907 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift by a per-element signed shift count held in a second vector register;
;; <shift_op>/<sup> pick the exact mnemonic and signedness.
3911 (define_insn "neon_v<shift_op><sup><mode>"
3912 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3913 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3914 (match_operand:VDQIX 2 "s_register_operand" "w")]
3917 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3918 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating variant of the register-count shift.
3922 (define_insn "neon_v<shift_op><sup><mode>"
3923 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3924 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3925 (match_operand:VDQIX 2 "s_register_operand" "w")]
3928 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3929 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Immediate right shift; the count must satisfy 1 <= imm <= element bits.
3933 (define_insn "neon_v<shift_op><sup>_n<mode>"
3934 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3935 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3936 (match_operand:SI 2 "immediate_operand" "i")]
3940 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3941 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3943 [(set_attr "type" "neon_shift_imm<q>")]
3946 ;; vshrn_n, vrshrn_n
;; Narrowing right shift by immediate (Q -> D); count bounded by half the
;; source element width.
3947 (define_insn "neon_v<shift_op>_n<mode>"
3948 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3949 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3950 (match_operand:SI 2 "immediate_operand" "i")]
3954 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3955 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3957 [(set_attr "type" "neon_shift_imm_narrow_q")]
3960 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift by immediate, signed/unsigned via <sup>.
3961 (define_insn "neon_v<shift_op><sup>_n<mode>"
3962 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3963 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3964 (match_operand:SI 2 "immediate_operand" "i")]
3968 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3969 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3971 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3974 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing right shift of signed input to unsigned result.
3975 (define_insn "neon_v<shift_op>_n<mode>"
3976 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3977 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3978 (match_operand:SI 2 "immediate_operand" "i")]
3982 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3983 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3985 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Immediate left shift; count bounded as 0 <= imm < element bits.
3988 (define_insn "neon_vshl_n<mode>"
3989 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3990 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3991 (match_operand:SI 2 "immediate_operand" "i")]
3995 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3996 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3998 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating immediate left shift, signed/unsigned via <sup>.
4001 (define_insn "neon_vqshl_<sup>_n<mode>"
4002 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4003 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4004 (match_operand:SI 2 "immediate_operand" "i")]
4008 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4009 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4011 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating immediate left shift of signed input to unsigned result.
4014 (define_insn "neon_vqshlu_n<mode>"
4015 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4016 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4017 (match_operand:SI 2 "immediate_operand" "i")]
4021 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4022 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4024 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening immediate left shift (D -> Q).  Note the existing comment says
;; 0 < imm <= size while the call passes a lower bound of 0 — the accepted
;; range per the call is 0 <= imm <= size; kept as-is (review note only).
4027 (define_insn "neon_vshll<sup>_n<mode>"
4028 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4029 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4030 (match_operand:SI 2 "immediate_operand" "i")]
4034 /* The boundaries are: 0 < imm <= size. */
4035 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4036 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4038 [(set_attr "type" "neon_shift_imm_long")]
;; Shift right and accumulate: operand 1 is the accumulator (tied to the
;; destination via "0"), operand 2 is shifted by immediate operand 3.
4042 (define_insn "neon_v<shift_op><sup>_n<mode>"
4043 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4044 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4045 (match_operand:VDQIX 2 "s_register_operand" "w")
4046 (match_operand:SI 3 "immediate_operand" "i")]
4050 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4051 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4053 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift right and insert: shifted bits of operand 2 replace the corresponding
;; bits of the destination, which is pre-loaded from operand 1 ("0" tie).
4056 (define_insn "neon_vsri_n<mode>"
4057 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4058 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4059 (match_operand:VDQIX 2 "s_register_operand" "w")
4060 (match_operand:SI 3 "immediate_operand" "i")]
4064 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4065 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4067 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift left and insert, same tie as vsri but with 0 <= imm < element bits.
4070 (define_insn "neon_vsli_n<mode>"
4071 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4072 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4073 (match_operand:VDQIX 2 "s_register_operand" "w")
4074 (match_operand:SI 3 "immediate_operand" "i")]
4078 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4079 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4081 [(set_attr "type" "neon_shift_reg<q>")]
;; Table lookup with a one-register table: bytes of operand 2 index into
;; operand 1.
4084 (define_insn "neon_vtbl1v8qi"
4085 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4086 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4087 (match_operand:V8QI 2 "s_register_operand" "w")]
4090 "vtbl.8\t%P0, {%P1}, %P2"
4091 [(set_attr "type" "neon_tbl1")]
;; Two-register table lookup.  The TI-mode table operand occupies consecutive
;; D registers; REGNO of its first register is used to name each D register
;; explicitly in the {list} syntax (note the +2 step is in S-register units).
4094 (define_insn "neon_vtbl2v8qi"
4095 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4096 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4097 (match_operand:V8QI 2 "s_register_operand" "w")]
4102 int tabbase = REGNO (operands[1]);
4104 ops[0] = operands[0];
4105 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4106 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4107 ops[3] = operands[2];
4108 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4112 [(set_attr "type" "neon_tbl2")]
;; Three-register table lookup (EI-mode table operand).
4115 (define_insn "neon_vtbl3v8qi"
4116 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4117 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4118 (match_operand:V8QI 2 "s_register_operand" "w")]
4123 int tabbase = REGNO (operands[1]);
4125 ops[0] = operands[0];
4126 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4127 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4128 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4129 ops[4] = operands[2];
4130 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4134 [(set_attr "type" "neon_tbl3")]
;; Four-register table lookup (OI-mode table operand).
4137 (define_insn "neon_vtbl4v8qi"
4138 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4139 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4140 (match_operand:V8QI 2 "s_register_operand" "w")]
4145 int tabbase = REGNO (operands[1]);
4147 ops[0] = operands[0];
4148 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4149 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4150 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4151 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4152 ops[5] = operands[2];
4153 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4157 [(set_attr "type" "neon_tbl4")]
4160 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; After reload, the V16QI one-table lookup is split into two vtbl2v8qi
;; instructions: one over the low-part subregs of dest/index, one over the
;; high-part subregs, both reading the same TImode view of the table.
4161 (define_insn_and_split "neon_vtbl1v16qi"
4162 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4163 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4164 (match_operand:V16QI 2 "s_register_operand" "w")]
4168 "&& reload_completed"
4171 rtx op0, op1, op2, part0, part2;
4175 op1 = gen_lowpart (TImode, operands[1]);
4178 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4179 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4180 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4181 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4183 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4184 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4185 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4186 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4189 [(set_attr "type" "multiple")]
;; Two-table (OI-mode) variant: likewise split after reload into two
;; vtbl2v8qi ops over the low and high V8QI halves of dest/index.
;; NOTE(review): the line deriving op1 from operands[1] is elided in this
;; listing — confirm against the full source.
4192 (define_insn_and_split "neon_vtbl2v16qi"
4193 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4194 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4195 (match_operand:V16QI 2 "s_register_operand" "w")]
4199 "&& reload_completed"
4202 rtx op0, op1, op2, part0, part2;
4209 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4210 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4211 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4212 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4214 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4215 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4216 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4217 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4220 [(set_attr "type" "multiple")]
4223 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4224 ;; handle quad-word input modes, producing octa-word output modes. But
4225 ;; that requires us to add support for octa-word vector modes in moves.
4226 ;; That seems overkill for this one use in vec_perm.
;; Split after reload; neon_split_vcombine (arm backend helper) emits the
;; component register moves.
4227 (define_insn_and_split "neon_vcombinev16qi"
4228 [(set (match_operand:OI 0 "s_register_operand" "=w")
4229 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4230 (match_operand:V16QI 2 "s_register_operand" "w")]
4234 "&& reload_completed"
4237 neon_split_vcombine (operands);
4240 [(set_attr "type" "multiple")]
;; vtbx variants: like vtbl, but operand 1 is tied to the destination
;; (constraint "0"), so the destination register is an input as well as an
;; output of the instruction.
4243 (define_insn "neon_vtbx1v8qi"
4244 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4245 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4246 (match_operand:V8QI 2 "s_register_operand" "w")
4247 (match_operand:V8QI 3 "s_register_operand" "w")]
4250 "vtbx.8\t%P0, {%P2}, %P3"
4251 [(set_attr "type" "neon_tbl1")]
;; vtbx2: table is the D-register pair starting at REGNO (operands[2]).
4254 (define_insn "neon_vtbx2v8qi"
4255 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4256 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4257 (match_operand:TI 2 "s_register_operand" "w")
4258 (match_operand:V8QI 3 "s_register_operand" "w")]
4263 int tabbase = REGNO (operands[2]);
4265 ops[0] = operands[0];
4266 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4267 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4268 ops[3] = operands[3];
4269 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4273 [(set_attr "type" "neon_tbl2")]
;; vtbx3: three-D-register (EI-mode) table.
4276 (define_insn "neon_vtbx3v8qi"
4277 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4278 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4279 (match_operand:EI 2 "s_register_operand" "w")
4280 (match_operand:V8QI 3 "s_register_operand" "w")]
4285 int tabbase = REGNO (operands[2]);
4287 ops[0] = operands[0];
4288 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4289 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4290 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4291 ops[4] = operands[3];
4292 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4296 [(set_attr "type" "neon_tbl3")]
;; vtbx4: four-D-register (OI-mode) table.
4299 (define_insn "neon_vtbx4v8qi"
4300 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4301 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4302 (match_operand:OI 2 "s_register_operand" "w")
4303 (match_operand:V8QI 3 "s_register_operand" "w")]
4308 int tabbase = REGNO (operands[2]);
4310 ops[0] = operands[0];
4311 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4312 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4313 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4314 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4315 ops[5] = operands[3];
4316 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4320 [(set_attr "type" "neon_tbl4")]
;; Two-destination permutes (vtrn/vzip/vuzp).  Each expander builds a
;; parallel setting operand 0 from the *1 unspec and operand 3 from the *2
;; unspec over the same two inputs; the matching insn ties both outputs to
;; the inputs ("0"/"2" constraints, earlyclobber "&") since the single
;; hardware instruction rewrites both registers in place.
4323 (define_expand "neon_vtrn<mode>_internal"
4325 [(set (match_operand:VDQWH 0 "s_register_operand")
4326 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4327 (match_operand:VDQWH 2 "s_register_operand")]
4329 (set (match_operand:VDQWH 3 "s_register_operand")
4330 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4335 ;; Note: Different operand numbering to handle tied registers correctly.
4336 (define_insn "*neon_vtrn<mode>_insn"
4337 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4338 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4339 (match_operand:VDQWH 3 "s_register_operand" "2")]
4341 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4342 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4345 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4346 [(set_attr "type" "neon_permute<q>")]
4349 (define_expand "neon_vzip<mode>_internal"
4351 [(set (match_operand:VDQWH 0 "s_register_operand")
4352 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4353 (match_operand:VDQWH 2 "s_register_operand")]
4355 (set (match_operand:VDQWH 3 "s_register_operand")
4356 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4361 ;; Note: Different operand numbering to handle tied registers correctly.
4362 (define_insn "*neon_vzip<mode>_insn"
4363 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4364 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4365 (match_operand:VDQWH 3 "s_register_operand" "2")]
4367 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4368 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4371 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4372 [(set_attr "type" "neon_zip<q>")]
4375 (define_expand "neon_vuzp<mode>_internal"
4377 [(set (match_operand:VDQWH 0 "s_register_operand")
4378 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
4379 (match_operand:VDQWH 2 "s_register_operand")]
4381 (set (match_operand:VDQWH 3 "s_register_operand" "")
4382 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4387 ;; Note: Different operand numbering to handle tied registers correctly.
4388 (define_insn "*neon_vuzp<mode>_insn"
4389 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
4390 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
4391 (match_operand:VDQWH 3 "s_register_operand" "2")]
4393 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
4394 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
4397 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4398 [(set_attr "type" "neon_zip<q>")]
;; vld1: whole-vector and single-lane loads.
4401 (define_expand "vec_load_lanes<mode><mode>"
4402 [(set (match_operand:VDQX 0 "s_register_operand")
4403 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4407 (define_insn "neon_vld1<mode>"
4408 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4409 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4412 "vld1.<V_sz_elem>\t%h0, %A1"
4413 [(set_attr "type" "neon_load1_1reg<q>")]
4416 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4417 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; D-register lane load: NEON_ENDIAN_LANE_N maps the RTL lane number back
;; to the architectural lane before it is printed.
4419 (define_insn "neon_vld1_lane<mode>"
4420 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4421 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4422 (match_operand:VDX 2 "s_register_operand" "0")
4423 (match_operand:SI 3 "immediate_operand" "i")]
4427 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4428 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4429 operands[3] = GEN_INT (lane);
4431 return "vld1.<V_sz_elem>\t%P0, %A1";
4433 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4435 [(set_attr "type" "neon_load1_one_lane<q>")]
4438 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4439 ;; here on big endian targets.
;; Q-register lane load: lanes in the high half (lane >= max/2) are
;; addressed through the corresponding half-width (<V_HALF>) register.
;; NOTE(review): the body of that branch is elided in this listing —
;; presumably it rebases lane/regno onto the high D register; confirm
;; against the full source.
4440 (define_insn "neon_vld1_lane<mode>"
4441 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4442 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4443 (match_operand:VQX 2 "s_register_operand" "0")
4444 (match_operand:SI 3 "immediate_operand" "i")]
4448 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4449 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4450 operands[3] = GEN_INT (lane);
4451 int regno = REGNO (operands[0]);
4452 if (lane >= max / 2)
4456 operands[3] = GEN_INT (lane);
4458 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4460 return "vld1.<V_sz_elem>\t%P0, %A1";
4462 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4464 [(set_attr "type" "neon_load1_one_lane<q>")]
;; vld1_dup: load one element and replicate it to every lane.
4467 (define_insn "neon_vld1_dup<mode>"
4468 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4469 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4471 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4472 [(set_attr "type" "neon_load1_all_lanes<q>")]
4475 ;; Special case for DImode. Treat it exactly like a simple load.
4476 (define_expand "neon_vld1_dupdi"
4477 [(set (match_operand:DI 0 "s_register_operand" "")
4478 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Q-register dup: replicate into both D halves (%e0[]/%f0[]) at once.
4484 (define_insn "neon_vld1_dup<mode>"
4485 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4486 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4489 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4491 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI dup: after reload, load the low D half via vld1_dupdi, then copy it
;; to the high half with a register move ("length" 8 = two 4-byte insns).
4494 (define_insn_and_split "neon_vld1_dupv2di"
4495 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4496 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4499 "&& reload_completed"
4502 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4503 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4504 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4507 [(set_attr "length" "8")
4508 (set_attr "type" "neon_load1_all_lanes_q")]
;; vst1: whole-vector and single-lane stores, mirroring the vld1 patterns.
4511 (define_expand "vec_store_lanes<mode><mode>"
4512 [(set (match_operand:VDQX 0 "neon_struct_operand")
4513 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4517 (define_insn "neon_vst1<mode>"
4518 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4519 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4522 "vst1.<V_sz_elem>\t%h1, %A0"
4523 [(set_attr "type" "neon_store1_1reg<q>")])
4525 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4526 ;; here on big endian targets.
;; D-register lane store: lane number restored to architectural order via
;; NEON_ENDIAN_LANE_N before printing.
4527 (define_insn "neon_vst1_lane<mode>"
4528 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4530 [(match_operand:VDX 1 "s_register_operand" "w")
4531 (match_operand:SI 2 "immediate_operand" "i")]
4535 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4536 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4537 operands[2] = GEN_INT (lane);
4539 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4541 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4543 [(set_attr "type" "neon_store1_one_lane<q>")]
4546 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4547 ;; here on big endian targets.
;; Q-register lane store: high-half lanes (lane >= max/2) are stored
;; through the half-width (<V_HALF>) register of the source.
4548 (define_insn "neon_vst1_lane<mode>"
4549 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4551 [(match_operand:VQX 1 "s_register_operand" "w")
4552 (match_operand:SI 2 "immediate_operand" "i")]
4556 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4557 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4558 int regno = REGNO (operands[1]);
4559 if (lane >= max / 2)
4564 operands[2] = GEN_INT (lane);
4565 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4567 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4569 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4571 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Two-element interleaving loads (vld2).  64-bit elements cannot be
;; interleaved, so they degenerate to a plain vld1.64 two-register load.
4574 (define_expand "vec_load_lanesti<mode>"
4575 [(set (match_operand:TI 0 "s_register_operand")
4576 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4577 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4581 (define_insn "neon_vld2<mode>"
4582 [(set (match_operand:TI 0 "s_register_operand" "=w")
4583 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4584 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4588 if (<V_sz_elem> == 64)
4589 return "vld1.64\t%h0, %A1";
4591 return "vld2.<V_sz_elem>\t%h0, %A1";
4594 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4595 (const_string "neon_load1_2reg<q>")
4596 (const_string "neon_load2_2reg<q>")))]
4599 (define_expand "vec_load_lanesoi<mode>"
4600 [(set (match_operand:OI 0 "s_register_operand")
4601 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4602 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4606 (define_insn "neon_vld2<mode>"
4607 [(set (match_operand:OI 0 "s_register_operand" "=w")
4608 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4609 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4612 "vld2.<V_sz_elem>\t%h0, %A1"
4613 [(set_attr "type" "neon_load2_2reg_q")])
4615 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4616 ;; here on big endian targets.
;; D-register-pair lane load: destination D registers are REGNO and
;; REGNO + 2 of the TI-mode destination.
4617 (define_insn "neon_vld2_lane<mode>"
4618 [(set (match_operand:TI 0 "s_register_operand" "=w")
4619 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4620 (match_operand:TI 2 "s_register_operand" "0")
4621 (match_operand:SI 3 "immediate_operand" "i")
4622 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4626 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4627 int regno = REGNO (operands[0]);
4629 ops[0] = gen_rtx_REG (DImode, regno);
4630 ops[1] = gen_rtx_REG (DImode, regno + 2);
4631 ops[2] = operands[1];
4632 ops[3] = GEN_INT (lane);
4633 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4636 [(set_attr "type" "neon_load2_one_lane<q>")]
4639 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4640 ;; here on big endian targets.
;; Q-register-pair lane load: printed D registers are REGNO and REGNO + 4.
;; NOTE(review): the body of the lane >= max/2 branch is elided in this
;; listing — presumably it rebases lane/regno onto the high D registers;
;; confirm against the full source.
4641 (define_insn "neon_vld2_lane<mode>"
4642 [(set (match_operand:OI 0 "s_register_operand" "=w")
4643 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4644 (match_operand:OI 2 "s_register_operand" "0")
4645 (match_operand:SI 3 "immediate_operand" "i")
4646 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4650 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4651 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4652 int regno = REGNO (operands[0]);
4654 if (lane >= max / 2)
4659 ops[0] = gen_rtx_REG (DImode, regno);
4660 ops[1] = gen_rtx_REG (DImode, regno + 4);
4661 ops[2] = operands[1];
4662 ops[3] = GEN_INT (lane);
4663 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4666 [(set_attr "type" "neon_load2_one_lane<q>")]
;; vld2_dup: all-lanes load; single-element vectors fall back to vld1.
4669 (define_insn "neon_vld2_dup<mode>"
4670 [(set (match_operand:TI 0 "s_register_operand" "=w")
4671 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4672 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4676 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4677 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4679 return "vld1.<V_sz_elem>\t%h0, %A1";
4682 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4683 (const_string "neon_load2_all_lanes<q>")
4684 (const_string "neon_load1_1reg<q>")))]
;; Two-element interleaving stores (vst2); 64-bit elements use vst1.64.
4687 (define_expand "vec_store_lanesti<mode>"
4688 [(set (match_operand:TI 0 "neon_struct_operand")
4689 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4690 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; NOTE(review): the non-64-bit branch below is typed
;; "neon_store2_one_lane<q>" although it emits a full two-register vst2;
;; the vld2 counterpart uses neon_load2_2reg<q>.  Looks inconsistent —
;; confirm intent before relying on scheduling attributes here.
4694 (define_insn "neon_vst2<mode>"
4695 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4696 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4697 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4701 if (<V_sz_elem> == 64)
4702 return "vst1.64\t%h1, %A0";
4704 return "vst2.<V_sz_elem>\t%h1, %A0";
4707 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4708 (const_string "neon_store1_2reg<q>")
4709 (const_string "neon_store2_one_lane<q>")))]
4712 (define_expand "vec_store_lanesoi<mode>"
4713 [(set (match_operand:OI 0 "neon_struct_operand")
4714 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4715 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4719 (define_insn "neon_vst2<mode>"
4720 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4721 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4722 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4725 "vst2.<V_sz_elem>\t%h1, %A0"
4726 [(set_attr "type" "neon_store2_4reg<q>")]
4729 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4730 ;; here on big endian targets.
;; D-register-pair lane store: source D registers REGNO and REGNO + 2.
4731 (define_insn "neon_vst2_lane<mode>"
4732 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4733 (unspec:<V_two_elem>
4734 [(match_operand:TI 1 "s_register_operand" "w")
4735 (match_operand:SI 2 "immediate_operand" "i")
4736 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4740 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4741 int regno = REGNO (operands[1]);
4743 ops[0] = operands[0];
4744 ops[1] = gen_rtx_REG (DImode, regno);
4745 ops[2] = gen_rtx_REG (DImode, regno + 2);
4746 ops[3] = GEN_INT (lane);
4747 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4750 [(set_attr "type" "neon_store2_one_lane<q>")]
4753 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4754 ;; here on big endian targets.
;; Q-register-pair lane store: source D registers REGNO and REGNO + 4;
;; the lane >= max/2 branch body is elided in this listing (see the vld2
;; lane pattern note).
4755 (define_insn "neon_vst2_lane<mode>"
4756 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4757 (unspec:<V_two_elem>
4758 [(match_operand:OI 1 "s_register_operand" "w")
4759 (match_operand:SI 2 "immediate_operand" "i")
4760 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4764 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4765 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4766 int regno = REGNO (operands[1]);
4768 if (lane >= max / 2)
4773 ops[0] = operands[0];
4774 ops[1] = gen_rtx_REG (DImode, regno);
4775 ops[2] = gen_rtx_REG (DImode, regno + 4);
4776 ops[3] = GEN_INT (lane);
4777 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4780 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Three-element interleaving loads (vld3); 64-bit elements use vld1.64.
4783 (define_expand "vec_load_lanesei<mode>"
4784 [(set (match_operand:EI 0 "s_register_operand")
4785 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4786 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4790 (define_insn "neon_vld3<mode>"
4791 [(set (match_operand:EI 0 "s_register_operand" "=w")
4792 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4793 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4797 if (<V_sz_elem> == 64)
4798 return "vld1.64\t%h0, %A1";
4800 return "vld3.<V_sz_elem>\t%h0, %A1";
4803 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4804 (const_string "neon_load1_3reg<q>")
4805 (const_string "neon_load3_3reg<q>")))]
4808 (define_expand "vec_load_lanesci<mode>"
4809 [(match_operand:CI 0 "s_register_operand")
4810 (match_operand:CI 1 "neon_struct_operand")
4811 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4814 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Q-register vld3 is emitted as two halves: qa loads the even D registers
;; (regno, +4, +8) from the first EI-sized chunk, qb the odd ones
;; (regno+2, +6, +10) from the next chunk, keeping the rest of the
;; destination intact via the tied operand.
4818 (define_expand "neon_vld3<mode>"
4819 [(match_operand:CI 0 "s_register_operand")
4820 (match_operand:CI 1 "neon_struct_operand")
4821 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4826 mem = adjust_address (operands[1], EImode, 0);
4827 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4828 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4829 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
4833 (define_insn "neon_vld3qa<mode>"
4834 [(set (match_operand:CI 0 "s_register_operand" "=w")
4835 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4836 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4840 int regno = REGNO (operands[0]);
4842 ops[0] = gen_rtx_REG (DImode, regno);
4843 ops[1] = gen_rtx_REG (DImode, regno + 4);
4844 ops[2] = gen_rtx_REG (DImode, regno + 8);
4845 ops[3] = operands[1];
4846 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4849 [(set_attr "type" "neon_load3_3reg<q>")]
4852 (define_insn "neon_vld3qb<mode>"
4853 [(set (match_operand:CI 0 "s_register_operand" "=w")
4854 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4855 (match_operand:CI 2 "s_register_operand" "0")
4856 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4860 int regno = REGNO (operands[0]);
4862 ops[0] = gen_rtx_REG (DImode, regno + 2);
4863 ops[1] = gen_rtx_REG (DImode, regno + 6);
4864 ops[2] = gen_rtx_REG (DImode, regno + 10);
4865 ops[3] = operands[1];
4866 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4869 [(set_attr "type" "neon_load3_3reg<q>")]
4872 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4873 ;; here on big endian targets.
;; D-register lane load: destination D registers regno, +2, +4.
4874 (define_insn "neon_vld3_lane<mode>"
4875 [(set (match_operand:EI 0 "s_register_operand" "=w")
4876 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4877 (match_operand:EI 2 "s_register_operand" "0")
4878 (match_operand:SI 3 "immediate_operand" "i")
4879 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4883 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4884 int regno = REGNO (operands[0]);
4886 ops[0] = gen_rtx_REG (DImode, regno);
4887 ops[1] = gen_rtx_REG (DImode, regno + 2);
4888 ops[2] = gen_rtx_REG (DImode, regno + 4);
4889 ops[3] = operands[1];
4890 ops[4] = GEN_INT (lane);
4891 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4895 [(set_attr "type" "neon_load3_one_lane<q>")]
4898 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4899 ;; here on big endian targets.
;; Q-register lane load: D registers regno, +4, +8; the lane >= max/2
;; branch body is elided in this listing (see the vld2 lane pattern note).
4900 (define_insn "neon_vld3_lane<mode>"
4901 [(set (match_operand:CI 0 "s_register_operand" "=w")
4902 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4903 (match_operand:CI 2 "s_register_operand" "0")
4904 (match_operand:SI 3 "immediate_operand" "i")
4905 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4909 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4910 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4911 int regno = REGNO (operands[0]);
4913 if (lane >= max / 2)
4918 ops[0] = gen_rtx_REG (DImode, regno);
4919 ops[1] = gen_rtx_REG (DImode, regno + 4);
4920 ops[2] = gen_rtx_REG (DImode, regno + 8);
4921 ops[3] = operands[1];
4922 ops[4] = GEN_INT (lane);
4923 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4927 [(set_attr "type" "neon_load3_one_lane<q>")]
;; vld3_dup: all-lanes load; single-element vectors fall back to vld1.
4930 (define_insn "neon_vld3_dup<mode>"
4931 [(set (match_operand:EI 0 "s_register_operand" "=w")
4932 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4933 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4937 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4939 int regno = REGNO (operands[0]);
4941 ops[0] = gen_rtx_REG (DImode, regno);
4942 ops[1] = gen_rtx_REG (DImode, regno + 2);
4943 ops[2] = gen_rtx_REG (DImode, regno + 4);
4944 ops[3] = operands[1];
4945 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4949 return "vld1.<V_sz_elem>\t%h0, %A1";
4952 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4953 (const_string "neon_load3_all_lanes<q>")
4954 (const_string "neon_load1_1reg<q>")))])
;; Three-element interleaving stores (vst3); 64-bit elements use vst1.64.
4956 (define_expand "vec_store_lanesei<mode>"
4957 [(set (match_operand:EI 0 "neon_struct_operand")
4958 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4959 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4963 (define_insn "neon_vst3<mode>"
4964 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4965 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4966 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4970 if (<V_sz_elem> == 64)
4971 return "vst1.64\t%h1, %A0";
4973 return "vst3.<V_sz_elem>\t%h1, %A0";
4976 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4977 (const_string "neon_store1_3reg<q>")
4978 (const_string "neon_store3_one_lane<q>")))])
4980 (define_expand "vec_store_lanesci<mode>"
4981 [(match_operand:CI 0 "neon_struct_operand")
4982 (match_operand:CI 1 "s_register_operand")
4983 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4986 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Q-register vst3 mirrors the vld3 split: qa stores the even D registers
;; (regno, +4, +8), qb the odd ones (regno+2, +6, +10), to consecutive
;; EI-sized memory chunks.
4990 (define_expand "neon_vst3<mode>"
4991 [(match_operand:CI 0 "neon_struct_operand")
4992 (match_operand:CI 1 "s_register_operand")
4993 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4998 mem = adjust_address (operands[0], EImode, 0);
4999 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5000 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5001 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5005 (define_insn "neon_vst3qa<mode>"
5006 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5007 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5008 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5012 int regno = REGNO (operands[1]);
5014 ops[0] = operands[0];
5015 ops[1] = gen_rtx_REG (DImode, regno);
5016 ops[2] = gen_rtx_REG (DImode, regno + 4);
5017 ops[3] = gen_rtx_REG (DImode, regno + 8);
5018 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5021 [(set_attr "type" "neon_store3_3reg<q>")]
5024 (define_insn "neon_vst3qb<mode>"
5025 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5026 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5027 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5031 int regno = REGNO (operands[1]);
5033 ops[0] = operands[0];
5034 ops[1] = gen_rtx_REG (DImode, regno + 2);
5035 ops[2] = gen_rtx_REG (DImode, regno + 6);
5036 ops[3] = gen_rtx_REG (DImode, regno + 10);
5037 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5040 [(set_attr "type" "neon_store3_3reg<q>")]
5043 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5044 ;; here on big endian targets.
;; D-register lane store: source D registers regno, +2, +4.
5045 (define_insn "neon_vst3_lane<mode>"
5046 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5047 (unspec:<V_three_elem>
5048 [(match_operand:EI 1 "s_register_operand" "w")
5049 (match_operand:SI 2 "immediate_operand" "i")
5050 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5054 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5055 int regno = REGNO (operands[1]);
5057 ops[0] = operands[0];
5058 ops[1] = gen_rtx_REG (DImode, regno);
5059 ops[2] = gen_rtx_REG (DImode, regno + 2);
5060 ops[3] = gen_rtx_REG (DImode, regno + 4);
5061 ops[4] = GEN_INT (lane);
5062 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5066 [(set_attr "type" "neon_store3_one_lane<q>")]
5069 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5070 ;; here on big endian targets.
;; Q-register lane store: source D registers regno, +4, +8; the
;; lane >= max/2 branch body is elided in this listing.
5071 (define_insn "neon_vst3_lane<mode>"
5072 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5073 (unspec:<V_three_elem>
5074 [(match_operand:CI 1 "s_register_operand" "w")
5075 (match_operand:SI 2 "immediate_operand" "i")
5076 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5080 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5081 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5082 int regno = REGNO (operands[1]);
5084 if (lane >= max / 2)
5089 ops[0] = operands[0];
5090 ops[1] = gen_rtx_REG (DImode, regno);
5091 ops[2] = gen_rtx_REG (DImode, regno + 4);
5092 ops[3] = gen_rtx_REG (DImode, regno + 8);
5093 ops[4] = GEN_INT (lane);
5094 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5098 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Four-element interleaving loads (vld4); 64-bit elements use vld1.64.
5101 (define_expand "vec_load_lanesoi<mode>"
5102 [(set (match_operand:OI 0 "s_register_operand")
5103 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5104 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5108 (define_insn "neon_vld4<mode>"
5109 [(set (match_operand:OI 0 "s_register_operand" "=w")
5110 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5111 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5115 if (<V_sz_elem> == 64)
5116 return "vld1.64\t%h0, %A1";
5118 return "vld4.<V_sz_elem>\t%h0, %A1";
5121 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5122 (const_string "neon_load1_4reg<q>")
5123 (const_string "neon_load4_4reg<q>")))]
5126 (define_expand "vec_load_lanesxi<mode>"
5127 [(match_operand:XI 0 "s_register_operand")
5128 (match_operand:XI 1 "neon_struct_operand")
5129 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5132 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Q-register vld4 is emitted as two halves: qa loads the even D registers
;; (regno, +4, +8, +12) from the first OI-sized chunk, qb the odd ones
;; (regno+2, +6, +10, +14) from the next, with the destination tied to
;; keep the already-loaded half.
5136 (define_expand "neon_vld4<mode>"
5137 [(match_operand:XI 0 "s_register_operand")
5138 (match_operand:XI 1 "neon_struct_operand")
5139 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5144 mem = adjust_address (operands[1], OImode, 0);
5145 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5146 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5147 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5151 (define_insn "neon_vld4qa<mode>"
5152 [(set (match_operand:XI 0 "s_register_operand" "=w")
5153 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5154 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5158 int regno = REGNO (operands[0]);
5160 ops[0] = gen_rtx_REG (DImode, regno);
5161 ops[1] = gen_rtx_REG (DImode, regno + 4);
5162 ops[2] = gen_rtx_REG (DImode, regno + 8);
5163 ops[3] = gen_rtx_REG (DImode, regno + 12);
5164 ops[4] = operands[1];
5165 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5168 [(set_attr "type" "neon_load4_4reg<q>")]
5171 (define_insn "neon_vld4qb<mode>"
5172 [(set (match_operand:XI 0 "s_register_operand" "=w")
5173 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5174 (match_operand:XI 2 "s_register_operand" "0")
5175 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5179 int regno = REGNO (operands[0]);
5181 ops[0] = gen_rtx_REG (DImode, regno + 2);
5182 ops[1] = gen_rtx_REG (DImode, regno + 6);
5183 ops[2] = gen_rtx_REG (DImode, regno + 10);
5184 ops[3] = gen_rtx_REG (DImode, regno + 14);
5185 ops[4] = operands[1];
5186 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5189 [(set_attr "type" "neon_load4_4reg<q>")]
5192 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5193 ;; here on big endian targets.
;; D-register lane load: destination D registers regno, +2, +4, +6.
5194 (define_insn "neon_vld4_lane<mode>"
5195 [(set (match_operand:OI 0 "s_register_operand" "=w")
5196 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5197 (match_operand:OI 2 "s_register_operand" "0")
5198 (match_operand:SI 3 "immediate_operand" "i")
5199 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5203 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5204 int regno = REGNO (operands[0]);
5206 ops[0] = gen_rtx_REG (DImode, regno);
5207 ops[1] = gen_rtx_REG (DImode, regno + 2);
5208 ops[2] = gen_rtx_REG (DImode, regno + 4);
5209 ops[3] = gen_rtx_REG (DImode, regno + 6);
5210 ops[4] = operands[1];
5211 ops[5] = GEN_INT (lane);
5212 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5216 [(set_attr "type" "neon_load4_one_lane<q>")]
5219 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5220 ;; here on big endian targets.
;; Q-register lane load: D registers regno, +4, +8, +12; the
;; lane >= max/2 branch body is elided in this listing.
5221 (define_insn "neon_vld4_lane<mode>"
5222 [(set (match_operand:XI 0 "s_register_operand" "=w")
5223 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5224 (match_operand:XI 2 "s_register_operand" "0")
5225 (match_operand:SI 3 "immediate_operand" "i")
5226 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5230 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5231 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5232 int regno = REGNO (operands[0]);
5234 if (lane >= max / 2)
5239 ops[0] = gen_rtx_REG (DImode, regno);
5240 ops[1] = gen_rtx_REG (DImode, regno + 4);
5241 ops[2] = gen_rtx_REG (DImode, regno + 8);
5242 ops[3] = gen_rtx_REG (DImode, regno + 12);
5243 ops[4] = operands[1];
5244 ops[5] = GEN_INT (lane);
5245 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5249 [(set_attr "type" "neon_load4_one_lane<q>")]
;; vld4_dup: all-lanes load; single-element vectors fall back to vld1.
5252 (define_insn "neon_vld4_dup<mode>"
5253 [(set (match_operand:OI 0 "s_register_operand" "=w")
5254 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5255 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5259 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5261 int regno = REGNO (operands[0]);
5263 ops[0] = gen_rtx_REG (DImode, regno);
5264 ops[1] = gen_rtx_REG (DImode, regno + 2);
5265 ops[2] = gen_rtx_REG (DImode, regno + 4);
5266 ops[3] = gen_rtx_REG (DImode, regno + 6);
5267 ops[4] = operands[1];
5268 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5273 return "vld1.<V_sz_elem>\t%h0, %A1";
5276 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5277 (const_string "neon_load4_all_lanes<q>")
5278 (const_string "neon_load1_1reg<q>")))]
;; Standard-name expander mapping vec_store_lanesoi onto neon_vst4 for
;; double-width element modes (VDX), followed by the vst4 insn itself.
;; For 64-bit elements there is no vst4.64, so the insn emits the equivalent
;; vst1.64 of four registers instead; the type attribute distinguishes the
;; two encodings accordingly.
5281 (define_expand "vec_store_lanesoi<mode>"
5282 [(set (match_operand:OI 0 "neon_struct_operand")
5283 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5284 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5288 (define_insn "neon_vst4<mode>"
5289 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5290 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5291 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5295 if (<V_sz_elem> == 64)
5296 return "vst1.64\t%h1, %A0";
5298 return "vst4.<V_sz_elem>\t%h1, %A0";
5301 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5302 (const_string "neon_store1_4reg<q>")
5303 (const_string "neon_store4_4reg<q>")))]
;; Quad-vector (VQ2) vst4 expanders.  A quad vst4 touches 8 Q registers'
;; worth of data (an XI value), which exceeds what one vst4 can encode, so
;; the expander splits the store into two OImode halves: vst4qa stores the
;; even-numbered D registers to the first 32 bytes, vst4qb the odd-numbered
;; ones to the next 32 (address advanced by GET_MODE_SIZE (OImode)).
5306 (define_expand "vec_store_lanesxi<mode>"
5307 [(match_operand:XI 0 "neon_struct_operand")
5308 (match_operand:XI 1 "s_register_operand")
5309 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5312 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
5316 (define_expand "neon_vst4<mode>"
5317 [(match_operand:XI 0 "neon_struct_operand")
5318 (match_operand:XI 1 "s_register_operand")
5319 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5324 mem = adjust_address (operands[0], OImode, 0);
5325 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5326 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5327 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of a quad vst4: stores D registers regno+0/4/8/12 (the low
;; D reg of each Q register in the XI source) as one vst4.<size>.
5331 (define_insn "neon_vst4qa<mode>"
5332 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5333 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5334 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5338 int regno = REGNO (operands[1]);
5340 ops[0] = operands[0];
5341 ops[1] = gen_rtx_REG (DImode, regno);
5342 ops[2] = gen_rtx_REG (DImode, regno + 4);
5343 ops[3] = gen_rtx_REG (DImode, regno + 8);
5344 ops[4] = gen_rtx_REG (DImode, regno + 12);
5345 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5348 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of a quad vst4: stores D registers regno+2/6/10/14 (the high
;; D reg of each Q register); the expander above points it 32 bytes past
;; where vst4qa stored.
5351 (define_insn "neon_vst4qb<mode>"
5352 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5353 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5354 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5358 int regno = REGNO (operands[1]);
5360 ops[0] = operands[0];
5361 ops[1] = gen_rtx_REG (DImode, regno + 2);
5362 ops[2] = gen_rtx_REG (DImode, regno + 6);
5363 ops[3] = gen_rtx_REG (DImode, regno + 10);
5364 ops[4] = gen_rtx_REG (DImode, regno + 14);
5365 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5368 [(set_attr "type" "neon_store4_4reg<q>")]
;; Double-width (VD_LANE) vst4 single-lane store: writes one element from the
;; same lane of four consecutive-pair D registers (regno+0/2/4/6 inside the
;; OI source) to a 4-element structure in memory.  Lane index is endian-
;; adjusted with NEON_ENDIAN_LANE_N; typed as a one-lane store.
5371 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5372 ;; here on big endian targets.
5373 (define_insn "neon_vst4_lane<mode>"
5374 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5375 (unspec:<V_four_elem>
5376 [(match_operand:OI 1 "s_register_operand" "w")
5377 (match_operand:SI 2 "immediate_operand" "i")
5378 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5382 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5383 int regno = REGNO (operands[1]);
5385 ops[0] = operands[0];
5386 ops[1] = gen_rtx_REG (DImode, regno);
5387 ops[2] = gen_rtx_REG (DImode, regno + 2);
5388 ops[3] = gen_rtx_REG (DImode, regno + 4);
5389 ops[4] = gen_rtx_REG (DImode, regno + 6);
5390 ops[5] = GEN_INT (lane);
5391 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5395 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Quad-width (VQ_HS) vst4 single-lane store: writes one element from the
;; same lane of four Q registers (low D regs regno+0/4/8/12 of the XI
;; source) to memory.  When the lane falls in the upper half of the vector
;; the register/lane selection is re-based (the adjustment body under
;; `lane >= max / 2` is not visible in this excerpt — confirm against the
;; full file).
;; FIX(review): the type attribute was "neon_store4_4reg<q>", but this insn
;; stores a single lane ({%P1[%c5], ...}) exactly like the VD_LANE variant
;; above, which is typed "neon_store4_one_lane<q>".  Corrected so the
;; scheduler models it as a one-lane store, not a 4-register store.
5398 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5399 ;; here on big endian targets.
5400 (define_insn "neon_vst4_lane<mode>"
5401 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5402 (unspec:<V_four_elem>
5403 [(match_operand:XI 1 "s_register_operand" "w")
5404 (match_operand:SI 2 "immediate_operand" "i")
5405 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5409 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5410 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5411 int regno = REGNO (operands[1]);
5413 if (lane >= max / 2)
5418 ops[0] = operands[0];
5419 ops[1] = gen_rtx_REG (DImode, regno);
5420 ops[2] = gen_rtx_REG (DImode, regno + 4);
5421 ops[3] = gen_rtx_REG (DImode, regno + 8);
5422 ops[4] = gen_rtx_REG (DImode, regno + 12);
5423 ops[5] = GEN_INT (lane);
5424 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5428 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Widening sign/zero extension (SE iterator) of the low half of a quad
;; vector via vmovl; little-endian only because vec_select lane numbering
;; differs on big-endian.  %e1 selects the low D register of the Q operand.
5431 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5432 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5433 (SE:<V_unpack> (vec_select:<V_HALF>
5434 (match_operand:VU 1 "register_operand" "w")
5435 (match_operand:VU 2 "vect_par_constant_low" ""))))]
5436 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5437 "vmovl.<US><V_sz_elem> %q0, %e1"
5438 [(set_attr "type" "neon_shift_imm_long")]
;; As above but for the high half (%f1 = high D register of the Q operand).
5441 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5442 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5443 (SE:<V_unpack> (vec_select:<V_HALF>
5444 (match_operand:VU 1 "register_operand" "w")
5445 (match_operand:VU 2 "vect_par_constant_high" ""))))]
5446 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5447 "vmovl.<US><V_sz_elem> %q0, %f1"
5448 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expanders for vec_unpack hi/lo on quad vectors: build the
;; PARALLEL of selected lane indices (upper half n/2..n-1 for _hi, lower
;; half 0..n/2-1 for _lo) and emit the matching neon_vec_unpack insn.
5451 (define_expand "vec_unpack<US>_hi_<mode>"
5452 [(match_operand:<V_unpack> 0 "register_operand" "")
5453 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5454 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5456 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5459 for (i = 0; i < (<V_mode_nunits>/2); i++)
5460 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5462 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5463 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
5470 (define_expand "vec_unpack<US>_lo_<mode>"
5471 [(match_operand:<V_unpack> 0 "register_operand" "")
5472 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5473 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5475 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5478 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5479 RTVEC_ELT (v, i) = GEN_INT (i);
5480 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5481 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply (vmull) of the low halves of two quad vectors, and the
;; standard-name expander that constructs the low-half lane PARALLEL and
;; emits it.  Little-endian only, same vec_select caveat as the unpack
;; patterns above.
5488 (define_insn "neon_vec_<US>mult_lo_<mode>"
5489 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5490 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5491 (match_operand:VU 1 "register_operand" "w")
5492 (match_operand:VU 2 "vect_par_constant_low" "")))
5493 (SE:<V_unpack> (vec_select:<V_HALF>
5494 (match_operand:VU 3 "register_operand" "w")
5496 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5497 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5498 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5501 (define_expand "vec_widen_<US>mult_lo_<mode>"
5502 [(match_operand:<V_unpack> 0 "register_operand" "")
5503 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5504 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5505 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5507 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5510 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5511 RTVEC_ELT (v, i) = GEN_INT (i);
5512 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5514 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply (vmull) of the high halves of two quad vectors, plus
;; its standard-name expander (lane indices n/2..n-1).  Mirrors the _lo
;; pair above with %f operands selecting the high D registers.
5522 (define_insn "neon_vec_<US>mult_hi_<mode>"
5523 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5524 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5525 (match_operand:VU 1 "register_operand" "w")
5526 (match_operand:VU 2 "vect_par_constant_high" "")))
5527 (SE:<V_unpack> (vec_select:<V_HALF>
5528 (match_operand:VU 3 "register_operand" "w")
5530 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5531 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5532 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5535 (define_expand "vec_widen_<US>mult_hi_<mode>"
5536 [(match_operand:<V_unpack> 0 "register_operand" "")
5537 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5538 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5539 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5541 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
5544 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5545 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5546 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5548 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift-left (vshll) of a 64-bit vector by an immediate, and the
;; standard-name expanders that apply it to the low/high half of a quad
;; vector.  The halves are extracted with simplify_gen_subreg at byte offset
;; 0 (lo) or GET_MODE_SIZE (<V_HALF>mode) (hi) — offset-based, hence the
;; little-endian-only condition.
5557 (define_insn "neon_vec_<US>shiftl_<mode>"
5558 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5559 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5560 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5563 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5565 [(set_attr "type" "neon_shift_imm_long")]
5568 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5569 [(match_operand:<V_unpack> 0 "register_operand" "")
5570 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5571 (match_operand:SI 2 "immediate_operand" "i")]
5572 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5574 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5575 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
5581 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5582 [(match_operand:<V_unpack> 0 "register_operand" "")
5583 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5584 (match_operand:SI 2 "immediate_operand" "i")]
5585 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5587 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5588 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5589 GET_MODE_SIZE (<V_HALF>mode)),
;; Widen (sign/zero extend) a whole 64-bit vector into a quad register.
;; FIX(review): the type attribute was "neon_move", inconsistent with the
;; other vmovl patterns in this file (neon_vec_unpack<US>_lo/_hi), which are
;; typed "neon_shift_imm_long" — vmovl is a long shift form (vshll #0), not
;; a register move.  Corrected for consistent scheduling classification.
5595 ;; Vectorize for non-neon-quad case
5596 (define_insn "neon_unpack<US>_<mode>"
5597 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5598 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5600 "vmovl.<US><V_sz_elem> %q0, %P1"
5601 [(set_attr "type" "neon_shift_imm_long")]
;; vec_unpack hi/lo for 64-bit (VDI) inputs: widen the whole vector into a
;; quad temporary with neon_unpack, then extract the low or high D register
;; with vget_low/vget_high.
5604 (define_expand "vec_unpack<US>_lo_<mode>"
5605 [(match_operand:<V_double_width> 0 "register_operand" "")
5606 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5609 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5610 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5611 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
5617 (define_expand "vec_unpack<US>_hi_<mode>"
5618 [(match_operand:<V_double_width> 0 "register_operand" "")
5619 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5622 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5623 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5624 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Full widening multiply of two 64-bit vectors (vmull into a quad result),
;; and the vec_widen_*mult_hi/_lo expanders for VDI modes: multiply into a
;; quad temporary, then take the high or low D register of the result.
5630 (define_insn "neon_vec_<US>mult_<mode>"
5631 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5632 (mult:<V_widen> (SE:<V_widen>
5633 (match_operand:VDI 1 "register_operand" "w"))
5635 (match_operand:VDI 2 "register_operand" "w"))))]
5637 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5638 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
5641 (define_expand "vec_widen_<US>mult_hi_<mode>"
5642 [(match_operand:<V_double_width> 0 "register_operand" "")
5643 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5644 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5647 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5648 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5649 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5656 (define_expand "vec_widen_<US>mult_lo_<mode>"
5657 [(match_operand:<V_double_width> 0 "register_operand" "")
5658 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5659 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5662 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5663 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5664 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; vec_widen_shiftl hi/lo for 64-bit (VDI) inputs: vshll the whole vector
;; into a quad temporary, then extract the high or low D register.
5671 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5672 [(match_operand:<V_double_width> 0 "register_operand" "")
5673 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5674 (match_operand:SI 2 "immediate_operand" "i")]
5677 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5678 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5679 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
5685 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5686 [(match_operand:<V_double_width> 0 "register_operand" "")
5687 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5688 (match_operand:SI 2 "immediate_operand" "i")]
5691 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5692 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5693 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Narrowing pack of two quad vectors into one: two vmovn instructions
;; write the low (%e0) then high (%f0) D half of the destination.  The
;; earlyclobber (=&w) prevents the destination overlapping the still-needed
;; second source; length 8 = two 4-byte instructions.
5699 ; FIXME: These instruction patterns can't be used safely in big-endian mode
5700 ; because the ordering of vector elements in Q registers is different from what
5701 ; the semantics of the instructions require.
5703 (define_insn "vec_pack_trunc_<mode>"
5704 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5705 (vec_concat:<V_narrow_pack>
5706 (truncate:<V_narrow>
5707 (match_operand:VN 1 "register_operand" "w"))
5708 (truncate:<V_narrow>
5709 (match_operand:VN 2 "register_operand" "w"))))]
5710 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5711 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5712 [(set_attr "type" "multiple")
5713 (set_attr "length" "8")]
;; Single vmovn narrowing a quad vector to one D register, and the VSHFT
;; expander that packs two D-vector sources: assemble them into a quad
;; temporary via move_lo_quad/move_hi_quad, then narrow with one vmovn.
5716 ;; For the non-quad case.
5717 (define_insn "neon_vec_pack_trunc_<mode>"
5718 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5719 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5720 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5721 "vmovn.i<V_sz_elem>\t%P0, %q1"
5722 [(set_attr "type" "neon_move_narrow_q")]
5725 (define_expand "vec_pack_trunc_<mode>"
5726 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5727 (match_operand:VSHFT 1 "register_operand" "")
5728 (match_operand:VSHFT 2 "register_operand")]
5729 "TARGET_NEON && !BYTES_BIG_ENDIAN"
5731 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5733 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5734 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5735 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Absolute-difference patterns matching vabd: _2 matches abs(a - b) RTL
;; directly, _3 matches abs of an unspec form (the unspec name is on a line
;; missing from this excerpt).  Float modes are only combined this way under
;; -funsafe-math-optimizations; the type attribute picks the FP or integer
;; abd reservation accordingly.
5739 (define_insn "neon_vabd<mode>_2"
5740 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5741 (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5742 (match_operand:VDQ 2 "s_register_operand" "w"))))]
5743 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5744 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5746 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5747 (const_string "neon_fp_abd_s<q>")
5748 (const_string "neon_abd<q>")))]
5751 (define_insn "neon_vabd<mode>_3"
5752 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5753 (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5754 (match_operand:VDQ 2 "s_register_operand" "w")]
5756 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5757 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5759 (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5760 (const_string "neon_fp_abd_s<q>")
5761 (const_string "neon_abd<q>")))]
;; Post-reload splits for sign-extending SI/HI/QI to DI when the result
;; lives in a VFP/NEON register: duplicate the narrow value across a vector
;; register overlapping the DI destination, then arithmetic-shift-right the
;; DI by 64 minus the source width (32/48/56) to sign-extend in place.
;; Each split's match_dup 2 is the vector view of the destination register.
5764 ;; Copy from core-to-neon regs, then extend, not vice-versa
5767 [(set (match_operand:DI 0 "s_register_operand" "")
5768 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5769 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5770 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5771 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5773 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5777 [(set (match_operand:DI 0 "s_register_operand" "")
5778 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5779 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5780 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5781 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5783 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5787 [(set (match_operand:DI 0 "s_register_operand" "")
5788 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5789 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5790 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5791 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5793 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; Post-reload splits for zero-extending SI/HI/QI to DI in a VFP/NEON
;; register: same vec_duplicate trick as the sign-extend splits above, but
;; with a logical shift right (lshiftrt) by 32/48/56 to clear the high bits.
5797 [(set (match_operand:DI 0 "s_register_operand" "")
5798 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5799 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5800 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5801 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5803 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
5807 [(set (match_operand:DI 0 "s_register_operand" "")
5808 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5809 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5810 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5811 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5813 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5817 [(set (match_operand:DI 0 "s_register_operand" "")
5818 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5819 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5820 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5821 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5823 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));