1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2016 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Declares the three possible mnemonic values; "vadd" is the default.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Register/memory/immediate moves for 64-bit NEON vector modes (VDX).
;; Alternative 2 materializes a vector immediate: a vmov.f32 when possible,
;; otherwise a vmov.i<width> formatted into a static buffer.  Alternatives
;; 4/5 transfer between a D register and a core register pair; the pool-range
;; attributes bound the literal-pool distance for the load alternatives.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,mov_reg,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,*,1004,*")])
;; Quadword analogue of the VDX move above (VQXMOV modes).  Alternatives 4/5
;; split the Q register into its two D halves (%e/%f) and move each half
;; via a core register pair, hence the two-instruction templates.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
;; Move expanders.  Each one forces a non-register source into a register
;; when the destination is not a register, so the eventual insn never sees
;; a mem-to-mem (or constant-to-mem) move.
116 (define_expand "movti"
117 [(set (match_operand:TI 0 "nonimmediate_operand" "")
118 (match_operand:TI 1 "general_operand" ""))]
121 if (can_create_pseudo_p ())
123 if (!REG_P (operands[0]))
124 operands[1] = force_reg (TImode, operands[1]);
;; Same expansion strategy for the multi-register structure modes.
128 (define_expand "mov<mode>"
129 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
130 (match_operand:VSTRUCT 1 "general_operand" ""))]
133 if (can_create_pseudo_p ())
135 if (!REG_P (operands[0]))
136 operands[1] = force_reg (<MODE>mode, operands[1]);
;; V4HF/V8HF movers are gated on TARGET_FP16 in addition to TARGET_NEON.
;; NOTE(review): movv4hf uses s_register_operand predicates while movv8hf
;; uses empty predicates — confirm against the upstream file whether this
;; asymmetry is intentional (interior lines are missing from this extract).
140 (define_expand "movv4hf"
141 [(set (match_operand:V4HF 0 "s_register_operand")
142 (match_operand:V4HF 1 "s_register_operand"))]
143 "TARGET_NEON && TARGET_FP16"
145 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
146 causing an ICE on big-endian because it cannot extract subregs in
148 if (can_create_pseudo_p ())
150 if (!REG_P (operands[0]))
151 operands[1] = force_reg (V4HFmode, operands[1]);
155 (define_expand "movv8hf"
156 [(set (match_operand:V8HF 0 "")
157 (match_operand:V8HF 1 ""))]
158 "TARGET_NEON && TARGET_FP16"
160 /* We need to use force_reg to avoid CANNOT_CHANGE_MODE_CLASS
161 causing an ICE on big-endian because it cannot extract subregs in
163 if (can_create_pseudo_p ())
165 if (!REG_P (operands[0]))
166 operands[1] = force_reg (V8HFmode, operands[1]);
;; Moves for multi-register structure modes (VSTRUCT): reg-reg and
;; load/store via the Ut memory constraint; output_move_neon emits the
;; actual sequence and arm_attr_length_move_neon computes its length.
170 (define_insn "*neon_mov<mode>"
171 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
172 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
174 && (register_operand (operands[0], <MODE>mode)
175 || register_operand (operands[1], <MODE>mode))"
177 switch (which_alternative)
180 case 1: case 2: return output_move_neon (operands);
181 default: gcc_unreachable ();
184 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
185 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; Post-reload splits for register-to-register copies of the structure
;; modes EI/OI/CI/XI.  Each copy is decomposed into TImode (and, for EI,
;; one DImode) chunk copies at 4-register strides; neon_disambiguate_copy
;; reorders the chunk moves so an overlapping source is not clobbered
;; before it is read.
188 [(set (match_operand:EI 0 "s_register_operand" "")
189 (match_operand:EI 1 "s_register_operand" ""))]
190 "TARGET_NEON && reload_completed"
191 [(set (match_dup 0) (match_dup 1))
192 (set (match_dup 2) (match_dup 3))]
194 int rdest = REGNO (operands[0]);
195 int rsrc = REGNO (operands[1]);
198 dest[0] = gen_rtx_REG (TImode, rdest);
199 src[0] = gen_rtx_REG (TImode, rsrc);
200 dest[1] = gen_rtx_REG (DImode, rdest + 4);
201 src[1] = gen_rtx_REG (DImode, rsrc + 4);
203 neon_disambiguate_copy (operands, dest, src, 2);
;; OI copy: two TImode chunks.
207 [(set (match_operand:OI 0 "s_register_operand" "")
208 (match_operand:OI 1 "s_register_operand" ""))]
209 "TARGET_NEON && reload_completed"
210 [(set (match_dup 0) (match_dup 1))
211 (set (match_dup 2) (match_dup 3))]
213 int rdest = REGNO (operands[0]);
214 int rsrc = REGNO (operands[1]);
217 dest[0] = gen_rtx_REG (TImode, rdest);
218 src[0] = gen_rtx_REG (TImode, rsrc);
219 dest[1] = gen_rtx_REG (TImode, rdest + 4);
220 src[1] = gen_rtx_REG (TImode, rsrc + 4);
222 neon_disambiguate_copy (operands, dest, src, 2);
;; CI copy: three TImode chunks.
226 [(set (match_operand:CI 0 "s_register_operand" "")
227 (match_operand:CI 1 "s_register_operand" ""))]
228 "TARGET_NEON && reload_completed"
229 [(set (match_dup 0) (match_dup 1))
230 (set (match_dup 2) (match_dup 3))
231 (set (match_dup 4) (match_dup 5))]
233 int rdest = REGNO (operands[0]);
234 int rsrc = REGNO (operands[1]);
237 dest[0] = gen_rtx_REG (TImode, rdest);
238 src[0] = gen_rtx_REG (TImode, rsrc);
239 dest[1] = gen_rtx_REG (TImode, rdest + 4);
240 src[1] = gen_rtx_REG (TImode, rsrc + 4);
241 dest[2] = gen_rtx_REG (TImode, rdest + 8);
242 src[2] = gen_rtx_REG (TImode, rsrc + 8);
244 neon_disambiguate_copy (operands, dest, src, 3);
;; XI copy: four TImode chunks.
248 [(set (match_operand:XI 0 "s_register_operand" "")
249 (match_operand:XI 1 "s_register_operand" ""))]
250 "TARGET_NEON && reload_completed"
251 [(set (match_dup 0) (match_dup 1))
252 (set (match_dup 2) (match_dup 3))
253 (set (match_dup 4) (match_dup 5))
254 (set (match_dup 6) (match_dup 7))]
256 int rdest = REGNO (operands[0]);
257 int rsrc = REGNO (operands[1]);
260 dest[0] = gen_rtx_REG (TImode, rdest);
261 src[0] = gen_rtx_REG (TImode, rsrc);
262 dest[1] = gen_rtx_REG (TImode, rdest + 4);
263 src[1] = gen_rtx_REG (TImode, rsrc + 4);
264 dest[2] = gen_rtx_REG (TImode, rdest + 8);
265 src[2] = gen_rtx_REG (TImode, rsrc + 8);
266 dest[3] = gen_rtx_REG (TImode, rdest + 12);
267 src[3] = gen_rtx_REG (TImode, rsrc + 12);
269 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector moves (little-endian only, and only when
;; unaligned accesses are enabled).  Exactly one operand ends up being the
;; memory side; its address is legitimized for vld1/vst1 if needed.
272 (define_expand "movmisalign<mode>"
273 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
274 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
275 UNSPEC_MISALIGNED_ACCESS))]
276 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
279 /* This pattern is not permitted to fail during expansion: if both arguments
280 are non-registers (e.g. memory := constant, which can be created by the
281 auto-vectorizer), force operand 1 into a register. */
282 if (!s_register_operand (operands[0], <MODE>mode)
283 && !s_register_operand (operands[1], <MODE>mode))
284 operands[1] = force_reg (<MODE>mode, operands[1]);
286 if (s_register_operand (operands[0], <MODE>mode))
287 adjust_mem = operands[1];
289 adjust_mem = operands[0];
291 /* Legitimize address. */
292 if (!neon_vector_mem_operand (adjust_mem, 2, true))
293 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store/load insns: vst1/vld1 with the element size taken from
;; the mode, for doubleword (VDX, %P register) and quadword (VQX, %q
;; register) vectors respectively.
297 (define_insn "*movmisalign<mode>_neon_store"
298 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
299 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vst1.<V_sz_elem>\t{%P1}, %A0"
303 [(set_attr "type" "neon_store1_1reg<q>")])
305 (define_insn "*movmisalign<mode>_neon_load"
306 [(set (match_operand:VDX 0 "s_register_operand" "=w")
307 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
309 UNSPEC_MISALIGNED_ACCESS))]
310 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
311 "vld1.<V_sz_elem>\t{%P0}, %A1"
312 [(set_attr "type" "neon_load1_1reg<q>")])
314 (define_insn "*movmisalign<mode>_neon_store"
315 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
316 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vst1.<V_sz_elem>\t{%q1}, %A0"
320 [(set_attr "type" "neon_store1_1reg<q>")])
322 (define_insn "*movmisalign<mode>_neon_load"
323 [(set (match_operand:VQX 0 "s_register_operand" "=w")
324 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
326 UNSPEC_MISALIGNED_ACCESS))]
327 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
328 "vld1.<V_sz_elem>\t{%q0}, %A1"
329 [(set_attr "type" "neon_load1_1reg<q>")])
;; Lane-insert insns.  Operand 2 is a one-hot mask; ffs()-1 recovers the
;; lane index, which is mirrored for big-endian so that the architectural
;; lane matches GCC's element numbering.  Alternative 0 loads the lane
;; straight from memory with vld1; alternative 1 inserts from a core reg.
331 (define_insn "vec_set<mode>_internal"
332 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
334 (vec_duplicate:VD_LANE
335 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
336 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
337 (match_operand:SI 2 "immediate_operand" "i,i")))]
340 int elt = ffs ((int) INTVAL (operands[2])) - 1;
341 if (BYTES_BIG_ENDIAN)
342 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
343 operands[2] = GEN_INT (elt);
345 if (which_alternative == 0)
346 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
348 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
350 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Quadword variant: the lane index is reduced to a lane within one D-half
;; of the Q register, and operand 0 is rewritten as that D register.
352 (define_insn "vec_set<mode>_internal"
353 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
356 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
357 (match_operand:VQ2 3 "s_register_operand" "0,0")
358 (match_operand:SI 2 "immediate_operand" "i,i")))]
361 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
362 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
363 int elt = elem % half_elts;
364 int hi = (elem / half_elts) * 2;
365 int regno = REGNO (operands[0]);
367 if (BYTES_BIG_ENDIAN)
368 elt = half_elts - 1 - elt;
370 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
371 operands[2] = GEN_INT (elt);
373 if (which_alternative == 0)
374 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
376 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
378 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; V2DI variant: a whole D register is the "lane", so the destination is
;; simply rebased to the selected D register (no endian adjustment).
381 (define_insn "vec_setv2di_internal"
382 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
385 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
386 (match_operand:V2DI 3 "s_register_operand" "0,0")
387 (match_operand:SI 2 "immediate_operand" "i,i")))]
390 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
391 int regno = REGNO (operands[0]) + 2 * elem;
393 operands[0] = gen_rtx_REG (DImode, regno);
395 if (which_alternative == 0)
396 return "vld1.64\t%P0, %A1";
398 return "vmov\t%P0, %Q1, %R1";
400 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; vec_set expander: converts the plain lane index in operand 2 into the
;; one-hot mask expected by the *_internal patterns above.
403 (define_expand "vec_set<mode>"
404 [(match_operand:VDQ 0 "s_register_operand" "")
405 (match_operand:<V_elem> 1 "s_register_operand" "")
406 (match_operand:SI 2 "immediate_operand" "")]
409 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
410 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
411 GEN_INT (elem), operands[0]));
;; Lane-extract insns, mirroring the vec_set patterns: lane numbering is
;; flipped for big-endian; quadword extraction narrows to the containing
;; D half; the memory alternative stores the lane directly with vst1.
415 (define_insn "vec_extract<mode>"
416 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
418 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
419 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
422 if (BYTES_BIG_ENDIAN)
424 int elt = INTVAL (operands[2]);
425 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
426 operands[2] = GEN_INT (elt);
429 if (which_alternative == 0)
430 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
432 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
434 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Quadword variant: select the D half holding the lane, then extract.
437 (define_insn "vec_extract<mode>"
438 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
440 (match_operand:VQ2 1 "s_register_operand" "w,w")
441 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
444 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
445 int elt = INTVAL (operands[2]) % half_elts;
446 int hi = (INTVAL (operands[2]) / half_elts) * 2;
447 int regno = REGNO (operands[1]);
449 if (BYTES_BIG_ENDIAN)
450 elt = half_elts - 1 - elt;
452 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
453 operands[2] = GEN_INT (elt);
455 if (which_alternative == 0)
456 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
458 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
460 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; V2DI variant: each "lane" is a full D register.
463 (define_insn "vec_extractv2di"
464 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
466 (match_operand:V2DI 1 "s_register_operand" "w,w")
467 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
470 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
472 operands[1] = gen_rtx_REG (DImode, regno);
474 if (which_alternative == 0)
475 return "vst1.64\t{%P1}, %A0 @ v2di";
477 return "vmov\t%Q0, %R0, %P1 @ v2di";
479 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Vector construction from a parallel of elements; all the work is done
;; in neon_expand_vector_init.
482 (define_expand "vec_init<mode>"
483 [(match_operand:VDQ 0 "s_register_operand" "")
484 (match_operand 1 "" "")]
487 neon_expand_vector_init (operands[0], operands[1]);
491 ;; Doubleword and quadword arithmetic.
493 ;; NOTE: some other instructions also support 64-bit integer
494 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition.  Float modes are only allowed under
;; -funsafe-math-optimizations because NEON arithmetic flushes denormals.
496 (define_insn "*add<mode>3_neon"
497 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
498 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
499 (match_operand:VDQ 2 "s_register_operand" "w")))]
500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
501 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
503 (if_then_else (match_test "<Is_float_mode>")
504 (const_string "neon_fp_addsub_s<q>")
505 (const_string "neon_add<q>")))]
;; 64-bit integer add: either a single vadd.i64 in a NEON register
;; (alternatives 0 and 3) or a core-register adds/adc pair (clobbering the
;; condition codes); the "arch" attribute steers the register choice.
508 (define_insn "adddi3_neon"
509 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
510 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
511 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
512 (clobber (reg:CC CC_REGNUM))]
515 switch (which_alternative)
517 case 0: /* fall through */
518 case 3: return "vadd.i64\t%P0, %P1, %P2";
524 default: gcc_unreachable ();
527 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
528 multiple,multiple,multiple")
529 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
530 (set_attr "length" "*,8,8,*,8,8,8")
531 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction; same unsafe-math gating for float modes as vadd.
534 (define_insn "*sub<mode>3_neon"
535 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
536 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
537 (match_operand:VDQ 2 "s_register_operand" "w")))]
538 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
539 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
541 (if_then_else (match_test "<Is_float_mode>")
542 (const_string "neon_fp_addsub_s<q>")
543 (const_string "neon_sub<q>")))]
;; 64-bit integer subtract: vsub.i64 in NEON registers (alternatives 0/4)
;; or a core-register subs/sbc pair clobbering the condition codes.
546 (define_insn "subdi3_neon"
547 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
548 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
549 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
550 (clobber (reg:CC CC_REGNUM))]
553 switch (which_alternative)
555 case 0: /* fall through */
556 case 4: return "vsub.i64\t%P0, %P1, %P2";
557 case 1: /* fall through */
558 case 2: /* fall through */
559 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
560 default: gcc_unreachable ();
563 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
564 (set_attr "conds" "*,clob,clob,clob,*")
565 (set_attr "length" "*,8,8,8,*")
566 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply; float modes require -funsafe-math-optimizations.
569 (define_insn "*mul<mode>3_neon"
570 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
571 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
572 (match_operand:VDQW 2 "s_register_operand" "w")))]
573 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
574 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
576 (if_then_else (match_test "<Is_float_mode>")
577 (const_string "neon_fp_mul_s<q>")
578 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate (vmla: acc + a*b) and multiply-subtract
;; (vmls: acc - a*b); the accumulator is tied to the destination ("0").
581 (define_insn "mul<mode>3add<mode>_neon"
582 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
583 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
584 (match_operand:VDQW 3 "s_register_operand" "w"))
585 (match_operand:VDQW 1 "s_register_operand" "0")))]
586 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
587 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
589 (if_then_else (match_test "<Is_float_mode>")
590 (const_string "neon_fp_mla_s<q>")
591 (const_string "neon_mla_<V_elem_ch><q>")))]
594 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
595 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
596 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
597 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
598 (match_operand:VDQW 3 "s_register_operand" "w"))))]
599 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
600 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
602 (if_then_else (match_test "<Is_float_mode>")
603 (const_string "neon_fp_mla_s<q>")
604 (const_string "neon_mla_<V_elem_ch><q>")))]
607 ;; Fused multiply-accumulate
608 ;; We define each insn twice here:
609 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
610 ;; to be able to use when converting to FMA.
611 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; vfma/vfms patterns.  Each comes in two flavours: one gated on
;; flag_unsafe_math_optimizations (used when contracting mul+add into FMA)
;; and an "_intrinsic" one without that gate, for the NEON intrinsics.
612 (define_insn "fma<VCVTF:mode>4"
613 [(set (match_operand:VCVTF 0 "register_operand" "=w")
614 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
615 (match_operand:VCVTF 2 "register_operand" "w")
616 (match_operand:VCVTF 3 "register_operand" "0")))]
617 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
618 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
619 [(set_attr "type" "neon_fp_mla_s<q>")]
622 (define_insn "fma<VCVTF:mode>4_intrinsic"
623 [(set (match_operand:VCVTF 0 "register_operand" "=w")
624 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
625 (match_operand:VCVTF 2 "register_operand" "w")
626 (match_operand:VCVTF 3 "register_operand" "0")))]
627 "TARGET_NEON && TARGET_FMA"
628 "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
629 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract: operand 1 is negated, so acc + (-a)*b.
632 (define_insn "*fmsub<VCVTF:mode>4"
633 [(set (match_operand:VCVTF 0 "register_operand" "=w")
634 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
635 (match_operand:VCVTF 2 "register_operand" "w")
636 (match_operand:VCVTF 3 "register_operand" "0")))]
637 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
638 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
639 [(set_attr "type" "neon_fp_mla_s<q>")]
642 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
643 [(set (match_operand:VCVTF 0 "register_operand" "=w")
644 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
645 (match_operand:VCVTF 2 "register_operand" "w")
646 (match_operand:VCVTF 3 "register_operand" "0")))]
647 "TARGET_NEON && TARGET_FMA"
648 "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
649 [(set_attr "type" "neon_fp_mla_s<q>")]
;; ARMv8 float rounding (vrint<variant>) and round-and-convert-to-integer
;; (vcvt<variant>, signed or unsigned per the FIXUORS code iterator).
652 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
653 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
654 (unspec:VCVTF [(match_operand:VCVTF 1
655 "s_register_operand" "w")]
657 "TARGET_NEON && TARGET_FPU_ARMV8"
658 "vrint<nvrint_variant>%?.f32\\t%<V_reg>0, %<V_reg>1"
659 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
662 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
663 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
664 (FIXUORS:<V_cmp_result> (unspec:VCVTF
665 [(match_operand:VCVTF 1 "register_operand" "w")]
667 "TARGET_NEON && TARGET_FPU_ARMV8"
668 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
669 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
670 (set_attr "predicable" "no")]
;; Vector inclusive-OR: register form, or vorr with an immediate when
;; operand 2 satisfies the Dl constraint (neon_output_logic_immediate
;; formats the immediate operand).
673 (define_insn "ior<mode>3"
674 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
675 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
676 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
679 switch (which_alternative)
681 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
682 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
683 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
684 default: gcc_unreachable ();
687 [(set_attr "type" "neon_logic<q>")]
690 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
691 ;; vorr. We support the pseudo-instruction vand instead, because that
692 ;; corresponds to the canonical form the middle-end expects to use for
693 ;; immediate bitwise-ANDs.
;; Vector AND: register form, or the "vand #imm" pseudo-instruction
;; (assembled as vbic of the inverted immediate — see the comment above).
695 (define_insn "and<mode>3"
696 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
697 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
698 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
701 switch (which_alternative)
703 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
704 case 1: return neon_output_logic_immediate ("vand", &operands[2],
705 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
706 default: gcc_unreachable ();
709 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT: operand 1 | ~operand 2 (note the swapped operand numbering in
;; the RTL to match vorn's operand order).
712 (define_insn "orn<mode>3_neon"
713 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
714 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
715 (match_operand:VDQ 1 "s_register_operand" "w")))]
717 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
718 [(set_attr "type" "neon_logic<q>")]
721 ;; TODO: investigate whether we should disable
722 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT.  The core-register alternatives are split after reload
;; into two SImode orn operations on the low and high halves; when the
;; register allocator gave us a VFP/NEON register the fallback sequence
;; emits mvn followed by orr via gen_one_cmpldi2/gen_iordi3.
724 (define_insn_and_split "orndi3_neon"
725 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
726 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
727 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
735 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
736 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
737 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
742 operands[3] = gen_highpart (SImode, operands[0]);
743 operands[0] = gen_lowpart (SImode, operands[0]);
744 operands[4] = gen_highpart (SImode, operands[2]);
745 operands[2] = gen_lowpart (SImode, operands[2]);
746 operands[5] = gen_highpart (SImode, operands[1]);
747 operands[1] = gen_lowpart (SImode, operands[1]);
751 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
752 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
756 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
757 (set_attr "length" "*,16,8,8")
758 (set_attr "arch" "any,a,t2,t2")]
;; Bit-clear: operand 1 & ~operand 2 (vbic operand order, as for vorn).
761 (define_insn "bic<mode>3_neon"
762 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
763 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
764 (match_operand:VDQ 1 "s_register_operand" "w")))]
766 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
767 [(set_attr "type" "neon_logic<q>")]
770 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear: vbic in NEON registers or a two-insn core sequence.
771 (define_insn "bicdi3_neon"
772 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
773 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
774 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
780 [(set_attr "type" "neon_logic,multiple,multiple")
781 (set_attr "length" "*,8,8")]
;; Vector exclusive-OR and bitwise NOT.
784 (define_insn "xor<mode>3"
785 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
786 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
787 (match_operand:VDQ 2 "s_register_operand" "w")))]
789 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
790 [(set_attr "type" "neon_logic<q>")]
793 (define_insn "one_cmpl<mode>2"
794 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
795 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
797 "vmvn\t%<V_reg>0, %<V_reg>1"
798 [(set_attr "type" "neon_move<q>")]
;; Element-wise absolute value and negation (vabs/vneg), with float vs
;; integer scheduling types selected by the mode.
801 (define_insn "abs<mode>2"
802 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
803 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
805 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
807 (if_then_else (match_test "<Is_float_mode>")
808 (const_string "neon_fp_abs_s<q>")
809 (const_string "neon_abs<q>")))]
812 (define_insn "neg<mode>2"
813 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
814 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
816 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
818 (if_then_else (match_test "<Is_float_mode>")
819 (const_string "neon_fp_neg_s<q>")
820 (const_string "neon_neg<q>")))]
;; 64-bit negation, followed by its two post-reload splits.
823 (define_insn "negdi2_neon"
824 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
825 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
826 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
827 (clobber (reg:CC CC_REGNUM))]
830 [(set_attr "length" "8")
831 (set_attr "type" "multiple")]
834 ; Split negdi2_neon for vfp registers
;; VFP/NEON case: compute 0 - operand 1; the scratch holds the zero, or
;; the destination itself doubles as the zero register when no scratch
;; was allocated.
836 [(set (match_operand:DI 0 "s_register_operand" "")
837 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
838 (clobber (match_scratch:DI 2 ""))
839 (clobber (reg:CC CC_REGNUM))]
840 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
841 [(set (match_dup 2) (const_int 0))
842 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
843 (clobber (reg:CC CC_REGNUM))])]
845 if (!REG_P (operands[2]))
846 operands[2] = operands[0];
850 ; Split negdi2_neon for core registers
;; Core-register case: defer to the generic DImode negate pattern.
852 [(set (match_operand:DI 0 "s_register_operand" "")
853 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
854 (clobber (match_scratch:DI 2 ""))
855 (clobber (reg:CC CC_REGNUM))]
856 "TARGET_32BIT && reload_completed
857 && arm_general_register_operand (operands[0], DImode)"
858 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
859 (clobber (reg:CC CC_REGNUM))])]
;; Element-wise min/max: unsigned forms restricted to integer modes
;; (VDQIW), signed forms also cover float modes (VDQW).
863 (define_insn "*umin<mode>3_neon"
864 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
865 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
866 (match_operand:VDQIW 2 "s_register_operand" "w")))]
868 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
869 [(set_attr "type" "neon_minmax<q>")]
872 (define_insn "*umax<mode>3_neon"
873 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
874 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
875 (match_operand:VDQIW 2 "s_register_operand" "w")))]
877 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
878 [(set_attr "type" "neon_minmax<q>")]
881 (define_insn "*smin<mode>3_neon"
882 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
883 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
884 (match_operand:VDQW 2 "s_register_operand" "w")))]
886 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
888 (if_then_else (match_test "<Is_float_mode>")
889 (const_string "neon_fp_minmax_s<q>")
890 (const_string "neon_minmax<q>")))]
893 (define_insn "*smax<mode>3_neon"
894 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
895 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
896 (match_operand:VDQW 2 "s_register_operand" "w")))]
898 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
900 (if_then_else (match_test "<Is_float_mode>")
901 (const_string "neon_fp_minmax_s<q>")
902 (const_string "neon_minmax<q>")))]
905 ; TODO: V2DI shifts are current disabled because there are bugs in the
906 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift: by-register vshl or, for a constant vector shift
;; count (Dn), an immediate form emitted by neon_output_shift_immediate.
909 (define_insn "vashl<mode>3"
910 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
911 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
912 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
915 switch (which_alternative)
917 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
918 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
920 VALID_NEON_QREG_MODE (<MODE>mode),
922 default: gcc_unreachable ();
925 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Immediate right shifts: arithmetic ('s' element type) and logical
;; ('u' element type) vshr forms for constant shift counts.
928 (define_insn "vashr<mode>3_imm"
929 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
930 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
931 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
934 return neon_output_shift_immediate ("vshr", 's', &operands[2],
935 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
938 [(set_attr "type" "neon_shift_imm<q>")]
941 (define_insn "vlshr<mode>3_imm"
942 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
943 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
944 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
947 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
948 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
951 [(set_attr "type" "neon_shift_imm<q>")]
954 ; Used for implementing logical shift-right, which is a left-shift by a negative
955 ; amount, with signed operands. This is essentially the same as ashl<mode>3
956 ; above, but using an unspec in case GCC tries anything tricky with negative
;; vshl with a (possibly negative) register shift count; wrapped in an
;; unspec so the middle-end cannot reason about negative amounts.
959 (define_insn "ashl<mode>3_signed"
960 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
961 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
962 (match_operand:VDQI 2 "s_register_operand" "w")]
963 UNSPEC_ASHIFT_SIGNED))]
965 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
966 [(set_attr "type" "neon_shift_reg<q>")]
969 ; Used for implementing logical shift-right, which is a left-shift by a negative
970 ; amount, with unsigned operands.
972 (define_insn "ashl<mode>3_unsigned"
973 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
974 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
975 (match_operand:VDQI 2 "s_register_operand" "w")]
976 UNSPEC_ASHIFT_UNSIGNED))]
978 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
979 [(set_attr "type" "neon_shift_reg<q>")]
;; Right-shift expanders: a register count is negated and fed to the
;; left-shift-by-negative patterns above; a constant count goes to the
;; immediate vshr patterns.
982 (define_expand "vashr<mode>3"
983 [(set (match_operand:VDQIW 0 "s_register_operand" "")
984 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
985 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
988 if (s_register_operand (operands[2], <MODE>mode))
990 rtx neg = gen_reg_rtx (<MODE>mode);
991 emit_insn (gen_neg<mode>2 (neg, operands[2]));
992 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
995 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
999 (define_expand "vlshr<mode>3"
1000 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1001 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1002 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1005 if (s_register_operand (operands[2], <MODE>mode))
1007 rtx neg = gen_reg_rtx (<MODE>mode);
1008 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1009 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1012 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1018 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1019 ;; leaving the upper half uninitialized. This is OK since the shift
1020 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1021 ;; data flow analysis however, we pretend the full register is set
1023 (define_insn "neon_load_count"
1024 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1025 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1026 UNSPEC_LOAD_COUNT))]
1029 vld1.32\t{%P0[0]}, %A1
1030 vmov.32\t%P0[0], %1"
1031 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
;; DImode left shift on a NEON register; valid for in-range immediates
;; (0..63) or a register count already loaded by neon_load_count above.
1034 (define_insn "ashldi3_neon_noclobber"
1035 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1036 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1037 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1038 "TARGET_NEON && reload_completed
1039 && (!CONST_INT_P (operands[2])
1040 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1042 vshl.u64\t%P0, %P1, %2
1043 vshl.u64\t%P0, %P1, %P2"
1044 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
;; DImode left shift, split after reload.  When the destination is a VFP
;; (NEON) register, out-of-range constant counts are clamped and the shift
;; is re-emitted as ashldi3_neon_noclobber (loading a register count with
;; neon_load_count into scratch operand 5).  Otherwise a core-register
;; sequence is used: a 1-bit shift via arm_ashldi3_1bit, or the generic
;; arm_emit_coreregs_64bit_shift helper.
1047 (define_insn_and_split "ashldi3_neon"
1048 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r, ?w,w")
1049 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r, 0w,w")
1050 (match_operand:SI 2 "general_operand" "rUm, i, r, i,rUm,i")))
1051 (clobber (match_scratch:SI 3 "= X, X,?&r, X, X,X"))
1052 (clobber (match_scratch:SI 4 "= X, X,?&r, X, X,X"))
1053 (clobber (match_scratch:DI 5 "=&w, X, X, X, &w,X"))
1054 (clobber (reg:CC_C CC_REGNUM))]
1057 "TARGET_NEON && reload_completed"
1061 if (IS_VFP_REGNUM (REGNO (operands[0])))
1063 if (CONST_INT_P (operands[2]))
1065 if (INTVAL (operands[2]) < 1)
1067 emit_insn (gen_movdi (operands[0], operands[1]));
1070 else if (INTVAL (operands[2]) > 63)
1071 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1075 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1076 operands[2] = operands[5];
1079 /* Ditch the unnecessary clobbers. */
1080 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1085 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1086 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1087 || REGNO (operands[0]) == REGNO (operands[1])))
1088 /* This clobbers CC. */
1089 emit_insn (gen_arm_ashldi3_1bit (operands[0], operands[1]));
1091 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1092 operands[2], operands[3], operands[4]);
1096 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1097 (set_attr "opt" "*,*,speed,speed,*,*")
1098 (set_attr "type" "multiple")]
1101 ; The shift amount needs to be negated for right-shifts
1102 (define_insn "signed_shift_di3_neon"
1103 [(set (match_operand:DI 0 "s_register_operand" "=w")
1104 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1105 (match_operand:DI 2 "s_register_operand" " w")]
1106 UNSPEC_ASHIFT_SIGNED))]
1107 "TARGET_NEON && reload_completed"
1108 "vshl.s64\t%P0, %P1, %P2"
1109 [(set_attr "type" "neon_shift_reg")]
1112 ; The shift amount needs to be negated for right-shifts
1113 (define_insn "unsigned_shift_di3_neon"
1114 [(set (match_operand:DI 0 "s_register_operand" "=w")
1115 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1116 (match_operand:DI 2 "s_register_operand" " w")]
1117 UNSPEC_ASHIFT_UNSIGNED))]
1118 "TARGET_NEON && reload_completed"
1119 "vshl.u64\t%P0, %P1, %P2"
1120 [(set_attr "type" "neon_shift_reg")]
;; Immediate-count DImode right shifts.  Note the accepted range is 1..64:
;; vshr accepts a shift of 64, unlike the left-shift patterns above whose
;; immediates stop at 63.
1123 (define_insn "ashrdi3_neon_imm_noclobber"
1124 [(set (match_operand:DI 0 "s_register_operand" "=w")
1125 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1126 (match_operand:DI 2 "const_int_operand" " i")))]
1127 "TARGET_NEON && reload_completed
1128 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1129 "vshr.s64\t%P0, %P1, %2"
1130 [(set_attr "type" "neon_shift_imm")]
;; As above, but a logical (zero-filling) right shift.
1133 (define_insn "lshrdi3_neon_imm_noclobber"
1134 [(set (match_operand:DI 0 "s_register_operand" "=w")
1135 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1136 (match_operand:DI 2 "const_int_operand" " i")))]
1137 "TARGET_NEON && reload_completed
1138 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1139 "vshr.u64\t%P0, %P1, %2"
1140 [(set_attr "type" "neon_shift_imm")]
;; DImode right shifts (arithmetic and logical via the RSHIFTS iterator),
;; split after reload.  Structure parallels ashldi3_neon above: NEON
;; destinations use the *_imm_noclobber patterns for constants (clamped to
;; 1..64), or negate a register count (operand 3) and load it with
;; neon_load_count before using the signed/unsigned_shift_di3_neon
;; left-shift-by-negative patterns; core-register destinations fall back
;; to arm_<shift>di3_1bit or arm_emit_coreregs_64bit_shift.
1145 (define_insn_and_split "<shift>di3_neon"
1146 [(set (match_operand:DI 0 "s_register_operand" "= w, w,?&r,?r,?w,?w")
1147 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, r,0w, w")
1148 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, r, i")))
1149 (clobber (match_scratch:SI 3 "=2r, X, &r, X,2r, X"))
1150 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X"))
1151 (clobber (match_scratch:DI 5 "=&w, X, X, X,&w, X"))
1152 (clobber (reg:CC CC_REGNUM))]
1155 "TARGET_NEON && reload_completed"
1159 if (IS_VFP_REGNUM (REGNO (operands[0])))
1161 if (CONST_INT_P (operands[2]))
1163 if (INTVAL (operands[2]) < 1)
1165 emit_insn (gen_movdi (operands[0], operands[1]));
1168 else if (INTVAL (operands[2]) > 64)
1169 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1171 /* Ditch the unnecessary clobbers. */
1172 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1178 /* We must use a negative left-shift. */
1179 emit_insn (gen_negsi2 (operands[3], operands[2]));
1180 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1181 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1187 if (CONST_INT_P (operands[2]) && INTVAL (operands[2]) == 1
1188 && (!reg_overlap_mentioned_p (operands[0], operands[1])
1189 || REGNO (operands[0]) == REGNO (operands[1])))
1190 /* This clobbers CC. */
1191 emit_insn (gen_arm_<shift>di3_1bit (operands[0], operands[1]));
1193 /* This clobbers CC (ASHIFTRT by register only). */
1194 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1195 operands[2], operands[3], operands[4]);
1200 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1201 (set_attr "opt" "*,*,speed,speed,*,*")
1202 (set_attr "type" "multiple")]
1205 ;; Widening operations
;; widen_ssum: sum a vector of narrow signed elements into an accumulator
;; of double-width elements.  The quad-word form is split into a low-half
;; and a high-half VADDW.
1207 (define_expand "widen_ssum<mode>3"
1208 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1209 (plus:<V_double_width>
1210 (sign_extend:<V_double_width>
1211 (match_operand:VQI 1 "s_register_operand" ""))
1212 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1215 machine_mode mode = GET_MODE (operands[1]);
1218 p1 = arm_simd_vect_par_cnst_half (mode, false);
1219 p2 = arm_simd_vect_par_cnst_half (mode, true);
1221 if (operands[0] != operands[2])
1222 emit_move_insn (operands[0], operands[2]);
1224 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1228 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; Accumulate the sign-extended low half of operand 1.  The half selected
;; by %e/%f is swapped for big-endian.
1236 (define_insn "vec_sel_widen_ssum_lo<VQI:mode><VW:mode>3"
1237 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1239 (sign_extend:<VW:V_widen>
1241 (match_operand:VQI 1 "s_register_operand" "%w")
1242 (match_operand:VQI 2 "vect_par_constant_low" "")))
1243 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1246 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1247 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1249 [(set_attr "type" "neon_add_widen")])
;; As above, but accumulating the high half of operand 1.
1251 (define_insn "vec_sel_widen_ssum_hi<VQI:mode><VW:mode>3"
1252 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1254 (sign_extend:<VW:V_widen>
1255 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1256 (match_operand:VQI 2 "vect_par_constant_high" "")))
1257 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1260 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1261 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1263 [(set_attr "type" "neon_add_widen")])
;; Single-instruction form for double-word input vectors.
1265 (define_insn "widen_ssum<mode>3"
1266 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1268 (sign_extend:<V_widen>
1269 (match_operand:VW 1 "s_register_operand" "%w"))
1270 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1272 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1273 [(set_attr "type" "neon_add_widen")]
;; widen_usum: as widen_ssum above, but zero-extending (unsigned).
1276 (define_expand "widen_usum<mode>3"
1277 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1278 (plus:<V_double_width>
1279 (zero_extend:<V_double_width>
1280 (match_operand:VQI 1 "s_register_operand" ""))
1281 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1284 machine_mode mode = GET_MODE (operands[1]);
1287 p1 = arm_simd_vect_par_cnst_half (mode, false);
1288 p2 = arm_simd_vect_par_cnst_half (mode, true);
1290 if (operands[0] != operands[2])
1291 emit_move_insn (operands[0], operands[2]);
1293 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1297 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; Accumulate the zero-extended low half of operand 1 (halves swapped for
;; big-endian, as in the signed patterns above).
1305 (define_insn "vec_sel_widen_usum_lo<VQI:mode><VW:mode>3"
1306 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1308 (zero_extend:<VW:V_widen>
1310 (match_operand:VQI 1 "s_register_operand" "%w")
1311 (match_operand:VQI 2 "vect_par_constant_low" "")))
1312 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1315 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1316 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1318 [(set_attr "type" "neon_add_widen")])
;; As above, but accumulating the high half of operand 1.
1320 (define_insn "vec_sel_widen_usum_hi<VQI:mode><VW:mode>3"
1321 [(set (match_operand:<VW:V_widen> 0 "s_register_operand" "=w")
1323 (zero_extend:<VW:V_widen>
1324 (vec_select:VW (match_operand:VQI 1 "s_register_operand" "%w")
1325 (match_operand:VQI 2 "vect_par_constant_high" "")))
1326 (match_operand:<VW:V_widen> 3 "s_register_operand" "0")))]
1329 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1330 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1332 [(set_attr "type" "neon_add_widen")])
;; Single-instruction form for double-word input vectors.
1334 (define_insn "widen_usum<mode>3"
1335 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1336 (plus:<V_widen> (zero_extend:<V_widen>
1337 (match_operand:VW 1 "s_register_operand" "%w"))
1338 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1340 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1341 [(set_attr "type" "neon_add_widen")]
1344 ;; Helpers for quad-word reduction operations
1346 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1347 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1348 ; N/2-element vector.
1350 (define_insn "quad_halves_<code>v4si"
1351 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1353 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1354 (parallel [(const_int 0) (const_int 1)]))
1355 (vec_select:V2SI (match_dup 1)
1356 (parallel [(const_int 2) (const_int 3)]))))]
1358 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1359 [(set_attr "vqh_mnem" "<VQH_mnem>")
1360 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Float variant; guarded by -funsafe-math-optimizations because NEON
;; flushes denormals to zero (see the IEEE note further down this file).
1363 (define_insn "quad_halves_<code>v4sf"
1364 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1366 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1367 (parallel [(const_int 0) (const_int 1)]))
1368 (vec_select:V2SF (match_dup 1)
1369 (parallel [(const_int 2) (const_int 3)]))))]
1370 "TARGET_NEON && flag_unsafe_math_optimizations"
1371 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1372 [(set_attr "vqh_mnem" "<VQH_mnem>")
1373 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; Combine the low four halfwords of operand 1 with its high four
;; halfwords (add/smin/smax per <code>), giving a V4HI result.
;; Operand 0 is write-only, so it takes the "=w" constraint exactly like
;; the v4si/v4sf variants above; "+w" would wrongly declare it as an
;; input as well, since it never appears on the source side.
1376 (define_insn "quad_halves_<code>v8hi"
1377 [(set (match_operand:V4HI 0 "s_register_operand" "=w")
1379 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1380 (parallel [(const_int 0) (const_int 1)
1381 (const_int 2) (const_int 3)]))
1382 (vec_select:V4HI (match_dup 1)
1383 (parallel [(const_int 4) (const_int 5)
1384 (const_int 6) (const_int 7)]))))]
1386 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1387 [(set_attr "vqh_mnem" "<VQH_mnem>")
1388 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Combine the low eight bytes of operand 1 with its high eight bytes
;; (add/smin/smax per <code>), giving a V8QI result.  As with the other
;; quad_halves patterns, operand 0 is a pure output: use "=w", not "+w"
;; (the operand is never read, so a read-write constraint is incorrect).
1391 (define_insn "quad_halves_<code>v16qi"
1392 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
1394 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1395 (parallel [(const_int 0) (const_int 1)
1396 (const_int 2) (const_int 3)
1397 (const_int 4) (const_int 5)
1398 (const_int 6) (const_int 7)]))
1399 (vec_select:V8QI (match_dup 1)
1400 (parallel [(const_int 8) (const_int 9)
1401 (const_int 10) (const_int 11)
1402 (const_int 12) (const_int 13)
1403 (const_int 14) (const_int 15)]))))]
1405 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1406 [(set_attr "vqh_mnem" "<VQH_mnem>")
1407 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Write double-word operand 1 into one half of quad-word operand 0 via a
;; subreg move: move_hi_quad uses a subreg at byte offset
;; GET_MODE_SIZE (<V_HALF>mode) (the upper half), move_lo_quad the other.
1410 (define_expand "move_hi_quad_<mode>"
1411 [(match_operand:ANY128 0 "s_register_operand" "")
1412 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1415 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1416 GET_MODE_SIZE (<V_HALF>mode)),
1421 (define_expand "move_lo_quad_<mode>"
1422 [(match_operand:ANY128 0 "s_register_operand" "")
1423 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1426 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1432 ;; Reduction operations
;; Sum-reduction of a double-word vector: pairwise-add repeatedly, then
;; extract lane 0 of the result.
1434 (define_expand "reduc_plus_scal_<mode>"
1435 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1436 (match_operand:VD 1 "s_register_operand" "")]
1437 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1439 rtx vec = gen_reg_rtx (<MODE>mode);
1440 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1441 &gen_neon_vpadd_internal<mode>);
1442 /* The same result is actually computed into every element. */
1443 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Quad-word form: fold the two halves together first, then reduce the
;; resulting double-word vector.  Little-endian only.
1447 (define_expand "reduc_plus_scal_<mode>"
1448 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1449 (match_operand:VQ 1 "s_register_operand" "")]
1450 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1451 && !BYTES_BIG_ENDIAN"
1453 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1455 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1456 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; V2DI reduction: a single vadd.i64 of the two halves (see the internal
;; pattern below), then extract element 0.
1461 (define_expand "reduc_plus_scal_v2di"
1462 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1463 (match_operand:V2DI 1 "s_register_operand" "")]
1464 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1466 rtx vec = gen_reg_rtx (V2DImode);
1468 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]))
1469 emit_insn (gen_vec_extractv2di (operands[0], vec, const0_rtx));
1474 (define_insn "arm_reduc_plus_internal_v2di"
1475 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1476 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1478 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1479 "vadd.i64\t%e0, %e1, %f1"
1480 [(set_attr "type" "neon_add_q")]
;; Signed-min reduction of a double-word vector: pairwise vpmin, then
;; extract lane 0.
1483 (define_expand "reduc_smin_scal_<mode>"
1484 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1485 (match_operand:VD 1 "s_register_operand" "")]
1486 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1488 rtx vec = gen_reg_rtx (<MODE>mode);
1490 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1491 &gen_neon_vpsmin<mode>);
1492 /* The result is computed into every element of the vector. */
1493 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Quad-word form: combine the halves, then reduce the half-width vector.
1497 (define_expand "reduc_smin_scal_<mode>"
1498 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1499 (match_operand:VQ 1 "s_register_operand" "")]
1500 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1501 && !BYTES_BIG_ENDIAN"
1503 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1505 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1506 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Signed-max reduction, double-word form (pairwise vpmax).
1511 (define_expand "reduc_smax_scal_<mode>"
1512 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1513 (match_operand:VD 1 "s_register_operand" "")]
1514 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1516 rtx vec = gen_reg_rtx (<MODE>mode);
1517 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1518 &gen_neon_vpsmax<mode>);
1519 /* The result is computed into every element of the vector. */
1520 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Quad-word form of the signed-max reduction.
1524 (define_expand "reduc_smax_scal_<mode>"
1525 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1526 (match_operand:VQ 1 "s_register_operand" "")]
1527 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1528 && !BYTES_BIG_ENDIAN"
1530 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1532 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1533 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Unsigned-min reduction (integer modes only), double-word form.
1538 (define_expand "reduc_umin_scal_<mode>"
1539 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1540 (match_operand:VDI 1 "s_register_operand" "")]
1543 rtx vec = gen_reg_rtx (<MODE>mode);
1544 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1545 &gen_neon_vpumin<mode>);
1546 /* The result is computed into every element of the vector. */
1547 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Quad-word form of the unsigned-min reduction.  Little-endian only.
1551 (define_expand "reduc_umin_scal_<mode>"
1552 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1553 (match_operand:VQI 1 "s_register_operand" "")]
1554 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1556 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1558 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1559 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Unsigned-max reduction, double-word form.
1564 (define_expand "reduc_umax_scal_<mode>"
1565 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1566 (match_operand:VDI 1 "s_register_operand" "")]
1569 rtx vec = gen_reg_rtx (<MODE>mode);
1570 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1571 &gen_neon_vpumax<mode>);
1572 /* The result is computed into every element of the vector. */
1573 emit_insn (gen_vec_extract<mode> (operands[0], vec, const0_rtx));
;; Quad-word form of the unsigned-max reduction.  Little-endian only.
1577 (define_expand "reduc_umax_scal_<mode>"
1578 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1579 (match_operand:VQI 1 "s_register_operand" "")]
1580 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1582 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1584 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1585 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise operations used by the reductions above: vpadd/vpmin/vpmax on
;; adjacent element pairs of two double-word vectors.
1590 (define_insn "neon_vpadd_internal<mode>"
1591 [(set (match_operand:VD 0 "s_register_operand" "=w")
1592 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1593 (match_operand:VD 2 "s_register_operand" "w")]
1596 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1597 ;; Assume this schedules like vadd.
1599 (if_then_else (match_test "<Is_float_mode>")
1600 (const_string "neon_fp_reduc_add_s<q>")
1601 (const_string "neon_reduc_add<q>")))]
;; Pairwise signed minimum.
1604 (define_insn "neon_vpsmin<mode>"
1605 [(set (match_operand:VD 0 "s_register_operand" "=w")
1606 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1607 (match_operand:VD 2 "s_register_operand" "w")]
1610 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1612 (if_then_else (match_test "<Is_float_mode>")
1613 (const_string "neon_fp_reduc_minmax_s<q>")
1614 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum.
1617 (define_insn "neon_vpsmax<mode>"
1618 [(set (match_operand:VD 0 "s_register_operand" "=w")
1619 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1620 (match_operand:VD 2 "s_register_operand" "w")]
1623 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1625 (if_then_else (match_test "<Is_float_mode>")
1626 (const_string "neon_fp_reduc_minmax_s<q>")
1627 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum (integer modes only).
1630 (define_insn "neon_vpumin<mode>"
1631 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1632 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1633 (match_operand:VDI 2 "s_register_operand" "w")]
1636 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1637 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum (integer modes only).
1640 (define_insn "neon_vpumax<mode>"
1641 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1642 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1643 (match_operand:VDI 2 "s_register_operand" "w")]
1646 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1647 [(set_attr "type" "neon_reduc_minmax<q>")]
1650 ;; Saturating arithmetic
1652 ; NOTE: Neon supports many more saturating variants of instructions than the
1653 ; following, but these are all GCC currently understands.
1654 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1655 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating add (vqadd.s*), double-word vectors.
1658 (define_insn "*ss_add<mode>_neon"
1659 [(set (match_operand:VD 0 "s_register_operand" "=w")
1660 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1661 (match_operand:VD 2 "s_register_operand" "w")))]
1663 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1664 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating add (vqadd.u*).
1667 (define_insn "*us_add<mode>_neon"
1668 [(set (match_operand:VD 0 "s_register_operand" "=w")
1669 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1670 (match_operand:VD 2 "s_register_operand" "w")))]
1672 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1673 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtract (vqsub.s*).
1676 (define_insn "*ss_sub<mode>_neon"
1677 [(set (match_operand:VD 0 "s_register_operand" "=w")
1678 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1679 (match_operand:VD 2 "s_register_operand" "w")))]
1681 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1682 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtract (vqsub.u*).
1685 (define_insn "*us_sub<mode>_neon"
1686 [(set (match_operand:VD 0 "s_register_operand" "=w")
1687 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1688 (match_operand:VD 2 "s_register_operand" "w")))]
1690 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1691 [(set_attr "type" "neon_qsub<q>")]
1694 ;; Conditional instructions. These are comparisons with conditional moves for
1695 ;; vectors. They perform the assignment:
1697 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1699 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; The comparison result is built into MASK (using vcge/vcgt/vceq, or the
;; direct compare-against-zero forms when operand 5 is constant zero);
;; VBSL then selects between operands 1 and 2.  For the "inverse" forms
;; (NE, the unordered comparisons) the inverse comparison is emitted and
;; the VBSL true/false inputs are swapped via swap_bsl_operands.
1702 (define_expand "vcond<mode><mode>"
1703 [(set (match_operand:VDQW 0 "s_register_operand" "")
1705 (match_operator 3 "comparison_operator"
1706 [(match_operand:VDQW 4 "s_register_operand" "")
1707 (match_operand:VDQW 5 "nonmemory_operand" "")])
1708 (match_operand:VDQW 1 "s_register_operand" "")
1709 (match_operand:VDQW 2 "s_register_operand" "")))]
1710 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1713 int use_zero_form = 0;
1714 int swap_bsl_operands = 0;
1715 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1716 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1718 rtx (*base_comparison) (rtx, rtx, rtx);
1719 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1721 switch (GET_CODE (operands[3]))
1728 if (operands[5] == CONST0_RTX (<MODE>mode))
1735 if (!REG_P (operands[5]))
1736 operands[5] = force_reg (<MODE>mode, operands[5]);
1739 switch (GET_CODE (operands[3]))
1749 base_comparison = gen_neon_vcge<mode>;
1750 complimentary_comparison = gen_neon_vcgt<mode>;
1758 base_comparison = gen_neon_vcgt<mode>;
1759 complimentary_comparison = gen_neon_vcge<mode>;
1764 base_comparison = gen_neon_vceq<mode>;
1765 complimentary_comparison = gen_neon_vceq<mode>;
1771 switch (GET_CODE (operands[3]))
1778 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1779 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1785 Note that there also exist direct comparison against 0 forms,
1786 so catch those as a special case. */
1790 switch (GET_CODE (operands[3]))
1793 base_comparison = gen_neon_vclt<mode>;
1796 base_comparison = gen_neon_vcle<mode>;
1799 /* Do nothing, other zero form cases already have the correct
1806 emit_insn (base_comparison (mask, operands[4], operands[5]));
1808 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1815 /* Vector compare returns false for lanes which are unordered, so if we use
1816 the inverse of the comparison we actually want to emit, then
1817 swap the operands to BSL, we will end up with the correct result.
1818 Note that a NE NaN and NaN NE b are true for all a, b.
1820 Our transformations are:
1825 a NE b -> !(a EQ b) */
1828 emit_insn (base_comparison (mask, operands[4], operands[5]));
1830 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1832 swap_bsl_operands = 1;
1835 /* We check (a > b || b > a). combining these comparisons give us
1836 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1837 will then give us (a == b || a UNORDERED b) as intended. */
1839 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1840 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1841 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1842 swap_bsl_operands = 1;
1845 /* Operands are ORDERED iff (a > b || b >= a).
1846 Swapping the operands to BSL will give the UNORDERED case. */
1847 swap_bsl_operands = 1;
1850 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1851 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1852 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1858 if (swap_bsl_operands)
1859 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1862 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned-comparison variant of vcond, integer vectors only (so no
;; unordered cases).  The mask is built with vceq/vcgeu/vcgtu — or the
;; compare-against-zero forms (vcle/vclt) when operand 5 is constant
;; zero — and VBSL then selects between operands 1 and 2.
1867 (define_expand "vcondu<mode><mode>"
1868 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1870 (match_operator 3 "arm_comparison_operator"
1871 [(match_operand:VDQIW 4 "s_register_operand" "")
1872 (match_operand:VDQIW 5 "s_register_operand" "")])
1873 (match_operand:VDQIW 1 "s_register_operand" "")
1874 (match_operand:VDQIW 2 "s_register_operand" "")))]
1878 int inverse = 0, immediate_zero = 0;
1880 mask = gen_reg_rtx (<V_cmp_result>mode);
1882 if (operands[5] == CONST0_RTX (<MODE>mode))
1884 else if (!REG_P (operands[5]))
1885 operands[5] = force_reg (<MODE>mode, operands[5]);
1887 switch (GET_CODE (operands[3]))
1890 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1894 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1898 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1903 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1905 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1910 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1912 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1916 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1925 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1928 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1934 ;; Patterns for builtins.
1936 ; good for plain vadd, vaddq.
;; vadd intrinsic expander: uses canonical RTL (add<mode>3) when that is
;; safe (integer modes, or floats under -funsafe-math-optimizations),
;; otherwise falls back to the unspec form below.
1938 (define_expand "neon_vadd<mode>"
1939 [(match_operand:VCVTF 0 "s_register_operand" "=w")
1940 (match_operand:VCVTF 1 "s_register_operand" "w")
1941 (match_operand:VCVTF 2 "s_register_operand" "w")]
1944 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1945 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1947 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1952 ; Note that NEON operations don't support the full IEEE 754 standard: in
1953 ; particular, denormal values are flushed to zero. This means that GCC cannot
1954 ; use those instructions for autovectorization, etc. unless
1955 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1956 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1957 ; header) must work in either case: if -funsafe-math-optimizations is given,
1958 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1959 ; expand to unspecs (which may potentially limit the extent to which they might
1960 ; be optimized by generic code).
1962 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1964 (define_insn "neon_vadd<mode>_unspec"
1965 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1966 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1967 (match_operand:VCVTF 2 "s_register_operand" "w")]
1970 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1972 (if_then_else (match_test "<Is_float_mode>")
1973 (const_string "neon_fp_addsub_s<q>")
1974 (const_string "neon_add<q>")))]
;; Widening add intrinsics, signed or unsigned per <sup>: vaddl takes two
;; D registers and produces a Q register; vaddw adds a D register to a Q
;; accumulator (see the %P/%q operand modifiers in the templates).
1977 (define_insn "neon_vaddl<sup><mode>"
1978 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1979 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1980 (match_operand:VDI 2 "s_register_operand" "w")]
1983 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1984 [(set_attr "type" "neon_add_long")]
1987 (define_insn "neon_vaddw<sup><mode>"
1988 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1989 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1990 (match_operand:VDI 2 "s_register_operand" "w")]
1993 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1994 [(set_attr "type" "neon_add_widen")]
;; (Rounding) halving add: vhadd/vrhadd per <r>.
1999 (define_insn "neon_v<r>hadd<sup><mode>"
2000 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2001 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2002 (match_operand:VDQIW 2 "s_register_operand" "w")]
2005 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2006 [(set_attr "type" "neon_add_halve_q")]
;; Saturating add intrinsic (unspec form, cf. the *ss/us_add patterns).
2009 (define_insn "neon_vqadd<sup><mode>"
2010 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2011 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2012 (match_operand:VDQIX 2 "s_register_operand" "w")]
2015 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2016 [(set_attr "type" "neon_qadd<q>")]
;; (Rounding) add with narrowing: vaddhn/vraddhn, two Q inputs, D result.
2019 (define_insn "neon_v<r>addhn<mode>"
2020 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2021 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2022 (match_operand:VN 2 "s_register_operand" "w")]
2025 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2026 [(set_attr "type" "neon_add_halve_narrow_q")]
2029 ;; Polynomial and Float multiplication.
2030 (define_insn "neon_vmul<pf><mode>"
2031 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2032 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2033 (match_operand:VPF 2 "s_register_operand" "w")]
2036 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2038 (if_then_else (match_test "<Is_float_mode>")
2039 (const_string "neon_fp_mul_s<q>")
2040 (const_string "neon_mul_<V_elem_ch><q>")))]
;; vmla intrinsic expander: canonical mul-add RTL when safe, unspec
;; fallback otherwise (same policy as neon_vadd earlier in this file).
2043 (define_expand "neon_vmla<mode>"
2044 [(match_operand:VDQW 0 "s_register_operand" "=w")
2045 (match_operand:VDQW 1 "s_register_operand" "0")
2046 (match_operand:VDQW 2 "s_register_operand" "w")
2047 (match_operand:VDQW 3 "s_register_operand" "w")]
2050 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2051 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2052 operands[2], operands[3]));
2054 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2055 operands[2], operands[3]));
;; vfma/vfms intrinsics expand directly to the fused multiply-add/subtract
;; patterns; operands 2 and 3 are the multiplicands passed first to the
;; gen_* helpers.
2059 (define_expand "neon_vfma<VCVTF:mode>"
2060 [(match_operand:VCVTF 0 "s_register_operand")
2061 (match_operand:VCVTF 1 "s_register_operand")
2062 (match_operand:VCVTF 2 "s_register_operand")
2063 (match_operand:VCVTF 3 "s_register_operand")]
2064 "TARGET_NEON && TARGET_FMA"
2066 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2071 (define_expand "neon_vfms<VCVTF:mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand")
2073 (match_operand:VCVTF 1 "s_register_operand")
2074 (match_operand:VCVTF 2 "s_register_operand")
2075 (match_operand:VCVTF 3 "s_register_operand")]
2076 "TARGET_NEON && TARGET_FMA"
2078 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2083 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2085 (define_insn "neon_vmla<mode>_unspec"
2086 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2087 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2088 (match_operand:VDQW 2 "s_register_operand" "w")
2089 (match_operand:VDQW 3 "s_register_operand" "w")]
2092 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2094 (if_then_else (match_test "<Is_float_mode>")
2095 (const_string "neon_fp_mla_s<q>")
2096 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-accumulate intrinsic: vmlal, accumulating the
;; widened product of two D registers into Q operand 1 (tied to output).
2099 (define_insn "neon_vmlal<sup><mode>"
2100 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2101 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2102 (match_operand:VW 2 "s_register_operand" "w")
2103 (match_operand:VW 3 "s_register_operand" "w")]
2106 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2107 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; vmls intrinsic expander: canonical multiply-negate-add RTL when safe,
;; unspec fallback otherwise (same policy as neon_vmla above).
2110 (define_expand "neon_vmls<mode>"
2111 [(match_operand:VDQW 0 "s_register_operand" "=w")
2112 (match_operand:VDQW 1 "s_register_operand" "0")
2113 (match_operand:VDQW 2 "s_register_operand" "w")
2114 (match_operand:VDQW 3 "s_register_operand" "w")]
2117 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2118 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2119 operands[1], operands[2], operands[3]));
2121 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2122 operands[2], operands[3]));
2126 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2128 (define_insn "neon_vmls<mode>_unspec"
2129 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2130 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2131 (match_operand:VDQW 2 "s_register_operand" "w")
2132 (match_operand:VDQW 3 "s_register_operand" "w")]
2135 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2137 (if_then_else (match_test "<Is_float_mode>")
2138 (const_string "neon_fp_mla_s<q>")
2139 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Widening multiply-subtract intrinsic: vmlsl, the subtracting
;; counterpart of vmlal above.
2142 (define_insn "neon_vmlsl<sup><mode>"
2143 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2144 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2145 (match_operand:VW 2 "s_register_operand" "w")
2146 (match_operand:VW 3 "s_register_operand" "w")]
2149 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2150 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2153 ;; vqdmulh, vqrdmulh
2154 (define_insn "neon_vq<r>dmulh<mode>"
2155 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2156 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2157 (match_operand:VMDQI 2 "s_register_operand" "w")]
2160 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2161 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2164 ;; vqrdmlah, vqrdmlsh
2165 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2166 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2167 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2168 (match_operand:VMDQI 2 "s_register_operand" "w")
2169 (match_operand:VMDQI 3 "s_register_operand" "w")]
2172 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2173 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2176 (define_insn "neon_vqdmlal<mode>"
2177 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2178 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2179 (match_operand:VMDI 2 "s_register_operand" "w")
2180 (match_operand:VMDI 3 "s_register_operand" "w")]
2183 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2184 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2187 (define_insn "neon_vqdmlsl<mode>"
2188 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2189 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2190 (match_operand:VMDI 2 "s_register_operand" "w")
2191 (match_operand:VMDI 3 "s_register_operand" "w")]
2194 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2195 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2198 (define_insn "neon_vmull<sup><mode>"
2199 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2200 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2201 (match_operand:VW 2 "s_register_operand" "w")]
2204 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2205 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2208 (define_insn "neon_vqdmull<mode>"
2209 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2210 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2211 (match_operand:VMDI 2 "s_register_operand" "w")]
2214 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2215 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2218 (define_expand "neon_vsub<mode>"
2219 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2220 (match_operand:VCVTF 1 "s_register_operand" "w")
2221 (match_operand:VCVTF 2 "s_register_operand" "w")]
2224 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2225 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2227 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2232 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2234 (define_insn "neon_vsub<mode>_unspec"
2235 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2236 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2237 (match_operand:VCVTF 2 "s_register_operand" "w")]
2240 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2242 (if_then_else (match_test "<Is_float_mode>")
2243 (const_string "neon_fp_addsub_s<q>")
2244 (const_string "neon_sub<q>")))]
2247 (define_insn "neon_vsubl<sup><mode>"
2248 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2249 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2250 (match_operand:VDI 2 "s_register_operand" "w")]
2253 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2254 [(set_attr "type" "neon_sub_long")]
2257 (define_insn "neon_vsubw<sup><mode>"
2258 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2259 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2260 (match_operand:VDI 2 "s_register_operand" "w")]
2263 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2264 [(set_attr "type" "neon_sub_widen")]
2267 (define_insn "neon_vqsub<sup><mode>"
2268 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2269 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2270 (match_operand:VDQIX 2 "s_register_operand" "w")]
2273 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2274 [(set_attr "type" "neon_qsub<q>")]
2277 (define_insn "neon_vhsub<sup><mode>"
2278 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2279 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2280 (match_operand:VDQIW 2 "s_register_operand" "w")]
2283 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2284 [(set_attr "type" "neon_sub_halve<q>")]
2287 (define_insn "neon_v<r>subhn<mode>"
2288 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2289 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2290 (match_operand:VN 2 "s_register_operand" "w")]
2293 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2294 [(set_attr "type" "neon_sub_halve_narrow_q")]
2297 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2298 ;; without unsafe math optimizations.
2299 (define_expand "neon_vc<cmp_op><mode>"
2300 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2302 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2303 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2306 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2308 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2309 && !flag_unsafe_math_optimizations)
2311 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2312 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2313 whereas this expander iterates over the integer modes as well,
2314 but we will never expand to UNSPECs for the integer comparisons. */
2318 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2323 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2332 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2339 (define_insn "neon_vc<cmp_op><mode>_insn"
2340 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2342 (COMPARISONS:<V_cmp_result>
2343 (match_operand:VDQW 1 "s_register_operand" "w,w")
2344 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2345 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2346 && !flag_unsafe_math_optimizations)"
2349 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2351 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2352 ? "f" : "<cmp_type>",
2353 which_alternative == 0
2354 ? "%<V_reg>2" : "#0");
2355 output_asm_insn (pattern, operands);
2359 (if_then_else (match_operand 2 "zero_operand")
2360 (const_string "neon_compare_zero<q>")
2361 (const_string "neon_compare<q>")))]
2364 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2365 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2366 (unspec:<V_cmp_result>
2367 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2368 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2373 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2375 which_alternative == 0
2376 ? "%<V_reg>2" : "#0");
2377 output_asm_insn (pattern, operands);
2380 [(set_attr "type" "neon_fp_compare_s<q>")]
2383 (define_insn "neon_vc<cmp_op>u<mode>"
2384 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2386 (GTUGEU:<V_cmp_result>
2387 (match_operand:VDQIW 1 "s_register_operand" "w")
2388 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2390 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2391 [(set_attr "type" "neon_compare<q>")]
2394 (define_expand "neon_vca<cmp_op><mode>"
2395 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2397 (GTGE:<V_cmp_result>
2398 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2399 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2402 if (flag_unsafe_math_optimizations)
2403 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2406 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2413 (define_insn "neon_vca<cmp_op><mode>_insn"
2414 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2416 (GTGE:<V_cmp_result>
2417 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2418 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2419 "TARGET_NEON && flag_unsafe_math_optimizations"
2420 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2421 [(set_attr "type" "neon_fp_compare_s<q>")]
2424 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2425 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2426 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2427 (match_operand:VCVTF 2 "s_register_operand" "w")]
2430 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2431 [(set_attr "type" "neon_fp_compare_s<q>")]
2434 (define_insn "neon_vtst<mode>"
2435 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2436 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2437 (match_operand:VDQIW 2 "s_register_operand" "w")]
2440 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2441 [(set_attr "type" "neon_tst<q>")]
2444 (define_insn "neon_vabd<sup><mode>"
2445 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2446 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2447 (match_operand:VDQIW 2 "s_register_operand" "w")]
2450 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2451 [(set_attr "type" "neon_abd<q>")]
2454 (define_insn "neon_vabdf<mode>"
2455 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2456 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2457 (match_operand:VCVTF 2 "s_register_operand" "w")]
2460 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2461 [(set_attr "type" "neon_fp_abd_s<q>")]
2464 (define_insn "neon_vabdl<sup><mode>"
2465 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2466 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2467 (match_operand:VW 2 "s_register_operand" "w")]
2470 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2471 [(set_attr "type" "neon_abd_long")]
2474 (define_insn "neon_vaba<sup><mode>"
2475 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2476 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
2477 (match_operand:VDQIW 3 "s_register_operand" "w")]
2479 (match_operand:VDQIW 1 "s_register_operand" "0")))]
2481 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2482 [(set_attr "type" "neon_arith_acc<q>")]
2485 (define_insn "neon_vabal<sup><mode>"
2486 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2487 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
2488 (match_operand:VW 3 "s_register_operand" "w")]
2490 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
2492 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2493 [(set_attr "type" "neon_arith_acc<q>")]
2496 (define_insn "neon_v<maxmin><sup><mode>"
2497 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2498 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2499 (match_operand:VDQIW 2 "s_register_operand" "w")]
2502 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2503 [(set_attr "type" "neon_minmax<q>")]
2506 (define_insn "neon_v<maxmin>f<mode>"
2507 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2508 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2509 (match_operand:VCVTF 2 "s_register_operand" "w")]
2512 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2513 [(set_attr "type" "neon_fp_minmax_s<q>")]
2516 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
2517 (define_insn "<fmaxmin><mode>3"
2518 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2519 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2520 (match_operand:VCVTF 2 "s_register_operand" "w")]
2522 "TARGET_NEON && TARGET_FPU_ARMV8"
2523 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2524 [(set_attr "type" "neon_fp_minmax_s<q>")]
2527 (define_expand "neon_vpadd<mode>"
2528 [(match_operand:VD 0 "s_register_operand" "=w")
2529 (match_operand:VD 1 "s_register_operand" "w")
2530 (match_operand:VD 2 "s_register_operand" "w")]
2533 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
2538 (define_insn "neon_vpaddl<sup><mode>"
2539 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2540 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
2543 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2544 [(set_attr "type" "neon_reduc_add_long")]
2547 (define_insn "neon_vpadal<sup><mode>"
2548 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
2549 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
2550 (match_operand:VDQIW 2 "s_register_operand" "w")]
2553 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
2554 [(set_attr "type" "neon_reduc_add_acc")]
2557 (define_insn "neon_vp<maxmin><sup><mode>"
2558 [(set (match_operand:VDI 0 "s_register_operand" "=w")
2559 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
2560 (match_operand:VDI 2 "s_register_operand" "w")]
2563 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2564 [(set_attr "type" "neon_reduc_minmax<q>")]
2567 (define_insn "neon_vp<maxmin>f<mode>"
2568 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2569 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2570 (match_operand:VCVTF 2 "s_register_operand" "w")]
2573 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2574 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
2577 (define_insn "neon_vrecps<mode>"
2578 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2579 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2580 (match_operand:VCVTF 2 "s_register_operand" "w")]
2583 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2584 [(set_attr "type" "neon_fp_recps_s<q>")]
2587 (define_insn "neon_vrsqrts<mode>"
2588 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2589 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2590 (match_operand:VCVTF 2 "s_register_operand" "w")]
2593 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2594 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
2597 (define_expand "neon_vabs<mode>"
2598 [(match_operand:VDQW 0 "s_register_operand" "")
2599 (match_operand:VDQW 1 "s_register_operand" "")]
2602 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
2606 (define_insn "neon_vqabs<mode>"
2607 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2608 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2611 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2612 [(set_attr "type" "neon_qabs<q>")]
2615 (define_insn "neon_bswap<mode>"
2616 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
2617 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
2619 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
2620 [(set_attr "type" "neon_rev<q>")]
2623 (define_expand "neon_vneg<mode>"
2624 [(match_operand:VDQW 0 "s_register_operand" "")
2625 (match_operand:VDQW 1 "s_register_operand" "")]
2628 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
2632 (define_expand "neon_copysignf<mode>"
2633 [(match_operand:VCVTF 0 "register_operand")
2634 (match_operand:VCVTF 1 "register_operand")
2635 (match_operand:VCVTF 2 "register_operand")]
2639 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
2640 int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
2641 rtvec v = rtvec_alloc (n_elt);
2643 /* Create bitmask for vector select. */
2644 for (i = 0; i < n_elt; ++i)
2645 RTVEC_ELT (v, i) = GEN_INT (0x80000000);
2647 emit_move_insn (v_bitmask,
2648 gen_rtx_CONST_VECTOR (<VCVTF:V_cmp_result>mode, v));
2649 emit_move_insn (operands[0], operands[2]);
2650 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
2651 <VCVTF:V_cmp_result>mode, 0);
2652 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
2659 (define_insn "neon_vqneg<mode>"
2660 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2661 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2664 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2665 [(set_attr "type" "neon_qneg<q>")]
2668 (define_insn "neon_vcls<mode>"
2669 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2670 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
2673 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
2674 [(set_attr "type" "neon_cls<q>")]
2677 (define_insn "clz<mode>2"
2678 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2679 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
2681 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
2682 [(set_attr "type" "neon_cnt<q>")]
2685 (define_expand "neon_vclz<mode>"
2686 [(match_operand:VDQIW 0 "s_register_operand" "")
2687 (match_operand:VDQIW 1 "s_register_operand" "")]
2690 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
2694 (define_insn "popcount<mode>2"
2695 [(set (match_operand:VE 0 "s_register_operand" "=w")
2696 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
2698 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
2699 [(set_attr "type" "neon_cnt<q>")]
2702 (define_expand "neon_vcnt<mode>"
2703 [(match_operand:VE 0 "s_register_operand" "=w")
2704 (match_operand:VE 1 "s_register_operand" "w")]
2707 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
2711 (define_insn "neon_vrecpe<mode>"
2712 [(set (match_operand:V32 0 "s_register_operand" "=w")
2713 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2716 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2717 [(set_attr "type" "neon_fp_recpe_s<q>")]
2720 (define_insn "neon_vrsqrte<mode>"
2721 [(set (match_operand:V32 0 "s_register_operand" "=w")
2722 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
2725 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
2726 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
2729 (define_expand "neon_vmvn<mode>"
2730 [(match_operand:VDQIW 0 "s_register_operand" "")
2731 (match_operand:VDQIW 1 "s_register_operand" "")]
2734 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
2738 (define_insn "neon_vget_lane<mode>_sext_internal"
2739 [(set (match_operand:SI 0 "s_register_operand" "=r")
2741 (vec_select:<V_elem>
2742 (match_operand:VD 1 "s_register_operand" "w")
2743 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2746 if (BYTES_BIG_ENDIAN)
2748 int elt = INTVAL (operands[2]);
2749 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2750 operands[2] = GEN_INT (elt);
2752 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
2754 [(set_attr "type" "neon_to_gp")]
2757 (define_insn "neon_vget_lane<mode>_zext_internal"
2758 [(set (match_operand:SI 0 "s_register_operand" "=r")
2760 (vec_select:<V_elem>
2761 (match_operand:VD 1 "s_register_operand" "w")
2762 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2765 if (BYTES_BIG_ENDIAN)
2767 int elt = INTVAL (operands[2]);
2768 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
2769 operands[2] = GEN_INT (elt);
2771 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
2773 [(set_attr "type" "neon_to_gp")]
2776 (define_insn "neon_vget_lane<mode>_sext_internal"
2777 [(set (match_operand:SI 0 "s_register_operand" "=r")
2779 (vec_select:<V_elem>
2780 (match_operand:VQ2 1 "s_register_operand" "w")
2781 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2785 int regno = REGNO (operands[1]);
2786 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2787 unsigned int elt = INTVAL (operands[2]);
2788 unsigned int elt_adj = elt % halfelts;
2790 if (BYTES_BIG_ENDIAN)
2791 elt_adj = halfelts - 1 - elt_adj;
2793 ops[0] = operands[0];
2794 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2795 ops[2] = GEN_INT (elt_adj);
2796 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
2800 [(set_attr "type" "neon_to_gp_q")]
2803 (define_insn "neon_vget_lane<mode>_zext_internal"
2804 [(set (match_operand:SI 0 "s_register_operand" "=r")
2806 (vec_select:<V_elem>
2807 (match_operand:VQ2 1 "s_register_operand" "w")
2808 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2812 int regno = REGNO (operands[1]);
2813 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
2814 unsigned int elt = INTVAL (operands[2]);
2815 unsigned int elt_adj = elt % halfelts;
2817 if (BYTES_BIG_ENDIAN)
2818 elt_adj = halfelts - 1 - elt_adj;
2820 ops[0] = operands[0];
2821 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
2822 ops[2] = GEN_INT (elt_adj);
2823 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
2827 [(set_attr "type" "neon_to_gp_q")]
2830 (define_expand "neon_vget_lane<mode>"
2831 [(match_operand:<V_ext> 0 "s_register_operand" "")
2832 (match_operand:VDQW 1 "s_register_operand" "")
2833 (match_operand:SI 2 "immediate_operand" "")]
2836 if (BYTES_BIG_ENDIAN)
2838 /* The intrinsics are defined in terms of a model where the
2839 element ordering in memory is vldm order, whereas the generic
2840 RTL is defined in terms of a model where the element ordering
2841 in memory is array order. Convert the lane number to conform
2843 unsigned int elt = INTVAL (operands[2]);
2844 unsigned int reg_nelts
2845 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2846 elt ^= reg_nelts - 1;
2847 operands[2] = GEN_INT (elt);
2850 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2851 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2853 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
2859 (define_expand "neon_vget_laneu<mode>"
2860 [(match_operand:<V_ext> 0 "s_register_operand" "")
2861 (match_operand:VDQIW 1 "s_register_operand" "")
2862 (match_operand:SI 2 "immediate_operand" "")]
2865 if (BYTES_BIG_ENDIAN)
2867 /* The intrinsics are defined in terms of a model where the
2868 element ordering in memory is vldm order, whereas the generic
2869 RTL is defined in terms of a model where the element ordering
2870 in memory is array order. Convert the lane number to conform
2872 unsigned int elt = INTVAL (operands[2]);
2873 unsigned int reg_nelts
2874 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2875 elt ^= reg_nelts - 1;
2876 operands[2] = GEN_INT (elt);
2879 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
2880 emit_insn (gen_vec_extract<mode> (operands[0], operands[1], operands[2]));
2882 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
2888 (define_expand "neon_vget_lanedi"
2889 [(match_operand:DI 0 "s_register_operand" "=r")
2890 (match_operand:DI 1 "s_register_operand" "w")
2891 (match_operand:SI 2 "immediate_operand" "")]
2894 emit_move_insn (operands[0], operands[1]);
2898 (define_expand "neon_vget_lanev2di"
2899 [(match_operand:DI 0 "s_register_operand" "")
2900 (match_operand:V2DI 1 "s_register_operand" "")
2901 (match_operand:SI 2 "immediate_operand" "")]
2906 if (BYTES_BIG_ENDIAN)
2908 /* The intrinsics are defined in terms of a model where the
2909 element ordering in memory is vldm order, whereas the generic
2910 RTL is defined in terms of a model where the element ordering
2911 in memory is array order. Convert the lane number to conform
2913 unsigned int elt = INTVAL (operands[2]);
2914 unsigned int reg_nelts = 2;
2915 elt ^= reg_nelts - 1;
2916 operands[2] = GEN_INT (elt);
2919 lane = INTVAL (operands[2]);
2920 gcc_assert ((lane ==0) || (lane == 1));
2921 emit_move_insn (operands[0], lane == 0
2922 ? gen_lowpart (DImode, operands[1])
2923 : gen_highpart (DImode, operands[1]));
2927 (define_expand "neon_vset_lane<mode>"
2928 [(match_operand:VDQ 0 "s_register_operand" "=w")
2929 (match_operand:<V_elem> 1 "s_register_operand" "r")
2930 (match_operand:VDQ 2 "s_register_operand" "0")
2931 (match_operand:SI 3 "immediate_operand" "i")]
2934 unsigned int elt = INTVAL (operands[3]);
2936 if (BYTES_BIG_ENDIAN)
2938 unsigned int reg_nelts
2939 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
2940 elt ^= reg_nelts - 1;
2943 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
2944 GEN_INT (1 << elt), operands[2]));
2948 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
2950 (define_expand "neon_vset_lanedi"
2951 [(match_operand:DI 0 "s_register_operand" "=w")
2952 (match_operand:DI 1 "s_register_operand" "r")
2953 (match_operand:DI 2 "s_register_operand" "0")
2954 (match_operand:SI 3 "immediate_operand" "i")]
2957 emit_move_insn (operands[0], operands[1]);
2961 (define_expand "neon_vcreate<mode>"
2962 [(match_operand:VD_RE 0 "s_register_operand" "")
2963 (match_operand:DI 1 "general_operand" "")]
2966 rtx src = gen_lowpart (<MODE>mode, operands[1]);
2967 emit_move_insn (operands[0], src);
2971 (define_insn "neon_vdup_n<mode>"
2972 [(set (match_operand:VX 0 "s_register_operand" "=w")
2973 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
2975 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
2976 [(set_attr "type" "neon_from_gp<q>")]
2979 (define_insn "neon_vdup_nv4hf"
2980 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
2981 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
2984 [(set_attr "type" "neon_from_gp")]
2987 (define_insn "neon_vdup_nv8hf"
2988 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
2989 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
2992 [(set_attr "type" "neon_from_gp_q")]
2995 (define_insn "neon_vdup_n<mode>"
2996 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
2997 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3000 vdup.<V_sz_elem>\t%<V_reg>0, %1
3001 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3002 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3005 (define_expand "neon_vdup_ndi"
3006 [(match_operand:DI 0 "s_register_operand" "=w")
3007 (match_operand:DI 1 "s_register_operand" "r")]
3010 emit_move_insn (operands[0], operands[1]);
3015 (define_insn "neon_vdup_nv2di"
3016 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3017 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3020 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3021 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3022 [(set_attr "length" "8")
3023 (set_attr "type" "multiple")]
3026 (define_insn "neon_vdup_lane<mode>_internal"
3027 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3029 (vec_select:<V_elem>
3030 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3031 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3034 if (BYTES_BIG_ENDIAN)
3036 int elt = INTVAL (operands[2]);
3037 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3038 operands[2] = GEN_INT (elt);
3041 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3043 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3045 [(set_attr "type" "neon_dup<q>")]
3048 (define_expand "neon_vdup_lane<mode>"
3049 [(match_operand:VDQW 0 "s_register_operand" "=w")
3050 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3051 (match_operand:SI 2 "immediate_operand" "i")]
3054 if (BYTES_BIG_ENDIAN)
3056 unsigned int elt = INTVAL (operands[2]);
3057 unsigned int reg_nelts
3058 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3059 elt ^= reg_nelts - 1;
3060 operands[2] = GEN_INT (elt);
3062 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3067 ; Scalar index is ignored, since only zero is valid here.
3068 (define_expand "neon_vdup_lanedi"
3069 [(match_operand:DI 0 "s_register_operand" "=w")
3070 (match_operand:DI 1 "s_register_operand" "w")
3071 (match_operand:SI 2 "immediate_operand" "i")]
3074 emit_move_insn (operands[0], operands[1]);
3078 ; Likewise for v2di, as the DImode second operand has only a single element.
3079 (define_expand "neon_vdup_lanev2di"
3080 [(match_operand:V2DI 0 "s_register_operand" "=w")
3081 (match_operand:DI 1 "s_register_operand" "w")
3082 (match_operand:SI 2 "immediate_operand" "i")]
3085 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3089 ; Disabled before reload because we don't want combine doing something silly,
3090 ; but used by the post-reload expansion of neon_vcombine.
3091 (define_insn "*neon_vswp<mode>"
3092 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3093 (match_operand:VDQX 1 "s_register_operand" "+w"))
3094 (set (match_dup 1) (match_dup 0))]
3095 "TARGET_NEON && reload_completed"
3096 "vswp\t%<V_reg>0, %<V_reg>1"
3097 [(set_attr "type" "neon_permute<q>")]
3100 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3102 ;; FIXME: A different implementation of this builtin could make it much
3103 ;; more likely that we wouldn't actually need to output anything (we could make
3104 ;; it so that the reg allocator puts things in the right places magically
3105 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3107 (define_insn_and_split "neon_vcombine<mode>"
3108 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3109 (vec_concat:<V_DOUBLE>
3110 (match_operand:VDX 1 "s_register_operand" "w")
3111 (match_operand:VDX 2 "s_register_operand" "w")))]
3114 "&& reload_completed"
3117 neon_split_vcombine (operands);
3120 [(set_attr "type" "multiple")]
3123 (define_expand "neon_vget_high<mode>"
3124 [(match_operand:<V_HALF> 0 "s_register_operand")
3125 (match_operand:VQX 1 "s_register_operand")]
3128 emit_move_insn (operands[0],
3129 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3130 GET_MODE_SIZE (<V_HALF>mode)));
3134 (define_expand "neon_vget_low<mode>"
3135 [(match_operand:<V_HALF> 0 "s_register_operand")
3136 (match_operand:VQX 1 "s_register_operand")]
3139 emit_move_insn (operands[0],
3140 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Vector signed int -> float conversion.  Guarded by !flag_rounding_math
;; (NOTE(review): presumably because VCVT does not honour the dynamic
;; FPSCR rounding mode -- confirm).
3145 (define_insn "float<mode><V_cvtto>2"
3146 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3147 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3148 "TARGET_NEON && !flag_rounding_math"
3149 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3150 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Vector unsigned int -> float conversion; same restriction as above.
3153 (define_insn "floatuns<mode><V_cvtto>2"
3154 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3155 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3156 "TARGET_NEON && !flag_rounding_math"
3157 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3158 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Vector float -> signed int conversion (RTL "fix" = truncate toward zero).
3161 (define_insn "fix_trunc<mode><V_cvtto>2"
3162 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3163 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3165 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3166 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Vector float -> unsigned int conversion.
3169 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3170 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3171 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3173 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
3174 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic (unspec) float -> int conversion; <sup> selects the s/u variant.
3177 (define_insn "neon_vcvt<sup><mode>"
3178 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3179 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
3182 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
3183 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic (unspec) int -> float conversion; <sup> selects the s/u variant.
3186 (define_insn "neon_vcvt<sup><mode>"
3187 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3188 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
3191 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
3192 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Half-precision widening conversion: four f16 lanes (D reg) -> four f32
;; lanes (Q reg).  Requires both NEON and the FP16 extension.
3195 (define_insn "neon_vcvtv4sfv4hf"
3196 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
3197 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
3199 "TARGET_NEON && TARGET_FP16"
3200 "vcvt.f32.f16\t%q0, %P1"
3201 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion: four f32 lanes (Q reg) -> four f16 lanes (D reg).
3204 (define_insn "neon_vcvtv4hfv4sf"
3205 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3206 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
3208 "TARGET_NEON && TARGET_FP16"
3209 "vcvt.f16.f32\t%P0, %q1"
3210 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt_n: fixed-point conversions.  Operand 2 is the number of fraction
;; bits; neon_const_bounds (op, 1, 33) enforces 1 <= #fbits <= 32.
;; Float -> int form:
3213 (define_insn "neon_vcvt<sup>_n<mode>"
3214 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3215 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
3216 (match_operand:SI 2 "immediate_operand" "i")]
3220 neon_const_bounds (operands[2], 1, 33);
3221 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
3223 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Int -> float form, same fraction-bit bounds.
3226 (define_insn "neon_vcvt<sup>_n<mode>"
3227 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3228 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
3229 (match_operand:SI 2 "immediate_operand" "i")]
3233 neon_const_bounds (operands[2], 1, 33);
3234 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
3236 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; vmovn: narrow each element of a quad-word vector to half width (D dest).
3239 (define_insn "neon_vmovn<mode>"
3240 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3241 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3244 "vmovn.<V_if_elem>\t%P0, %q1"
3245 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vqmovn: saturating narrow; <sup> selects signed/unsigned saturation.
3248 (define_insn "neon_vqmovn<sup><mode>"
3249 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3250 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3253 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
3254 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun: signed input narrowed to unsigned result with saturation.
3257 (define_insn "neon_vqmovun<mode>"
3258 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3259 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
3262 "vqmovun.<V_s_elem>\t%P0, %q1"
3263 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vmovl: widen each element of a double-word vector to double width (Q dest).
3266 (define_insn "neon_vmovl<sup><mode>"
3267 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3268 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
3271 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
3272 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply-by-scalar-lane family.  Operand 2 holds the scalar vector
;; (constrained to the lane-addressable register range via
;; <scalar_mul_constraint>), operand 3 the lane index.
;; Double-word form:
3275 (define_insn "neon_vmul_lane<mode>"
3276 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3277 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
3278 (match_operand:VMD 2 "s_register_operand"
3279 "<scalar_mul_constraint>")
3280 (match_operand:SI 3 "immediate_operand" "i")]
3284 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
3287 (if_then_else (match_test "<Is_float_mode>")
3288 (const_string "neon_fp_mul_s_scalar<q>")
3289 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Quad-word form: the scalar lives in a V_HALF (D-reg) operand.
3292 (define_insn "neon_vmul_lane<mode>"
3293 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3294 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
3295 (match_operand:<V_HALF> 2 "s_register_operand"
3296 "<scalar_mul_constraint>")
3297 (match_operand:SI 3 "immediate_operand" "i")]
3301 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
3304 (if_then_else (match_test "<Is_float_mode>")
3305 (const_string "neon_fp_mul_s_scalar<q>")
3306 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; vmull_lane: widening multiply by lane (D x D-lane -> Q).
3309 (define_insn "neon_vmull<sup>_lane<mode>"
3310 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3311 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3312 (match_operand:VMDI 2 "s_register_operand"
3313 "<scalar_mul_constraint>")
3314 (match_operand:SI 3 "immediate_operand" "i")]
3318 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
3320 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull_lane: saturating doubling widening multiply by lane.
3323 (define_insn "neon_vqdmull_lane<mode>"
3324 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3325 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
3326 (match_operand:VMDI 2 "s_register_operand"
3327 "<scalar_mul_constraint>")
3328 (match_operand:SI 3 "immediate_operand" "i")]
3329 UNSPEC_VQDMULL_LANE))]
3332 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
3334 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq(r)dmulh_lane: saturating (rounding) doubling multiply-high by lane;
;; quad-word form first, then double-word form.
3337 (define_insn "neon_vq<r>dmulh_lane<mode>"
3338 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3339 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
3340 (match_operand:<V_HALF> 2 "s_register_operand"
3341 "<scalar_mul_constraint>")
3342 (match_operand:SI 3 "immediate_operand" "i")]
3346 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
3348 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3351 (define_insn "neon_vq<r>dmulh_lane<mode>"
3352 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3353 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
3354 (match_operand:VMDI 2 "s_register_operand"
3355 "<scalar_mul_constraint>")
3356 (match_operand:SI 3 "immediate_operand" "i")]
3360 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
3362 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
3365 ;; vqrdmlah_lane, vqrdmlsh_lane
;; vqrdmlah/vqrdmlsh by scalar lane (ARMv8.1-A).  Operand 1 is the
;; accumulator and is tied to the destination ("0"); operand 4 is the lane.
;; Quad-word form (scalar in a V_HALF D-reg operand):
3366 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3367 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
3368 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
3369 (match_operand:VMQI 2 "s_register_operand" "w")
3370 (match_operand:<V_HALF> 3 "s_register_operand"
3371 "<scalar_mul_constraint>")
3372 (match_operand:SI 4 "immediate_operand" "i")]
3377 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
3379 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; Double-word form.
3382 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
3383 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
3384 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
3385 (match_operand:VMDI 2 "s_register_operand" "w")
3386 (match_operand:VMDI 3 "s_register_operand"
3387 "<scalar_mul_constraint>")
3388 (match_operand:SI 4 "immediate_operand" "i")]
3393 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
3395 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate / multiply-subtract by scalar lane.  In all of
;; these, operand 1 is the accumulator tied to the destination ("0"),
;; operand 2 the multiplicand, operand 3 the scalar vector and operand 4
;; the lane index.
;; vmla_lane, double-word form:
3398 (define_insn "neon_vmla_lane<mode>"
3399 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3400 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3401 (match_operand:VMD 2 "s_register_operand" "w")
3402 (match_operand:VMD 3 "s_register_operand"
3403 "<scalar_mul_constraint>")
3404 (match_operand:SI 4 "immediate_operand" "i")]
3408 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3411 (if_then_else (match_test "<Is_float_mode>")
3412 (const_string "neon_fp_mla_s_scalar<q>")
3413 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmla_lane, quad-word form (scalar in a V_HALF D-reg operand):
3416 (define_insn "neon_vmla_lane<mode>"
3417 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3418 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3419 (match_operand:VMQ 2 "s_register_operand" "w")
3420 (match_operand:<V_HALF> 3 "s_register_operand"
3421 "<scalar_mul_constraint>")
3422 (match_operand:SI 4 "immediate_operand" "i")]
3426 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3429 (if_then_else (match_test "<Is_float_mode>")
3430 (const_string "neon_fp_mla_s_scalar<q>")
3431 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlal_lane: widening multiply-accumulate by lane (Q accumulator).
3434 (define_insn "neon_vmlal<sup>_lane<mode>"
3435 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3436 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3437 (match_operand:VMDI 2 "s_register_operand" "w")
3438 (match_operand:VMDI 3 "s_register_operand"
3439 "<scalar_mul_constraint>")
3440 (match_operand:SI 4 "immediate_operand" "i")]
3444 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3446 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal_lane: saturating doubling widening multiply-accumulate by lane.
3449 (define_insn "neon_vqdmlal_lane<mode>"
3450 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3451 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3452 (match_operand:VMDI 2 "s_register_operand" "w")
3453 (match_operand:VMDI 3 "s_register_operand"
3454 "<scalar_mul_constraint>")
3455 (match_operand:SI 4 "immediate_operand" "i")]
3456 UNSPEC_VQDMLAL_LANE))]
3459 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3461 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; vmls_lane, double-word form:
3464 (define_insn "neon_vmls_lane<mode>"
3465 [(set (match_operand:VMD 0 "s_register_operand" "=w")
3466 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
3467 (match_operand:VMD 2 "s_register_operand" "w")
3468 (match_operand:VMD 3 "s_register_operand"
3469 "<scalar_mul_constraint>")
3470 (match_operand:SI 4 "immediate_operand" "i")]
3474 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
3477 (if_then_else (match_test "<Is_float_mode>")
3478 (const_string "neon_fp_mla_s_scalar<q>")
3479 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmls_lane, quad-word form:
3482 (define_insn "neon_vmls_lane<mode>"
3483 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
3484 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
3485 (match_operand:VMQ 2 "s_register_operand" "w")
3486 (match_operand:<V_HALF> 3 "s_register_operand"
3487 "<scalar_mul_constraint>")
3488 (match_operand:SI 4 "immediate_operand" "i")]
3492 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
3495 (if_then_else (match_test "<Is_float_mode>")
3496 (const_string "neon_fp_mla_s_scalar<q>")
3497 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; vmlsl_lane: widening multiply-subtract by lane.
3500 (define_insn "neon_vmlsl<sup>_lane<mode>"
3501 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3502 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3503 (match_operand:VMDI 2 "s_register_operand" "w")
3504 (match_operand:VMDI 3 "s_register_operand"
3505 "<scalar_mul_constraint>")
3506 (match_operand:SI 4 "immediate_operand" "i")]
3510 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
3512 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl_lane: saturating doubling widening multiply-subtract by lane.
3515 (define_insn "neon_vqdmlsl_lane<mode>"
3516 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3517 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
3518 (match_operand:VMDI 2 "s_register_operand" "w")
3519 (match_operand:VMDI 3 "s_register_operand"
3520 "<scalar_mul_constraint>")
3521 (match_operand:SI 4 "immediate_operand" "i")]
3522 UNSPEC_VQDMLSL_LANE))]
3525 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
3527 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
3530 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
3531 ; core register into a temp register, then use a scalar taken from that. This
3532 ; isn't an optimal solution if e.g. the scalar has just been read from memory
3533 ; or extracted from another vector. The latter case it's currently better to
3534 ; use the "_lane" variant, and the former case can probably be implemented
3535 ; using vld1_lane, but that hasn't been done yet.
;; "_n" multiply variants: broadcast the scalar operand into lane 0 of a
;; fresh vector register with vset_lane, then defer to the matching
;; "_lane" pattern (see the FIXME above about the efficiency of this
;; scheme).  Double-word forms build the temp in <MODE>mode; quad-word
;; forms build it in <V_HALF>mode, since the _lane patterns take a D-reg
;; scalar.
3537 (define_expand "neon_vmul_n<mode>"
3538 [(match_operand:VMD 0 "s_register_operand" "")
3539 (match_operand:VMD 1 "s_register_operand" "")
3540 (match_operand:<V_elem> 2 "s_register_operand" "")]
3543 rtx tmp = gen_reg_rtx (<MODE>mode);
3544 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3545 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Quad-word vmul_n:
3550 (define_expand "neon_vmul_n<mode>"
3551 [(match_operand:VMQ 0 "s_register_operand" "")
3552 (match_operand:VMQ 1 "s_register_operand" "")
3553 (match_operand:<V_elem> 2 "s_register_operand" "")]
3556 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3557 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3558 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Widening multiplies by scalar (signed, then unsigned):
3563 (define_expand "neon_vmulls_n<mode>"
3564 [(match_operand:<V_widen> 0 "s_register_operand" "")
3565 (match_operand:VMDI 1 "s_register_operand" "")
3566 (match_operand:<V_elem> 2 "s_register_operand" "")]
3569 rtx tmp = gen_reg_rtx (<MODE>mode);
3570 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3571 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
3576 (define_expand "neon_vmullu_n<mode>"
3577 [(match_operand:<V_widen> 0 "s_register_operand" "")
3578 (match_operand:VMDI 1 "s_register_operand" "")
3579 (match_operand:<V_elem> 2 "s_register_operand" "")]
3582 rtx tmp = gen_reg_rtx (<MODE>mode);
3583 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3584 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling widening multiply by scalar:
3589 (define_expand "neon_vqdmull_n<mode>"
3590 [(match_operand:<V_widen> 0 "s_register_operand" "")
3591 (match_operand:VMDI 1 "s_register_operand" "")
3592 (match_operand:<V_elem> 2 "s_register_operand" "")]
3595 rtx tmp = gen_reg_rtx (<MODE>mode);
3596 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3597 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Saturating (rounding) doubling multiply-high by scalar, D forms:
3602 (define_expand "neon_vqdmulh_n<mode>"
3603 [(match_operand:VMDI 0 "s_register_operand" "")
3604 (match_operand:VMDI 1 "s_register_operand" "")
3605 (match_operand:<V_elem> 2 "s_register_operand" "")]
3608 rtx tmp = gen_reg_rtx (<MODE>mode);
3609 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3610 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3615 (define_expand "neon_vqrdmulh_n<mode>"
3616 [(match_operand:VMDI 0 "s_register_operand" "")
3617 (match_operand:VMDI 1 "s_register_operand" "")
3618 (match_operand:<V_elem> 2 "s_register_operand" "")]
3621 rtx tmp = gen_reg_rtx (<MODE>mode);
3622 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
3623 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Same pair for quad-word modes (temp in V_HALF mode):
3628 (define_expand "neon_vqdmulh_n<mode>"
3629 [(match_operand:VMQI 0 "s_register_operand" "")
3630 (match_operand:VMQI 1 "s_register_operand" "")
3631 (match_operand:<V_elem> 2 "s_register_operand" "")]
3634 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3635 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3636 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
3641 (define_expand "neon_vqrdmulh_n<mode>"
3642 [(match_operand:VMQI 0 "s_register_operand" "")
3643 (match_operand:VMQI 1 "s_register_operand" "")
3644 (match_operand:<V_elem> 2 "s_register_operand" "")]
3647 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3648 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
3649 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; "_n" multiply-accumulate / multiply-subtract variants: as with the
;; "_n" multiplies above, broadcast the scalar (operand 3) into lane 0 of
;; a temp vector and dispatch to the matching "_lane" pattern.
;; vmla_n, double-word form:
3654 (define_expand "neon_vmla_n<mode>"
3655 [(match_operand:VMD 0 "s_register_operand" "")
3656 (match_operand:VMD 1 "s_register_operand" "")
3657 (match_operand:VMD 2 "s_register_operand" "")
3658 (match_operand:<V_elem> 3 "s_register_operand" "")]
3661 rtx tmp = gen_reg_rtx (<MODE>mode);
3662 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3663 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmla_n, quad-word form (temp in V_HALF mode):
3668 (define_expand "neon_vmla_n<mode>"
3669 [(match_operand:VMQ 0 "s_register_operand" "")
3670 (match_operand:VMQ 1 "s_register_operand" "")
3671 (match_operand:VMQ 2 "s_register_operand" "")
3672 (match_operand:<V_elem> 3 "s_register_operand" "")]
3675 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3676 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3677 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Widening multiply-accumulate by scalar (signed, unsigned):
3682 (define_expand "neon_vmlals_n<mode>"
3683 [(match_operand:<V_widen> 0 "s_register_operand" "")
3684 (match_operand:<V_widen> 1 "s_register_operand" "")
3685 (match_operand:VMDI 2 "s_register_operand" "")
3686 (match_operand:<V_elem> 3 "s_register_operand" "")]
3689 rtx tmp = gen_reg_rtx (<MODE>mode);
3690 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3691 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
3696 (define_expand "neon_vmlalu_n<mode>"
3697 [(match_operand:<V_widen> 0 "s_register_operand" "")
3698 (match_operand:<V_widen> 1 "s_register_operand" "")
3699 (match_operand:VMDI 2 "s_register_operand" "")
3700 (match_operand:<V_elem> 3 "s_register_operand" "")]
3703 rtx tmp = gen_reg_rtx (<MODE>mode);
3704 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3705 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-accumulate by scalar:
3710 (define_expand "neon_vqdmlal_n<mode>"
3711 [(match_operand:<V_widen> 0 "s_register_operand" "")
3712 (match_operand:<V_widen> 1 "s_register_operand" "")
3713 (match_operand:VMDI 2 "s_register_operand" "")
3714 (match_operand:<V_elem> 3 "s_register_operand" "")]
3717 rtx tmp = gen_reg_rtx (<MODE>mode);
3718 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3719 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n, double-word form:
3724 (define_expand "neon_vmls_n<mode>"
3725 [(match_operand:VMD 0 "s_register_operand" "")
3726 (match_operand:VMD 1 "s_register_operand" "")
3727 (match_operand:VMD 2 "s_register_operand" "")
3728 (match_operand:<V_elem> 3 "s_register_operand" "")]
3731 rtx tmp = gen_reg_rtx (<MODE>mode);
3732 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3733 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n, quad-word form:
3738 (define_expand "neon_vmls_n<mode>"
3739 [(match_operand:VMQ 0 "s_register_operand" "")
3740 (match_operand:VMQ 1 "s_register_operand" "")
3741 (match_operand:VMQ 2 "s_register_operand" "")
3742 (match_operand:<V_elem> 3 "s_register_operand" "")]
3745 rtx tmp = gen_reg_rtx (<V_HALF>mode);
3746 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
3747 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; Widening multiply-subtract by scalar (signed, unsigned):
3752 (define_expand "neon_vmlsls_n<mode>"
3753 [(match_operand:<V_widen> 0 "s_register_operand" "")
3754 (match_operand:<V_widen> 1 "s_register_operand" "")
3755 (match_operand:VMDI 2 "s_register_operand" "")
3756 (match_operand:<V_elem> 3 "s_register_operand" "")]
3759 rtx tmp = gen_reg_rtx (<MODE>mode);
3760 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3761 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
3766 (define_expand "neon_vmlslu_n<mode>"
3767 [(match_operand:<V_widen> 0 "s_register_operand" "")
3768 (match_operand:<V_widen> 1 "s_register_operand" "")
3769 (match_operand:VMDI 2 "s_register_operand" "")
3770 (match_operand:<V_elem> 3 "s_register_operand" "")]
3773 rtx tmp = gen_reg_rtx (<MODE>mode);
3774 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3775 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-subtract by scalar:
3780 (define_expand "neon_vqdmlsl_n<mode>"
3781 [(match_operand:<V_widen> 0 "s_register_operand" "")
3782 (match_operand:<V_widen> 1 "s_register_operand" "")
3783 (match_operand:VMDI 2 "s_register_operand" "")
3784 (match_operand:<V_elem> 3 "s_register_operand" "")]
3787 rtx tmp = gen_reg_rtx (<MODE>mode);
3788 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
3789 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext: extract a vector from a pair of concatenated vectors, starting
;; at byte/element index operand 3.  The index is bounds-checked against
;; the element count of the mode.
3794 (define_insn "neon_vext<mode>"
3795 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
3796 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
3797 (match_operand:VDQX 2 "s_register_operand" "w")
3798 (match_operand:SI 3 "immediate_operand" "i")]
3802 neon_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
3803 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
3805 [(set_attr "type" "neon_ext<q>")]
;; vrev64/32/16: reverse elements within each 64-, 32- or 16-bit group.
3808 (define_insn "neon_vrev64<mode>"
3809 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
3810 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
3813 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3814 [(set_attr "type" "neon_rev<q>")]
3817 (define_insn "neon_vrev32<mode>"
3818 [(set (match_operand:VX 0 "s_register_operand" "=w")
3819 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
3822 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3823 [(set_attr "type" "neon_rev<q>")]
3826 (define_insn "neon_vrev16<mode>"
3827 [(set (match_operand:VE 0 "s_register_operand" "=w")
3828 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
3831 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3832 [(set_attr "type" "neon_rev<q>")]
3835 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
3836 ; allocation. For an intrinsic of form:
3837 ; rD = vbsl_* (rS, rN, rM)
3838 ; We can use any of:
3839 ; vbsl rS, rN, rM (if D = S)
3840 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
3841 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; vbsl bitwise select: three constraint alternatives allow the register
;; allocator to tie the destination to the selector (vbsl), the "false"
;; input (vbit) or the "true" input (vbif) -- see the comment above for
;; the operand roles of each form.
3843 (define_insn "neon_vbsl<mode>_internal"
3844 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
3845 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
3846 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
3847 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
3851 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
3852 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
3853 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
3854 [(set_attr "type" "neon_bsl<q>")]
;; Expander: the selector (operand 1) arrives in <V_cmp_result> mode, so
;; convert it to the data mode before handing off to the insn above.
3857 (define_expand "neon_vbsl<mode>"
3858 [(set (match_operand:VDQX 0 "s_register_operand" "")
3859 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
3860 (match_operand:VDQX 2 "s_register_operand" "")
3861 (match_operand:VDQX 3 "s_register_operand" "")]
3865 /* We can't alias operands together if they have different modes.  */
3866 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Element-wise shifts where operand 2 is a vector of per-element shift
;; counts held in a register (constraint "w").
3870 (define_insn "neon_v<shift_op><sup><mode>"
3871 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3872 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3873 (match_operand:VDQIX 2 "s_register_operand" "w")]
3876 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3877 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating variant of the above.
3881 (define_insn "neon_v<shift_op><sup><mode>"
3882 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3883 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3884 (match_operand:VDQIX 2 "s_register_operand" "w")]
3887 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3888 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Immediate shift by operand 2; bounds 1 <= imm <= element width.
3892 (define_insn "neon_v<shift_op><sup>_n<mode>"
3893 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3894 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3895 (match_operand:SI 2 "immediate_operand" "i")]
3899 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
3900 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3902 [(set_attr "type" "neon_shift_imm<q>")]
3905 ;; vshrn_n, vrshrn_n
;; Narrowing shift right: Q source, D dest; 1 <= imm <= half element width.
3906 (define_insn "neon_v<shift_op>_n<mode>"
3907 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3908 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3909 (match_operand:SI 2 "immediate_operand" "i")]
3913 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3914 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
3916 [(set_attr "type" "neon_shift_imm_narrow_q")]
3919 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift right, signed or unsigned via <sup>.
3920 (define_insn "neon_v<shift_op><sup>_n<mode>"
3921 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3922 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3923 (match_operand:SI 2 "immediate_operand" "i")]
3927 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3928 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
3930 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
3933 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing shift right, signed input to unsigned result.
3934 (define_insn "neon_v<shift_op>_n<mode>"
3935 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
3936 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
3937 (match_operand:SI 2 "immediate_operand" "i")]
3941 neon_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
3942 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
3944 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl_n: shift left by immediate; 0 <= imm < element width.
3947 (define_insn "neon_vshl_n<mode>"
3948 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3949 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3950 (match_operand:SI 2 "immediate_operand" "i")]
3954 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3955 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
3957 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl_n: saturating shift left by immediate; signed/unsigned via <sup>.
3960 (define_insn "neon_vqshl_<sup>_n<mode>"
3961 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3962 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3963 (match_operand:SI 2 "immediate_operand" "i")]
3967 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3968 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
3970 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu_n: saturating shift left, signed input to unsigned result.
3973 (define_insn "neon_vqshlu_n<mode>"
3974 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
3975 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
3976 (match_operand:SI 2 "immediate_operand" "i")]
3980 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
3981 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
3983 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vshll_n: widening shift left (D source, Q dest).
;; NOTE(review): the in-body comment states 0 < imm <= size, but the
;; bounds check below permits imm == 0 -- confirm which is intended.
3986 (define_insn "neon_vshll<sup>_n<mode>"
3987 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3988 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3989 (match_operand:SI 2 "immediate_operand" "i")]
3993 /* The boundaries are: 0 < imm <= size.  */
3994 neon_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
3995 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
3997 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-and-accumulate (vsra/vrsra style): operand 1 is the accumulator
;; tied to the destination ("0"); operand 2 is shifted by immediate
;; operand 3 and added in.  Bounds: 1 <= imm <= element width.
4001 (define_insn "neon_v<shift_op><sup>_n<mode>"
4002 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4003 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4004 (match_operand:VDQIX 2 "s_register_operand" "w")
4005 (match_operand:SI 3 "immediate_operand" "i")]
4009 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4010 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4012 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri_n: shift right and insert (destination bits outside the shifted
;; field are preserved, hence the tied operand 1).
4015 (define_insn "neon_vsri_n<mode>"
4016 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4017 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4018 (match_operand:VDQIX 2 "s_register_operand" "w")
4019 (match_operand:SI 3 "immediate_operand" "i")]
4023 neon_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4024 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4026 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli_n: shift left and insert; bounds 0 <= imm < element width.
4029 (define_insn "neon_vsli_n<mode>"
4030 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4031 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4032 (match_operand:VDQIX 2 "s_register_operand" "w")
4033 (match_operand:SI 3 "immediate_operand" "i")]
4037 neon_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4038 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4040 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl: table lookup.  The multi-register table is passed as one wide
;; operand (TI = 2 D regs, EI = 3, OI = 4); the output code rebuilds the
;; individual D registers from REGNO of the first.  The +2 stride per D
;; register reflects the backend's hard-register numbering for VFP D
;; regs (each spans two register numbers -- NOTE(review): confirm against
;; the register definitions in arm.h).
4043 (define_insn "neon_vtbl1v8qi"
4044 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4045 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4046 (match_operand:V8QI 2 "s_register_operand" "w")]
4049 "vtbl.8\t%P0, {%P1}, %P2"
4050 [(set_attr "type" "neon_tbl1")]
;; Two-register table (TI-mode operand 1).
4053 (define_insn "neon_vtbl2v8qi"
4054 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4055 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4056 (match_operand:V8QI 2 "s_register_operand" "w")]
4061 int tabbase = REGNO (operands[1]);
4063 ops[0] = operands[0];
4064 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4065 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4066 ops[3] = operands[2];
4067 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4071 [(set_attr "type" "neon_tbl2")]
;; Three-register table (EI-mode operand 1).
4074 (define_insn "neon_vtbl3v8qi"
4075 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4076 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4077 (match_operand:V8QI 2 "s_register_operand" "w")]
4082 int tabbase = REGNO (operands[1]);
4084 ops[0] = operands[0];
4085 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4086 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4087 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4088 ops[4] = operands[2];
4089 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4093 [(set_attr "type" "neon_tbl3")]
;; Four-register table (OI-mode operand 1).
4096 (define_insn "neon_vtbl4v8qi"
4097 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4098 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
4099 (match_operand:V8QI 2 "s_register_operand" "w")]
4104 int tabbase = REGNO (operands[1]);
4106 ops[0] = operands[0];
4107 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4108 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4109 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4110 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4111 ops[5] = operands[2];
4112 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4116 [(set_attr "type" "neon_tbl4")]
4119 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup for the vec_perm infrastructure.  Split after
;; reload into two V8QI vtbl2 operations, one per half of the result.
;; The earlyclobber "=&w" keeps the destination from overlapping the
;; still-live table/index inputs between the two halves.
4120 (define_insn_and_split "neon_vtbl1v16qi"
4121 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4122 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
4123 (match_operand:V16QI 2 "s_register_operand" "w")]
4127 "&& reload_completed"
4130 rtx op0, op1, op2, part0, part2;
4134 op1 = gen_lowpart (TImode, operands[1]);
4137 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4138 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4139 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4140 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4142 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4143 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4144 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4145 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4148 [(set_attr "type" "multiple")]
;; Same scheme for a two-Q-register (OI-mode) table.
4151 (define_insn_and_split "neon_vtbl2v16qi"
4152 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
4153 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
4154 (match_operand:V16QI 2 "s_register_operand" "w")]
4158 "&& reload_completed"
4161 rtx op0, op1, op2, part0, part2;
4168 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
4169 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4170 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4171 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4173 ofs = subreg_highpart_offset (V8QImode, V16QImode);
4174 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
4175 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
4176 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
4179 [(set_attr "type" "multiple")]
4182 ;; ??? Logically we should extend the regular neon_vcombine pattern to
4183 ;; handle quad-word input modes, producing octa-word output modes. But
4184 ;; that requires us to add support for octa-word vector modes in moves.
4185 ;; That seems overkill for this one use in vec_perm.
4186 (define_insn_and_split "neon_vcombinev16qi"
4187 [(set (match_operand:OI 0 "s_register_operand" "=w")
4188 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
4189 (match_operand:V16QI 2 "s_register_operand" "w")]
4193 "&& reload_completed"
4196 neon_split_vcombine (operands);
4199 [(set_attr "type" "multiple")]
4202 (define_insn "neon_vtbx1v8qi"
4203 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4204 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4205 (match_operand:V8QI 2 "s_register_operand" "w")
4206 (match_operand:V8QI 3 "s_register_operand" "w")]
4209 "vtbx.8\t%P0, {%P2}, %P3"
4210 [(set_attr "type" "neon_tbl1")]
4213 (define_insn "neon_vtbx2v8qi"
4214 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4215 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4216 (match_operand:TI 2 "s_register_operand" "w")
4217 (match_operand:V8QI 3 "s_register_operand" "w")]
4222 int tabbase = REGNO (operands[2]);
4224 ops[0] = operands[0];
4225 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4226 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4227 ops[3] = operands[3];
4228 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
4232 [(set_attr "type" "neon_tbl2")]
4235 (define_insn "neon_vtbx3v8qi"
4236 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4237 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4238 (match_operand:EI 2 "s_register_operand" "w")
4239 (match_operand:V8QI 3 "s_register_operand" "w")]
4244 int tabbase = REGNO (operands[2]);
4246 ops[0] = operands[0];
4247 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4248 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4249 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4250 ops[4] = operands[3];
4251 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
4255 [(set_attr "type" "neon_tbl3")]
4258 (define_insn "neon_vtbx4v8qi"
4259 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4260 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
4261 (match_operand:OI 2 "s_register_operand" "w")
4262 (match_operand:V8QI 3 "s_register_operand" "w")]
4267 int tabbase = REGNO (operands[2]);
4269 ops[0] = operands[0];
4270 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4271 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4272 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
4273 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
4274 ops[5] = operands[3];
4275 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
4279 [(set_attr "type" "neon_tbl4")]
4282 (define_expand "neon_vtrn<mode>_internal"
4284 [(set (match_operand:VDQW 0 "s_register_operand" "")
4285 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4286 (match_operand:VDQW 2 "s_register_operand" "")]
4288 (set (match_operand:VDQW 3 "s_register_operand" "")
4289 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
4294 ;; Note: Different operand numbering to handle tied registers correctly.
4295 (define_insn "*neon_vtrn<mode>_insn"
4296 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4297 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4298 (match_operand:VDQW 3 "s_register_operand" "2")]
4300 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4301 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4304 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4305 [(set_attr "type" "neon_permute<q>")]
4308 (define_expand "neon_vzip<mode>_internal"
4310 [(set (match_operand:VDQW 0 "s_register_operand" "")
4311 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4312 (match_operand:VDQW 2 "s_register_operand" "")]
4314 (set (match_operand:VDQW 3 "s_register_operand" "")
4315 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
4320 ;; Note: Different operand numbering to handle tied registers correctly.
4321 (define_insn "*neon_vzip<mode>_insn"
4322 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4323 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4324 (match_operand:VDQW 3 "s_register_operand" "2")]
4326 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4327 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4330 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4331 [(set_attr "type" "neon_zip<q>")]
4334 (define_expand "neon_vuzp<mode>_internal"
4336 [(set (match_operand:VDQW 0 "s_register_operand" "")
4337 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "")
4338 (match_operand:VDQW 2 "s_register_operand" "")]
4340 (set (match_operand:VDQW 3 "s_register_operand" "")
4341 (unspec:VDQW [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
4346 ;; Note: Different operand numbering to handle tied registers correctly.
4347 (define_insn "*neon_vuzp<mode>_insn"
4348 [(set (match_operand:VDQW 0 "s_register_operand" "=&w")
4349 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
4350 (match_operand:VDQW 3 "s_register_operand" "2")]
4352 (set (match_operand:VDQW 2 "s_register_operand" "=&w")
4353 (unspec:VDQW [(match_dup 1) (match_dup 3)]
4356 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
4357 [(set_attr "type" "neon_zip<q>")]
4360 (define_expand "vec_load_lanes<mode><mode>"
4361 [(set (match_operand:VDQX 0 "s_register_operand")
4362 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
4366 (define_insn "neon_vld1<mode>"
4367 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4368 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
4371 "vld1.<V_sz_elem>\t%h0, %A1"
4372 [(set_attr "type" "neon_load1_1reg<q>")]
4375 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
4376 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
4378 (define_insn "neon_vld1_lane<mode>"
4379 [(set (match_operand:VDX 0 "s_register_operand" "=w")
4380 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4381 (match_operand:VDX 2 "s_register_operand" "0")
4382 (match_operand:SI 3 "immediate_operand" "i")]
4386 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4387 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4388 operands[3] = GEN_INT (lane);
4390 return "vld1.<V_sz_elem>\t%P0, %A1";
4392 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4394 [(set_attr "type" "neon_load1_one_lane<q>")]
4397 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4398 ;; here on big endian targets.
4399 (define_insn "neon_vld1_lane<mode>"
4400 [(set (match_operand:VQX 0 "s_register_operand" "=w")
4401 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
4402 (match_operand:VQX 2 "s_register_operand" "0")
4403 (match_operand:SI 3 "immediate_operand" "i")]
4407 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4408 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4409 operands[3] = GEN_INT (lane);
4410 int regno = REGNO (operands[0]);
4411 if (lane >= max / 2)
4415 operands[3] = GEN_INT (lane);
4417 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
4419 return "vld1.<V_sz_elem>\t%P0, %A1";
4421 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
4423 [(set_attr "type" "neon_load1_one_lane<q>")]
4426 (define_insn "neon_vld1_dup<mode>"
4427 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
4428 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4430 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
4431 [(set_attr "type" "neon_load1_all_lanes<q>")]
4434 ;; Special case for DImode. Treat it exactly like a simple load.
4435 (define_expand "neon_vld1_dupdi"
4436 [(set (match_operand:DI 0 "s_register_operand" "")
4437 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
4443 (define_insn "neon_vld1_dup<mode>"
4444 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
4445 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
4448 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4450 [(set_attr "type" "neon_load1_all_lanes<q>")]
4453 (define_insn_and_split "neon_vld1_dupv2di"
4454 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
4455 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
4458 "&& reload_completed"
4461 rtx tmprtx = gen_lowpart (DImode, operands[0]);
4462 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
4463 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
4466 [(set_attr "length" "8")
4467 (set_attr "type" "neon_load1_all_lanes_q")]
4470 (define_expand "vec_store_lanes<mode><mode>"
4471 [(set (match_operand:VDQX 0 "neon_struct_operand")
4472 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
4476 (define_insn "neon_vst1<mode>"
4477 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
4478 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
4481 "vst1.<V_sz_elem>\t%h1, %A0"
4482 [(set_attr "type" "neon_store1_1reg<q>")])
4484 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4485 ;; here on big endian targets.
4486 (define_insn "neon_vst1_lane<mode>"
4487 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4489 [(match_operand:VDX 1 "s_register_operand" "w")
4490 (match_operand:SI 2 "immediate_operand" "i")]
4494 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4495 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4496 operands[2] = GEN_INT (lane);
4498 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4500 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4502 [(set_attr "type" "neon_store1_one_lane<q>")]
4505 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4506 ;; here on big endian targets.
4507 (define_insn "neon_vst1_lane<mode>"
4508 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
4510 [(match_operand:VQX 1 "s_register_operand" "w")
4511 (match_operand:SI 2 "immediate_operand" "i")]
4515 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4516 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4517 int regno = REGNO (operands[1]);
4518 if (lane >= max / 2)
4523 operands[2] = GEN_INT (lane);
4524 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
4526 return "vst1.<V_sz_elem>\t{%P1}, %A0";
4528 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
4530 [(set_attr "type" "neon_store1_one_lane<q>")]
4533 (define_expand "vec_load_lanesti<mode>"
4534 [(set (match_operand:TI 0 "s_register_operand")
4535 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
4536 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4540 (define_insn "neon_vld2<mode>"
4541 [(set (match_operand:TI 0 "s_register_operand" "=w")
4542 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
4543 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4547 if (<V_sz_elem> == 64)
4548 return "vld1.64\t%h0, %A1";
4550 return "vld2.<V_sz_elem>\t%h0, %A1";
4553 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4554 (const_string "neon_load1_2reg<q>")
4555 (const_string "neon_load2_2reg<q>")))]
4558 (define_expand "vec_load_lanesoi<mode>"
4559 [(set (match_operand:OI 0 "s_register_operand")
4560 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
4561 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4565 (define_insn "neon_vld2<mode>"
4566 [(set (match_operand:OI 0 "s_register_operand" "=w")
4567 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
4568 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4571 "vld2.<V_sz_elem>\t%h0, %A1"
4572 [(set_attr "type" "neon_load2_2reg_q")])
4574 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4575 ;; here on big endian targets.
4576 (define_insn "neon_vld2_lane<mode>"
4577 [(set (match_operand:TI 0 "s_register_operand" "=w")
4578 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4579 (match_operand:TI 2 "s_register_operand" "0")
4580 (match_operand:SI 3 "immediate_operand" "i")
4581 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4585 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4586 int regno = REGNO (operands[0]);
4588 ops[0] = gen_rtx_REG (DImode, regno);
4589 ops[1] = gen_rtx_REG (DImode, regno + 2);
4590 ops[2] = operands[1];
4591 ops[3] = GEN_INT (lane);
4592 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4595 [(set_attr "type" "neon_load2_one_lane<q>")]
4598 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4599 ;; here on big endian targets.
4600 (define_insn "neon_vld2_lane<mode>"
4601 [(set (match_operand:OI 0 "s_register_operand" "=w")
4602 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4603 (match_operand:OI 2 "s_register_operand" "0")
4604 (match_operand:SI 3 "immediate_operand" "i")
4605 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4609 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4610 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4611 int regno = REGNO (operands[0]);
4613 if (lane >= max / 2)
4618 ops[0] = gen_rtx_REG (DImode, regno);
4619 ops[1] = gen_rtx_REG (DImode, regno + 4);
4620 ops[2] = operands[1];
4621 ops[3] = GEN_INT (lane);
4622 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
4625 [(set_attr "type" "neon_load2_one_lane<q>")]
4628 (define_insn "neon_vld2_dup<mode>"
4629 [(set (match_operand:TI 0 "s_register_operand" "=w")
4630 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
4631 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4635 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4636 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
4638 return "vld1.<V_sz_elem>\t%h0, %A1";
4641 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4642 (const_string "neon_load2_all_lanes<q>")
4643 (const_string "neon_load1_1reg<q>")))]
4646 (define_expand "vec_store_lanesti<mode>"
4647 [(set (match_operand:TI 0 "neon_struct_operand")
4648 (unspec:TI [(match_operand:TI 1 "s_register_operand")
4649 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4653 (define_insn "neon_vst2<mode>"
4654 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
4655 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
4656 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4660 if (<V_sz_elem> == 64)
4661 return "vst1.64\t%h1, %A0";
4663 return "vst2.<V_sz_elem>\t%h1, %A0";
4666 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4667 (const_string "neon_store1_2reg<q>")
4668 (const_string "neon_store2_one_lane<q>")))]
4671 (define_expand "vec_store_lanesoi<mode>"
4672 [(set (match_operand:OI 0 "neon_struct_operand")
4673 (unspec:OI [(match_operand:OI 1 "s_register_operand")
4674 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4678 (define_insn "neon_vst2<mode>"
4679 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
4680 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
4681 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4684 "vst2.<V_sz_elem>\t%h1, %A0"
4685 [(set_attr "type" "neon_store2_4reg<q>")]
4688 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4689 ;; here on big endian targets.
4690 (define_insn "neon_vst2_lane<mode>"
4691 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4692 (unspec:<V_two_elem>
4693 [(match_operand:TI 1 "s_register_operand" "w")
4694 (match_operand:SI 2 "immediate_operand" "i")
4695 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4699 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4700 int regno = REGNO (operands[1]);
4702 ops[0] = operands[0];
4703 ops[1] = gen_rtx_REG (DImode, regno);
4704 ops[2] = gen_rtx_REG (DImode, regno + 2);
4705 ops[3] = GEN_INT (lane);
4706 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4709 [(set_attr "type" "neon_store2_one_lane<q>")]
4712 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4713 ;; here on big endian targets.
4714 (define_insn "neon_vst2_lane<mode>"
4715 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
4716 (unspec:<V_two_elem>
4717 [(match_operand:OI 1 "s_register_operand" "w")
4718 (match_operand:SI 2 "immediate_operand" "i")
4719 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4723 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
4724 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4725 int regno = REGNO (operands[1]);
4727 if (lane >= max / 2)
4732 ops[0] = operands[0];
4733 ops[1] = gen_rtx_REG (DImode, regno);
4734 ops[2] = gen_rtx_REG (DImode, regno + 4);
4735 ops[3] = GEN_INT (lane);
4736 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
4739 [(set_attr "type" "neon_store2_one_lane<q>")]
4742 (define_expand "vec_load_lanesei<mode>"
4743 [(set (match_operand:EI 0 "s_register_operand")
4744 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
4745 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4749 (define_insn "neon_vld3<mode>"
4750 [(set (match_operand:EI 0 "s_register_operand" "=w")
4751 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
4752 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4756 if (<V_sz_elem> == 64)
4757 return "vld1.64\t%h0, %A1";
4759 return "vld3.<V_sz_elem>\t%h0, %A1";
4762 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4763 (const_string "neon_load1_3reg<q>")
4764 (const_string "neon_load3_3reg<q>")))]
4767 (define_expand "vec_load_lanesci<mode>"
4768 [(match_operand:CI 0 "s_register_operand")
4769 (match_operand:CI 1 "neon_struct_operand")
4770 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4773 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
4777 (define_expand "neon_vld3<mode>"
4778 [(match_operand:CI 0 "s_register_operand")
4779 (match_operand:CI 1 "neon_struct_operand")
4780 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4785 mem = adjust_address (operands[1], EImode, 0);
4786 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
4787 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4788 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
4792 (define_insn "neon_vld3qa<mode>"
4793 [(set (match_operand:CI 0 "s_register_operand" "=w")
4794 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4795 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4799 int regno = REGNO (operands[0]);
4801 ops[0] = gen_rtx_REG (DImode, regno);
4802 ops[1] = gen_rtx_REG (DImode, regno + 4);
4803 ops[2] = gen_rtx_REG (DImode, regno + 8);
4804 ops[3] = operands[1];
4805 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4808 [(set_attr "type" "neon_load3_3reg<q>")]
4811 (define_insn "neon_vld3qb<mode>"
4812 [(set (match_operand:CI 0 "s_register_operand" "=w")
4813 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
4814 (match_operand:CI 2 "s_register_operand" "0")
4815 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4819 int regno = REGNO (operands[0]);
4821 ops[0] = gen_rtx_REG (DImode, regno + 2);
4822 ops[1] = gen_rtx_REG (DImode, regno + 6);
4823 ops[2] = gen_rtx_REG (DImode, regno + 10);
4824 ops[3] = operands[1];
4825 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
4828 [(set_attr "type" "neon_load3_3reg<q>")]
4831 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4832 ;; here on big endian targets.
4833 (define_insn "neon_vld3_lane<mode>"
4834 [(set (match_operand:EI 0 "s_register_operand" "=w")
4835 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4836 (match_operand:EI 2 "s_register_operand" "0")
4837 (match_operand:SI 3 "immediate_operand" "i")
4838 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4842 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
4843 int regno = REGNO (operands[0]);
4845 ops[0] = gen_rtx_REG (DImode, regno);
4846 ops[1] = gen_rtx_REG (DImode, regno + 2);
4847 ops[2] = gen_rtx_REG (DImode, regno + 4);
4848 ops[3] = operands[1];
4849 ops[4] = GEN_INT (lane);
4850 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4854 [(set_attr "type" "neon_load3_one_lane<q>")]
4857 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
4858 ;; here on big endian targets.
4859 (define_insn "neon_vld3_lane<mode>"
4860 [(set (match_operand:CI 0 "s_register_operand" "=w")
4861 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4862 (match_operand:CI 2 "s_register_operand" "0")
4863 (match_operand:SI 3 "immediate_operand" "i")
4864 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4868 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
4869 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
4870 int regno = REGNO (operands[0]);
4872 if (lane >= max / 2)
4877 ops[0] = gen_rtx_REG (DImode, regno);
4878 ops[1] = gen_rtx_REG (DImode, regno + 4);
4879 ops[2] = gen_rtx_REG (DImode, regno + 8);
4880 ops[3] = operands[1];
4881 ops[4] = GEN_INT (lane);
4882 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
4886 [(set_attr "type" "neon_load3_one_lane<q>")]
4889 (define_insn "neon_vld3_dup<mode>"
4890 [(set (match_operand:EI 0 "s_register_operand" "=w")
4891 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
4892 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4896 if (GET_MODE_NUNITS (<MODE>mode) > 1)
4898 int regno = REGNO (operands[0]);
4900 ops[0] = gen_rtx_REG (DImode, regno);
4901 ops[1] = gen_rtx_REG (DImode, regno + 2);
4902 ops[2] = gen_rtx_REG (DImode, regno + 4);
4903 ops[3] = operands[1];
4904 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
4908 return "vld1.<V_sz_elem>\t%h0, %A1";
4911 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
4912 (const_string "neon_load3_all_lanes<q>")
4913 (const_string "neon_load1_1reg<q>")))])
4915 (define_expand "vec_store_lanesei<mode>"
4916 [(set (match_operand:EI 0 "neon_struct_operand")
4917 (unspec:EI [(match_operand:EI 1 "s_register_operand")
4918 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4922 (define_insn "neon_vst3<mode>"
4923 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4924 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
4925 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4929 if (<V_sz_elem> == 64)
4930 return "vst1.64\t%h1, %A0";
4932 return "vst3.<V_sz_elem>\t%h1, %A0";
4935 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
4936 (const_string "neon_store1_3reg<q>")
4937 (const_string "neon_store3_one_lane<q>")))])
4939 (define_expand "vec_store_lanesci<mode>"
4940 [(match_operand:CI 0 "neon_struct_operand")
4941 (match_operand:CI 1 "s_register_operand")
4942 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4945 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
4949 (define_expand "neon_vst3<mode>"
4950 [(match_operand:CI 0 "neon_struct_operand")
4951 (match_operand:CI 1 "s_register_operand")
4952 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4957 mem = adjust_address (operands[0], EImode, 0);
4958 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
4959 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
4960 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
4964 (define_insn "neon_vst3qa<mode>"
4965 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4966 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4967 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4971 int regno = REGNO (operands[1]);
4973 ops[0] = operands[0];
4974 ops[1] = gen_rtx_REG (DImode, regno);
4975 ops[2] = gen_rtx_REG (DImode, regno + 4);
4976 ops[3] = gen_rtx_REG (DImode, regno + 8);
4977 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4980 [(set_attr "type" "neon_store3_3reg<q>")]
4983 (define_insn "neon_vst3qb<mode>"
4984 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
4985 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
4986 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4990 int regno = REGNO (operands[1]);
4992 ops[0] = operands[0];
4993 ops[1] = gen_rtx_REG (DImode, regno + 2);
4994 ops[2] = gen_rtx_REG (DImode, regno + 6);
4995 ops[3] = gen_rtx_REG (DImode, regno + 10);
4996 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
4999 [(set_attr "type" "neon_store3_3reg<q>")]
5002 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5003 ;; here on big endian targets.
5004 (define_insn "neon_vst3_lane<mode>"
5005 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5006 (unspec:<V_three_elem>
5007 [(match_operand:EI 1 "s_register_operand" "w")
5008 (match_operand:SI 2 "immediate_operand" "i")
5009 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5013 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5014 int regno = REGNO (operands[1]);
5016 ops[0] = operands[0];
5017 ops[1] = gen_rtx_REG (DImode, regno);
5018 ops[2] = gen_rtx_REG (DImode, regno + 2);
5019 ops[3] = gen_rtx_REG (DImode, regno + 4);
5020 ops[4] = GEN_INT (lane);
5021 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5025 [(set_attr "type" "neon_store3_one_lane<q>")]
5028 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5029 ;; here on big endian targets.
5030 (define_insn "neon_vst3_lane<mode>"
5031 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5032 (unspec:<V_three_elem>
5033 [(match_operand:CI 1 "s_register_operand" "w")
5034 (match_operand:SI 2 "immediate_operand" "i")
5035 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5039 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5040 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5041 int regno = REGNO (operands[1]);
5043 if (lane >= max / 2)
5048 ops[0] = operands[0];
5049 ops[1] = gen_rtx_REG (DImode, regno);
5050 ops[2] = gen_rtx_REG (DImode, regno + 4);
5051 ops[3] = gen_rtx_REG (DImode, regno + 8);
5052 ops[4] = GEN_INT (lane);
5053 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5057 [(set_attr "type" "neon_store3_one_lane<q>")]
5060 (define_expand "vec_load_lanesoi<mode>"
5061 [(set (match_operand:OI 0 "s_register_operand")
5062 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5063 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5067 (define_insn "neon_vld4<mode>"
5068 [(set (match_operand:OI 0 "s_register_operand" "=w")
5069 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5070 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5074 if (<V_sz_elem> == 64)
5075 return "vld1.64\t%h0, %A1";
5077 return "vld4.<V_sz_elem>\t%h0, %A1";
5080 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5081 (const_string "neon_load1_4reg<q>")
5082 (const_string "neon_load4_4reg<q>")))]
5085 (define_expand "vec_load_lanesxi<mode>"
5086 [(match_operand:XI 0 "s_register_operand")
5087 (match_operand:XI 1 "neon_struct_operand")
5088 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5091 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
5095 (define_expand "neon_vld4<mode>"
5096 [(match_operand:XI 0 "s_register_operand")
5097 (match_operand:XI 1 "neon_struct_operand")
5098 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5103 mem = adjust_address (operands[1], OImode, 0);
5104 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
5105 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5106 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
5110 (define_insn "neon_vld4qa<mode>"
5111 [(set (match_operand:XI 0 "s_register_operand" "=w")
5112 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5113 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5117 int regno = REGNO (operands[0]);
5119 ops[0] = gen_rtx_REG (DImode, regno);
5120 ops[1] = gen_rtx_REG (DImode, regno + 4);
5121 ops[2] = gen_rtx_REG (DImode, regno + 8);
5122 ops[3] = gen_rtx_REG (DImode, regno + 12);
5123 ops[4] = operands[1];
5124 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5127 [(set_attr "type" "neon_load4_4reg<q>")]
5130 (define_insn "neon_vld4qb<mode>"
5131 [(set (match_operand:XI 0 "s_register_operand" "=w")
5132 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
5133 (match_operand:XI 2 "s_register_operand" "0")
5134 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5138 int regno = REGNO (operands[0]);
5140 ops[0] = gen_rtx_REG (DImode, regno + 2);
5141 ops[1] = gen_rtx_REG (DImode, regno + 6);
5142 ops[2] = gen_rtx_REG (DImode, regno + 10);
5143 ops[3] = gen_rtx_REG (DImode, regno + 14);
5144 ops[4] = operands[1];
5145 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
5148 [(set_attr "type" "neon_load4_4reg<q>")]
5151 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5152 ;; here on big endian targets.
5153 (define_insn "neon_vld4_lane<mode>"
5154 [(set (match_operand:OI 0 "s_register_operand" "=w")
5155 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5156 (match_operand:OI 2 "s_register_operand" "0")
5157 (match_operand:SI 3 "immediate_operand" "i")
5158 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5162 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5163 int regno = REGNO (operands[0]);
5165 ops[0] = gen_rtx_REG (DImode, regno);
5166 ops[1] = gen_rtx_REG (DImode, regno + 2);
5167 ops[2] = gen_rtx_REG (DImode, regno + 4);
5168 ops[3] = gen_rtx_REG (DImode, regno + 6);
5169 ops[4] = operands[1];
5170 ops[5] = GEN_INT (lane);
5171 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5175 [(set_attr "type" "neon_load4_one_lane<q>")]
5178 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5179 ;; here on big endian targets.
5180 (define_insn "neon_vld4_lane<mode>"
5181 [(set (match_operand:XI 0 "s_register_operand" "=w")
5182 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5183 (match_operand:XI 2 "s_register_operand" "0")
5184 (match_operand:SI 3 "immediate_operand" "i")
5185 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5189 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5190 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5191 int regno = REGNO (operands[0]);
5193 if (lane >= max / 2)
5198 ops[0] = gen_rtx_REG (DImode, regno);
5199 ops[1] = gen_rtx_REG (DImode, regno + 4);
5200 ops[2] = gen_rtx_REG (DImode, regno + 8);
5201 ops[3] = gen_rtx_REG (DImode, regno + 12);
5202 ops[4] = operands[1];
5203 ops[5] = GEN_INT (lane);
5204 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
5208 [(set_attr "type" "neon_load4_one_lane<q>")]
5211 (define_insn "neon_vld4_dup<mode>"
5212 [(set (match_operand:OI 0 "s_register_operand" "=w")
5213 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
5214 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5218 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5220 int regno = REGNO (operands[0]);
5222 ops[0] = gen_rtx_REG (DImode, regno);
5223 ops[1] = gen_rtx_REG (DImode, regno + 2);
5224 ops[2] = gen_rtx_REG (DImode, regno + 4);
5225 ops[3] = gen_rtx_REG (DImode, regno + 6);
5226 ops[4] = operands[1];
5227 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
5232 return "vld1.<V_sz_elem>\t%h0, %A1";
5235 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5236 (const_string "neon_load4_all_lanes<q>")
5237 (const_string "neon_load1_1reg<q>")))]
5240 (define_expand "vec_store_lanesoi<mode>"
5241 [(set (match_operand:OI 0 "neon_struct_operand")
5242 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5243 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; vst4 for D-register element modes: store four interleaved vectors.
;; 64-bit elements need no interleaving (one element per vector), so they
;; use a plain vst1.64; the type attribute is selected to match.
5247 (define_insn "neon_vst4<mode>"
5248   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5249         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5250                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5254   if (<V_sz_elem> == 64)
5255     return "vst1.64\t%h1, %A0";
5257     return "vst4.<V_sz_elem>\t%h1, %A0";
5260     (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5261                   (const_string "neon_store1_4reg<q>")
5262                   (const_string "neon_store4_4reg<q>")))]
;; Standard-name expander for storing four interleaved Q-register vectors
;; (an XImode group); delegates to the neon_vst4<mode> expander below.
5265 (define_expand "vec_store_lanesxi<mode>"
5266   [(match_operand:XI 0 "neon_struct_operand")
5267    (match_operand:XI 1 "s_register_operand")
5268    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5271   emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; vst4 of Q-register vectors cannot be done in one instruction, so split
;; it into two half-stores: vst4qa writes the even-numbered D halves to the
;; first OImode-sized piece of memory, vst4qb the odd halves to the second.
5275 (define_expand "neon_vst4<mode>"
5276   [(match_operand:XI 0 "neon_struct_operand")
5277    (match_operand:XI 1 "s_register_operand")
5278    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5283   mem = adjust_address (operands[0], OImode, 0);
5284   emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
5285   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
5286   emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of a quad vst4: store the low D register of each of the four
;; Q registers (regno, +4, +8, +12) interleaved to memory.
5290 (define_insn "neon_vst4qa<mode>"
5291   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5292         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5293                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5297   int regno = REGNO (operands[1]);
5299   ops[0] = operands[0];
5300   ops[1] = gen_rtx_REG (DImode, regno);
5301   ops[2] = gen_rtx_REG (DImode, regno + 4);
5302   ops[3] = gen_rtx_REG (DImode, regno + 8);
5303   ops[4] = gen_rtx_REG (DImode, regno + 12);
5304   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5307   [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of a quad vst4: store the high D register of each of the
;; four Q registers (regno + 2, + 6, + 10, + 14) interleaved to memory.
5310 (define_insn "neon_vst4qb<mode>"
5311   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5312         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
5313                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5317   int regno = REGNO (operands[1]);
5319   ops[0] = operands[0];
5320   ops[1] = gen_rtx_REG (DImode, regno + 2);
5321   ops[2] = gen_rtx_REG (DImode, regno + 6);
5322   ops[3] = gen_rtx_REG (DImode, regno + 10);
5323   ops[4] = gen_rtx_REG (DImode, regno + 14);
5324   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
5327   [(set_attr "type" "neon_store4_4reg<q>")]
;; vst4 single-lane store, D-register modes: store lane `operands[2]' of
;; each of four consecutive-pair D registers (regno, +2, +4, +6) to one
;; <V_four_elem>-sized memory location.  NEON_ENDIAN_LANE_N remaps the
;; lane index on big-endian targets.
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
5332 (define_insn "neon_vst4_lane<mode>"
5333   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5334         (unspec:<V_four_elem>
5335            [(match_operand:OI 1 "s_register_operand" "w")
5336             (match_operand:SI 2 "immediate_operand" "i")
5337             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5341   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5342   int regno = REGNO (operands[1]);
5344   ops[0] = operands[0];
5345   ops[1] = gen_rtx_REG (DImode, regno);
5346   ops[2] = gen_rtx_REG (DImode, regno + 2);
5347   ops[3] = gen_rtx_REG (DImode, regno + 4);
5348   ops[4] = gen_rtx_REG (DImode, regno + 6);
5349   ops[5] = GEN_INT (lane);
5350   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5354   [(set_attr "type" "neon_store4_one_lane<q>")]
;; vst4 single-lane store, Q-register modes: each element group lives in a
;; Q register (XImode input, D regs at regno, +4, +8, +12).  Lanes in the
;; upper half of the vector presumably select the odd D registers instead
;; (the `if (lane >= max / 2)' adjustment lines are missing from this
;; chunk — verify against the full source).
;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
;; here on big endian targets.
5359 (define_insn "neon_vst4_lane<mode>"
5360   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
5361         (unspec:<V_four_elem>
5362            [(match_operand:XI 1 "s_register_operand" "w")
5363             (match_operand:SI 2 "immediate_operand" "i")
5364             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5368   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5369   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5370   int regno = REGNO (operands[1]);
5372   if (lane >= max / 2)
5377   ops[0] = operands[0];
5378   ops[1] = gen_rtx_REG (DImode, regno);
5379   ops[2] = gen_rtx_REG (DImode, regno + 4);
5380   ops[3] = gen_rtx_REG (DImode, regno + 8);
5381   ops[4] = gen_rtx_REG (DImode, regno + 12);
5382   ops[5] = GEN_INT (lane);
5383   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
5387   [(set_attr "type" "neon_store4_4reg<q>")]
;; Widen (sign- or zero-extend, via the SE iterator) the low half of a
;; Q-register vector into a full Q register using vmovl.  Little-endian
;; only: lane numbering differs on big-endian.
5390 (define_insn "neon_vec_unpack<US>_lo_<mode>"
5391   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5392         (SE:<V_unpack> (vec_select:<V_HALF>
5393                           (match_operand:VU 1 "register_operand" "w")
5394                           (match_operand:VU 2 "vect_par_constant_low" ""))))]
5395   "TARGET_NEON && !BYTES_BIG_ENDIAN"
5396   "vmovl.<US><V_sz_elem> %q0, %e1"
5397   [(set_attr "type" "neon_shift_imm_long")]
;; As neon_vec_unpack<US>_lo_<mode>, but widens the high half (%f1).
5400 (define_insn "neon_vec_unpack<US>_hi_<mode>"
5401   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5402         (SE:<V_unpack> (vec_select:<V_HALF>
5403                           (match_operand:VU 1 "register_operand" "w")
5404                           (match_operand:VU 2 "vect_par_constant_high" ""))))]
5405   "TARGET_NEON && !BYTES_BIG_ENDIAN"
5406   "vmovl.<US><V_sz_elem> %q0, %f1"
5407   [(set_attr "type" "neon_shift_imm_long")]
;; Standard-name expander: build a PARALLEL selecting the upper-half lane
;; indices (nunits/2 .. nunits-1) and emit neon_vec_unpack<US>_hi_<mode>.
5410 (define_expand "vec_unpack<US>_hi_<mode>"
5411   [(match_operand:<V_unpack> 0 "register_operand" "")
5412    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
5413   "TARGET_NEON && !BYTES_BIG_ENDIAN"
5415   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
5418   for (i = 0; i < (<V_mode_nunits>/2); i++)
5419     RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
5421   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5422   emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Standard-name expander: build a PARALLEL selecting the lower-half lane
;; indices (0 .. nunits/2-1) and emit neon_vec_unpack<US>_lo_<mode>.
5429 (define_expand "vec_unpack<US>_lo_<mode>"
5430   [(match_operand:<V_unpack> 0 "register_operand" "")
5431    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
5432   "TARGET_NEON && !BYTES_BIG_ENDIAN"
5434   rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
5437   for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5438     RTVEC_ELT (v, i) = GEN_INT (i);
5439   t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5440   emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the low halves of two Q-register vectors (vmull),
;; producing a full Q-register result of double-width elements.
5447 (define_insn "neon_vec_<US>mult_lo_<mode>"
5448  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5449        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5450                            (match_operand:VU 1 "register_operand" "w")
5451                            (match_operand:VU 2 "vect_par_constant_low" "")))
5452                         (SE:<V_unpack> (vec_select:<V_HALF>
5453                            (match_operand:VU 3 "register_operand" "w")
5455   "TARGET_NEON && !BYTES_BIG_ENDIAN"
5456   "vmull.<US><V_sz_elem> %q0, %e1, %e3"
5457   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Standard-name expander for widening multiply of low halves: build the
;; low-lane PARALLEL and emit neon_vec_<US>mult_lo_<mode>.
5460 (define_expand "vec_widen_<US>mult_lo_<mode>"
5461   [(match_operand:<V_unpack> 0 "register_operand" "")
5462    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5463    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5464  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5466    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
5469    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5470      RTVEC_ELT (v, i) = GEN_INT (i);
5471    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5473    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the high halves of two Q-register vectors (vmull
;; on %f1/%f3).
5481 (define_insn "neon_vec_<US>mult_hi_<mode>"
5482  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
5483       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
5484                            (match_operand:VU 1 "register_operand" "w")
5485                            (match_operand:VU 2 "vect_par_constant_high" "")))
5486                        (SE:<V_unpack> (vec_select:<V_HALF>
5487                            (match_operand:VU 3 "register_operand" "w")
5489   "TARGET_NEON && !BYTES_BIG_ENDIAN"
5490   "vmull.<US><V_sz_elem> %q0, %f1, %f3"
5491   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Standard-name expander for widening multiply of high halves: build the
;; high-lane PARALLEL and emit neon_vec_<US>mult_hi_<mode>.
5494 (define_expand "vec_widen_<US>mult_hi_<mode>"
5495   [(match_operand:<V_unpack> 0 "register_operand" "")
5496    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5497    (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
5498  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5500    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
5503    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
5504      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
5505    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
5507    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift-left by immediate: vshll extends each D-register element
;; and shifts by the constant in operand 2.
5516 (define_insn "neon_vec_<US>shiftl_<mode>"
5517  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5518        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
5519        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
5522   return "vshll.<US><V_sz_elem> %q0, %P1, %2";
5524   [(set_attr "type" "neon_shift_imm_long")]
;; Widening shift of the low half: take the low-half subreg (offset 0) of
;; the Q-register input and emit the D-register vshll pattern.
5527 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5528   [(match_operand:<V_unpack> 0 "register_operand" "")
5529    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5530    (match_operand:SI 2 "immediate_operand" "i")]
5531  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5533   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5534                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Widening shift of the high half: same as the _lo expander but the
;; subreg offset is half the vector size, selecting the upper D register.
5540 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5541   [(match_operand:<V_unpack> 0 "register_operand" "")
5542    (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
5543    (match_operand:SI 2 "immediate_operand" "i")]
5544  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5546   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
5547                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
5548                                      GET_MODE_SIZE (<V_HALF>mode)),
;; Widen an entire D-register vector into a Q register with vmovl
;; (sign/zero extend per the SE iterator).
;; Vectorize for non-neon-quad case
5555 (define_insn "neon_unpack<US>_<mode>"
5556  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5557        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
5559   "vmovl.<US><V_sz_elem> %q0, %P1"
5560   [(set_attr "type" "neon_move")]
;; D-register unpack-low: widen the whole vector into a temporary Q
;; register, then extract its low half into the destination.
5563 (define_expand "vec_unpack<US>_lo_<mode>"
5564   [(match_operand:<V_double_width> 0 "register_operand" "")
5565    (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5568   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5569   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5570   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; D-register unpack-high: widen into a temporary Q register and extract
;; its high half.
5576 (define_expand "vec_unpack<US>_hi_<mode>"
5577   [(match_operand:<V_double_width> 0 "register_operand" "")
5578    (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
5581   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5582   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
5583   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole D-register vectors with vmull,
;; producing a Q-register result of double-width elements.
5589 (define_insn "neon_vec_<US>mult_<mode>"
5590  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
5591        (mult:<V_widen> (SE:<V_widen>
5592                           (match_operand:VDI 1 "register_operand" "w"))
5594                           (match_operand:VDI 2 "register_operand" "w"))))]
5596   "vmull.<US><V_sz_elem> %q0, %P1, %P2"
5597   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; D-register widening multiply, high result: full vmull into a temporary
;; Q register, then take its high half.
5600 (define_expand "vec_widen_<US>mult_hi_<mode>"
5601   [(match_operand:<V_double_width> 0 "register_operand" "")
5602    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5603    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5606   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5607   emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5608   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; D-register widening multiply, low result: full vmull into a temporary
;; Q register, then take its low half.
5615 (define_expand "vec_widen_<US>mult_lo_<mode>"
5616   [(match_operand:<V_double_width> 0 "register_operand" "")
5617    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5618    (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
5621   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5622   emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
5623   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; D-register widening shift, high result: vshll into a temporary Q
;; register, then take its high half.
5630 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
5631  [(match_operand:<V_double_width> 0 "register_operand" "")
5632    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5633    (match_operand:SI 2 "immediate_operand" "i")]
5636   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5637   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5638   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; D-register widening shift, low result: vshll into a temporary Q
;; register, then take its low half.
5644 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
5645   [(match_operand:<V_double_width> 0 "register_operand" "")
5646    (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
5647    (match_operand:SI 2 "immediate_operand" "i")]
5650   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
5651   emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
5652   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Narrow two Q-register vectors into one: two vmovn instructions write
;; the low and high D halves of the result.  The output is earlyclobber
;; ("=&w") because the first vmovn must not overwrite operand 2 before the
;; second vmovn reads it.
; FIXME: These instruction patterns can't be used safely in big-endian mode
; because the ordering of vector elements in Q registers is different from what
; the semantics of the instructions require.
5662 (define_insn "vec_pack_trunc_<mode>"
5663  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
5664        (vec_concat:<V_narrow_pack>
5665                 (truncate:<V_narrow>
5666                         (match_operand:VN 1 "register_operand" "w"))
5667                 (truncate:<V_narrow>
5668                         (match_operand:VN 2 "register_operand" "w"))))]
5669  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5670  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
5671  [(set_attr "type" "multiple")
5672   (set_attr "length" "8")]
;; Single vmovn: narrow one Q-register vector into one D register.
;; For the non-quad case.
5676 (define_insn "neon_vec_pack_trunc_<mode>"
5677  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
5678        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
5679  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5680  "vmovn.i<V_sz_elem>\t%P0, %q1"
5681  [(set_attr "type" "neon_move_narrow_q")]
;; Pack-truncate for D-register inputs: assemble the two D inputs into the
;; low and high halves of a double-width temporary, then narrow it with
;; neon_vec_pack_trunc_<V_double>.
5684 (define_expand "vec_pack_trunc_<mode>"
5685  [(match_operand:<V_narrow_pack> 0 "register_operand" "")
5686   (match_operand:VSHFT 1 "register_operand" "")
5687   (match_operand:VSHFT 2 "register_operand")]
5688  "TARGET_NEON && !BYTES_BIG_ENDIAN"
5690   rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
5692   emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
5693   emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
5694   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Combine abs(a - b) into a single vabd.  Restricted to non-float modes
;; or -funsafe-math-optimizations, since vabd does not round identically
;; to a separate subtract-then-abs for floats.
5698 (define_insn "neon_vabd<mode>_2"
5699  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5700        (abs:VDQ (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
5701                            (match_operand:VDQ 2 "s_register_operand" "w"))))]
5702  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5703  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5705     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5706                   (const_string "neon_fp_abd_s<q>")
5707                   (const_string "neon_abd<q>")))]
;; As neon_vabd<mode>_2, but matching abs() wrapped around an unspec form
;; of the subtraction (the unspec tag line is missing from this chunk).
5710 (define_insn "neon_vabd<mode>_3"
5711  [(set (match_operand:VDQ 0 "s_register_operand" "=w")
5712        (abs:VDQ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")
5713                              (match_operand:VDQ 2 "s_register_operand" "w")]
5715  "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
5716  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
5718     (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
5719                   (const_string "neon_fp_abd_s<q>")
5720                   (const_string "neon_abd<q>")))]
;; Post-reload split: when a SImode->DImode sign extension lands in a VFP
;; register, materialize it as a vdup into both SI halves followed by an
;; arithmetic right shift of 32, avoiding a core->NEON transfer of the
;; extended value.
;; Copy from core-to-neon regs, then extend, not vice-versa
5726   [(set (match_operand:DI 0 "s_register_operand" "")
5727         (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5728   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5729   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5730    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
5732   operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; As the SImode split above, for HImode: vdup across V4HI lanes, then
;; arithmetic right shift by 48 to sign-extend the low 16 bits.
5736   [(set (match_operand:DI 0 "s_register_operand" "")
5737         (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5738   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5739   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5740    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
5742   operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; QImode variant: vdup across V8QI lanes, then arithmetic right shift by
;; 56 to sign-extend the low 8 bits.
5746   [(set (match_operand:DI 0 "s_register_operand" "")
5747         (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5748   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5749   [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5750    (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
5752   operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; Zero-extension counterpart of the SImode split: vdup then LOGICAL right
;; shift by 32, clearing the upper word.
5756   [(set (match_operand:DI 0 "s_register_operand" "")
5757         (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
5758   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5759   [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
5760    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
5762   operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; Zero-extension split for HImode: vdup across V4HI lanes, then logical
;; right shift by 48.
5766   [(set (match_operand:DI 0 "s_register_operand" "")
5767         (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
5768   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5769   [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
5770    (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
5772   operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
5776 [(set (match_operand:DI 0 "s_register_operand" "")
5777 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
5778 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
5779 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
5780 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
5782 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));