1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2019 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; Default mnemonic is "vadd"; individual patterns override it.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Moves for 64-bit NEON vector modes (VDX): NEON<->NEON, NEON<->memory,
;; NEON<->core registers and core<->core.  Alternatives 2 and 3 build a
;; vector immediate with vmov.i<width>/vmov.f32 (validated by
;; neon_immediate_valid_for_move); alternatives 1 and 4 go through
;; output_move_neon, core-register cases through output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, w, ?r,?w,?r, ?Us,*r")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2 || which_alternative == 3)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 4: return output_move_neon (operands);
57 case 2: case 3: gcc_unreachable ();
58 case 5: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 6: return "vmov\t%P0, %Q1, %R1 @ <mode>";
61 default: return output_move_double (operands, true, NULL);
64 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
65 neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
66 neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
68 (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
69 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,1020,*,*")
70 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,1018,*,*")
71 (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
;; Moves for 128-bit NEON vector modes (VQXMOV).  Same structure as the
;; VDX pattern above, but quad registers: NEON<->core transfers take two
;; vmov pairs (%e/%f halves), and immediates use the %q operand form.
73 (define_insn "*neon_mov<mode>"
74 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
75 "=w,Un,w, w, w, ?r,?w,?r,?r, ?Us")
76 (match_operand:VQXMOV 1 "general_operand"
77 " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
79 && (register_operand (operands[0], <MODE>mode)
80 || register_operand (operands[1], <MODE>mode))"
82 if (which_alternative == 2 || which_alternative == 3)
85 static char templ[40];
87 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
88 &operands[1], &width);
90 gcc_assert (is_valid != 0);
93 return "vmov.f32\t%q0, %1 @ <mode>";
95 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
100 switch (which_alternative)
102 case 0: return "vmov\t%q0, %q1 @ <mode>";
103 case 1: case 4: return output_move_neon (operands);
104 case 2: case 3: gcc_unreachable ();
105 case 5: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
106 case 6: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
107 default: return output_move_quad (operands);
110 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
111 neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
112 neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
113 (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
114 (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
115 (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
116 (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
118 /* We define these mov expanders to match the standard mov$a optab to prevent
119 the mid-end from trying to do a subreg for these modes which is the most
120    inefficient way to expand the move.  Also big-endian subregs aren't
121 allowed for a subset of modes, See TARGET_CAN_CHANGE_MODE_CLASS.
122 Without these RTL generation patterns the mid-end would attempt to take a
123 sub-reg and may ICE if it can't. */
;; Standard mov expander for TImode: if we can still create pseudos and
;; the destination is not a register, force the source into one so we
;; never emit a mem-to-mem move.
125 (define_expand "movti"
126 [(set (match_operand:TI 0 "nonimmediate_operand")
127 (match_operand:TI 1 "general_operand"))]
130 if (can_create_pseudo_p ())
132 if (!REG_P (operands[0]))
133 operands[1] = force_reg (TImode, operands[1]);
;; Same legalization for the NEON structure modes (VSTRUCT).
137 (define_expand "mov<mode>"
138 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
139 (match_operand:VSTRUCT 1 "general_operand"))]
142 if (can_create_pseudo_p ())
144 if (!REG_P (operands[0]))
145 operands[1] = force_reg (<MODE>mode, operands[1]);
;; And for the half-float vector modes (VH).
149 (define_expand "mov<mode>"
150 [(set (match_operand:VH 0 "s_register_operand")
151 (match_operand:VH 1 "s_register_operand"))]
154 if (can_create_pseudo_p ())
156 if (!REG_P (operands[0]))
157 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Moves for NEON structure modes: register-to-register or to/from a
;; "Ut" structure memory operand, emitted via output_move_neon.  Length
;; is computed per-insn by arm_attr_length_move_neon.
161 (define_insn "*neon_mov<mode>"
162 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
163 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
165 && (register_operand (operands[0], <MODE>mode)
166 || register_operand (operands[1], <MODE>mode))"
168 switch (which_alternative)
171 case 1: case 2: return output_move_neon (operands);
172 default: gcc_unreachable ();
175 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
176 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; Post-reload splits that decompose large-mode register-to-register
;; copies into TImode/DImode piece moves.  neon_disambiguate_copy orders
;; the piece copies so overlapping source/destination registers are not
;; clobbered before they are read.
;; EI (3 D-regs): one TImode piece plus one DImode piece.
179 [(set (match_operand:EI 0 "s_register_operand" "")
180 (match_operand:EI 1 "s_register_operand" ""))]
181 "TARGET_NEON && reload_completed"
182 [(set (match_dup 0) (match_dup 1))
183 (set (match_dup 2) (match_dup 3))]
185 int rdest = REGNO (operands[0]);
186 int rsrc = REGNO (operands[1]);
189 dest[0] = gen_rtx_REG (TImode, rdest);
190 src[0] = gen_rtx_REG (TImode, rsrc);
191 dest[1] = gen_rtx_REG (DImode, rdest + 4);
192 src[1] = gen_rtx_REG (DImode, rsrc + 4);
194 neon_disambiguate_copy (operands, dest, src, 2);
;; OI (4 D-regs): two TImode pieces.
198 [(set (match_operand:OI 0 "s_register_operand" "")
199 (match_operand:OI 1 "s_register_operand" ""))]
200 "TARGET_NEON && reload_completed"
201 [(set (match_dup 0) (match_dup 1))
202 (set (match_dup 2) (match_dup 3))]
204 int rdest = REGNO (operands[0]);
205 int rsrc = REGNO (operands[1]);
208 dest[0] = gen_rtx_REG (TImode, rdest);
209 src[0] = gen_rtx_REG (TImode, rsrc);
210 dest[1] = gen_rtx_REG (TImode, rdest + 4);
211 src[1] = gen_rtx_REG (TImode, rsrc + 4);
213 neon_disambiguate_copy (operands, dest, src, 2);
;; CI (6 D-regs): three TImode pieces.
217 [(set (match_operand:CI 0 "s_register_operand" "")
218 (match_operand:CI 1 "s_register_operand" ""))]
219 "TARGET_NEON && reload_completed"
220 [(set (match_dup 0) (match_dup 1))
221 (set (match_dup 2) (match_dup 3))
222 (set (match_dup 4) (match_dup 5))]
224 int rdest = REGNO (operands[0]);
225 int rsrc = REGNO (operands[1]);
228 dest[0] = gen_rtx_REG (TImode, rdest);
229 src[0] = gen_rtx_REG (TImode, rsrc);
230 dest[1] = gen_rtx_REG (TImode, rdest + 4);
231 src[1] = gen_rtx_REG (TImode, rsrc + 4);
232 dest[2] = gen_rtx_REG (TImode, rdest + 8);
233 src[2] = gen_rtx_REG (TImode, rsrc + 8);
235 neon_disambiguate_copy (operands, dest, src, 3);
;; XI (8 D-regs): four TImode pieces.
239 [(set (match_operand:XI 0 "s_register_operand" "")
240 (match_operand:XI 1 "s_register_operand" ""))]
241 "TARGET_NEON && reload_completed"
242 [(set (match_dup 0) (match_dup 1))
243 (set (match_dup 2) (match_dup 3))
244 (set (match_dup 4) (match_dup 5))
245 (set (match_dup 6) (match_dup 7))]
247 int rdest = REGNO (operands[0]);
248 int rsrc = REGNO (operands[1]);
251 dest[0] = gen_rtx_REG (TImode, rdest);
252 src[0] = gen_rtx_REG (TImode, rsrc);
253 dest[1] = gen_rtx_REG (TImode, rdest + 4);
254 src[1] = gen_rtx_REG (TImode, rsrc + 4);
255 dest[2] = gen_rtx_REG (TImode, rdest + 8);
256 src[2] = gen_rtx_REG (TImode, rsrc + 8);
257 dest[3] = gen_rtx_REG (TImode, rdest + 12);
258 src[3] = gen_rtx_REG (TImode, rsrc + 12);
260 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned vector loads/stores (little-endian only, and
;; only when unaligned access is permitted).  Guarantees at least one
;; side is a register and the memory address is legitimate for vld1/vst1.
263 (define_expand "movmisalign<mode>"
264 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
265 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
266 UNSPEC_MISALIGNED_ACCESS))]
267 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
270 /* This pattern is not permitted to fail during expansion: if both arguments
271 are non-registers (e.g. memory := constant, which can be created by the
272 auto-vectorizer), force operand 1 into a register. */
273 if (!s_register_operand (operands[0], <MODE>mode)
274 && !s_register_operand (operands[1], <MODE>mode))
275 operands[1] = force_reg (<MODE>mode, operands[1]);
277 if (s_register_operand (operands[0], <MODE>mode))
278 adjust_mem = operands[1];
280 adjust_mem = operands[0];
282 /* Legitimize address. */
283 if (!neon_vector_mem_operand (adjust_mem, 2, true))
284 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Concrete misaligned access insns: vst1/vld1 with element size taken
;; from the mode.  Doubleword (VDX) forms use %P, quadword (VQX) use %q.
288 (define_insn "*movmisalign<mode>_neon_store"
289 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
290 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
291 UNSPEC_MISALIGNED_ACCESS))]
292 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
293 "vst1.<V_sz_elem>\t{%P1}, %A0"
294 [(set_attr "type" "neon_store1_1reg<q>")])
;; Doubleword misaligned load.
296 (define_insn "*movmisalign<mode>_neon_load"
297 [(set (match_operand:VDX 0 "s_register_operand" "=w")
298 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
300 UNSPEC_MISALIGNED_ACCESS))]
301 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
302 "vld1.<V_sz_elem>\t{%P0}, %A1"
303 [(set_attr "type" "neon_load1_1reg<q>")])
;; Quadword misaligned store.
305 (define_insn "*movmisalign<mode>_neon_store"
306 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
307 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
308 UNSPEC_MISALIGNED_ACCESS))]
309 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
310 "vst1.<V_sz_elem>\t{%q1}, %A0"
311 [(set_attr "type" "neon_store1_1reg<q>")])
;; Quadword misaligned load.
313 (define_insn "*movmisalign<mode>_neon_load"
314 [(set (match_operand:VQX 0 "s_register_operand" "=w")
315 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
317 UNSPEC_MISALIGNED_ACCESS))]
318 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
319 "vld1.<V_sz_elem>\t{%q0}, %A1"
320 [(set_attr "type" "neon_load1_1reg<q>")])
;; Lane insertion.  Operand 2 is a one-hot mask; ffs()-1 recovers the
;; lane index.  Alternative 0 loads the lane from memory (vld1 to one
;; lane), alternative 1 moves it from a core register (vmov).
;; Doubleword modes: lane index is flipped on big-endian.
322 (define_insn "@vec_set<mode>_internal"
323 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
325 (vec_duplicate:VD_LANE
326 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
327 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
328 (match_operand:SI 2 "immediate_operand" "i,i")))]
331 int elt = ffs ((int) INTVAL (operands[2])) - 1;
332 if (BYTES_BIG_ENDIAN)
333 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
334 operands[2] = GEN_INT (elt);
336 if (which_alternative == 0)
337 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
339 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
341 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Quadword modes: select the D-register half holding the lane, then
;; insert within that half (big-endian flips within the half).
343 (define_insn "@vec_set<mode>_internal"
344 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
347 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
348 (match_operand:VQ2 3 "s_register_operand" "0,0")
349 (match_operand:SI 2 "immediate_operand" "i,i")))]
352 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
353 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
354 int elt = elem % half_elts;
355 int hi = (elem / half_elts) * 2;
356 int regno = REGNO (operands[0]);
358 if (BYTES_BIG_ENDIAN)
359 elt = half_elts - 1 - elt;
361 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
362 operands[2] = GEN_INT (elt);
364 if (which_alternative == 0)
365 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
367 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
369 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; V2DI: each lane is one whole D register, so just address it directly.
372 (define_insn "@vec_set<mode>_internal"
373 [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
375 (vec_duplicate:V2DI_ONLY
376 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
377 (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
378 (match_operand:SI 2 "immediate_operand" "i,i")))]
381 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
382 int regno = REGNO (operands[0]) + 2 * elem;
384 operands[0] = gen_rtx_REG (DImode, regno);
386 if (which_alternative == 0)
387 return "vld1.64\t%P0, %A1";
389 return "vmov\t%P0, %Q1, %R1";
391 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Standard vec_set entry point: convert the plain lane number in
;; operand 2 into the one-hot mask the internal patterns expect.
394 (define_expand "vec_set<mode>"
395 [(match_operand:VDQ 0 "s_register_operand")
396 (match_operand:<V_elem> 1 "s_register_operand")
397 (match_operand:SI 2 "immediate_operand")]
400 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
401 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
402 GEN_INT (elem), operands[0]));
;; Lane extraction, mirroring the vec_set patterns above: alternative 0
;; stores the lane straight to memory (vst1 of one lane), alternative 1
;; moves it to a core register.
;; Doubleword modes (lane flipped on big-endian).
406 (define_insn "vec_extract<mode><V_elem_l>"
407 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
409 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
410 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
413 if (BYTES_BIG_ENDIAN)
415 int elt = INTVAL (operands[2]);
416 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
417 operands[2] = GEN_INT (elt);
420 if (which_alternative == 0)
421 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
423 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
425 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Quadword modes: pick the D-register half, then the lane within it.
428 (define_insn "vec_extract<mode><V_elem_l>"
429 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
431 (match_operand:VQ2 1 "s_register_operand" "w,w")
432 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
435 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
436 int elt = INTVAL (operands[2]) % half_elts;
437 int hi = (INTVAL (operands[2]) / half_elts) * 2;
438 int regno = REGNO (operands[1]);
440 if (BYTES_BIG_ENDIAN)
441 elt = half_elts - 1 - elt;
443 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
444 operands[2] = GEN_INT (elt);
446 if (which_alternative == 0)
447 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
449 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
451 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; V2DI: each lane is a full D register.
454 (define_insn "vec_extractv2didi"
455 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
457 (match_operand:V2DI 1 "s_register_operand" "w,w")
458 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
461 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
463 operands[1] = gen_rtx_REG (DImode, regno);
465 if (which_alternative == 0)
466 return "vst1.64\t{%P1}, %A0 @ v2di";
468 return "vmov\t%Q0, %R0, %P1 @ v2di";
470 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Standard vec_init entry point; all the work is done in the backend
;; helper neon_expand_vector_init.
473 (define_expand "vec_init<mode><V_elem_l>"
474 [(match_operand:VDQ 0 "s_register_operand")
475 (match_operand 1 "" "")]
478 neon_expand_vector_init (operands[0], operands[1]);
482 ;; Doubleword and quadword arithmetic.
484 ;; NOTE: some other instructions also support 64-bit integer
485 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition.  Float modes are only allowed under
;; -funsafe-math-optimizations (vadd.f32 flushes denormals to zero).
487 (define_insn "*add<mode>3_neon"
488 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
489 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
490 (match_operand:VDQ 2 "s_register_operand" "w")))]
491 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
492 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
494 (if_then_else (match_test "<Is_float_mode>")
495 (const_string "neon_fp_addsub_s<q>")
496 (const_string "neon_add<q>")))]
499 ;; As with SFmode, full support for HFmode vector arithmetic is only available
500 ;; when flag-unsafe-math-optimizations is enabled.
;; Half-float vector addition: the standard-named pattern requires
;; unsafe-math, the _fp16 variant is unconditional for intrinsics.
502 (define_insn "add<mode>3"
504 (match_operand:VH 0 "s_register_operand" "=w")
506 (match_operand:VH 1 "s_register_operand" "w")
507 (match_operand:VH 2 "s_register_operand" "w")))]
508 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
509 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
511 (if_then_else (match_test "<Is_float_mode>")
512 (const_string "neon_fp_addsub_s<q>")
513 (const_string "neon_add<q>")))]
;; Intrinsic form: no unsafe-math gate.
516 (define_insn "add<mode>3_fp16"
518 (match_operand:VH 0 "s_register_operand" "=w")
520 (match_operand:VH 1 "s_register_operand" "w")
521 (match_operand:VH 2 "s_register_operand" "w")))]
522 "TARGET_NEON_FP16INST"
523 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
525 (if_then_else (match_test "<Is_float_mode>")
526 (const_string "neon_fp_addsub_s<q>")
527 (const_string "neon_add<q>")))]
;; DImode addition: either a single vadd.i64 in NEON (alternatives 0/3)
;; or an adds/adc pair in core registers; "arch" steers the choice via
;; neon_for_64bits / avoid_neon_for_64bits tuning.
530 (define_insn "adddi3_neon"
531 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
532 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
533 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
534 (clobber (reg:CC CC_REGNUM))]
537 switch (which_alternative)
539 case 0: /* fall through */
540 case 3: return "vadd.i64\t%P0, %P1, %P2";
546 default: gcc_unreachable ();
549 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
550 multiple,multiple,multiple")
551 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
552 (set_attr "length" "*,8,8,*,8,8,8")
553 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction, mirroring the add patterns above.
556 (define_insn "*sub<mode>3_neon"
557 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
558 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
559 (match_operand:VDQ 2 "s_register_operand" "w")))]
560 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
561 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
563 (if_then_else (match_test "<Is_float_mode>")
564 (const_string "neon_fp_addsub_s<q>")
565 (const_string "neon_sub<q>")))]
;; Half-float subtraction, standard name (needs unsafe-math).
568 (define_insn "sub<mode>3"
570 (match_operand:VH 0 "s_register_operand" "=w")
572 (match_operand:VH 1 "s_register_operand" "w")
573 (match_operand:VH 2 "s_register_operand" "w")))]
574 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
575 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
576 [(set_attr "type" "neon_sub<q>")]
;; Half-float subtraction, intrinsic form (unconditional).
579 (define_insn "sub<mode>3_fp16"
581 (match_operand:VH 0 "s_register_operand" "=w")
583 (match_operand:VH 1 "s_register_operand" "w")
584 (match_operand:VH 2 "s_register_operand" "w")))]
585 "TARGET_NEON_FP16INST"
586 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
587 [(set_attr "type" "neon_sub<q>")]
;; DImode subtraction: vsub.i64 in NEON (alternatives 0/4) or a
;; subs/sbc pair in core registers, selected via the "arch" attribute.
590 (define_insn "subdi3_neon"
591 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
592 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
593 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
594 (clobber (reg:CC CC_REGNUM))]
597 switch (which_alternative)
599 case 0: /* fall through */
600 case 4: return "vsub.i64\t%P0, %P1, %P2";
601 case 1: /* fall through */
602 case 2: /* fall through */
603 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
604 default: gcc_unreachable ();
607 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
608 (set_attr "conds" "*,clob,clob,clob,*")
609 (set_attr "length" "*,8,8,8,*")
610 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiply (float gated on unsafe-math, as with add/sub).
613 (define_insn "*mul<mode>3_neon"
614 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
615 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
616 (match_operand:VDQW 2 "s_register_operand" "w")))]
617 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
618 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
620 (if_then_else (match_test "<Is_float_mode>")
621 (const_string "neon_fp_mul_s<q>")
622 (const_string "neon_mul_<V_elem_ch><q>")))]
625 /* Perform division using multiply-by-reciprocal.
626 Reciprocal is calculated using Newton-Raphson method.
627 Enabled with -funsafe-math-optimizations -freciprocal-math
628 and disabled for -Os since it increases code size . */
;; Float vector division via reciprocal: vrecpe gives an estimate,
;; refined by two vrecps Newton-Raphson steps, then a final multiply.
;; Gated on -freciprocal-math and disabled at -Os (code-size cost).
630 (define_expand "div<mode>3"
631 [(set (match_operand:VCVTF 0 "s_register_operand")
632 (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
633 (match_operand:VCVTF 2 "s_register_operand")))]
634 "TARGET_NEON && !optimize_size
635 && flag_reciprocal_math"
637 rtx rec = gen_reg_rtx (<MODE>mode);
638 rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
640 /* Reciprocal estimate. */
641 emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
643 /* Perform 2 iterations of newton-raphson method. */
644 for (int i = 0; i < 2; i++)
646 emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
647 emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
650 /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec. */
651 emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec))
;; Non-fused multiply-accumulate: acc + a*b -> vmla (operand 1 is the
;; tied accumulator, "0" constraint).
657 (define_insn "mul<mode>3add<mode>_neon"
658 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
659 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
660 (match_operand:VDQW 3 "s_register_operand" "w"))
661 (match_operand:VDQW 1 "s_register_operand" "0")))]
662 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
663 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
665 (if_then_else (match_test "<Is_float_mode>")
666 (const_string "neon_fp_mla_s<q>")
667 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Half-float variant of the above.
670 (define_insn "mul<mode>3add<mode>_neon"
671 [(set (match_operand:VH 0 "s_register_operand" "=w")
672 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
673 (match_operand:VH 3 "s_register_operand" "w"))
674 (match_operand:VH 1 "s_register_operand" "0")))]
675 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
676 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
677 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract: acc - a*b -> vmls.
680 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
681 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
682 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
683 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
684 (match_operand:VDQW 3 "s_register_operand" "w"))))]
685 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
686 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
688 (if_then_else (match_test "<Is_float_mode>")
689 (const_string "neon_fp_mla_s<q>")
690 (const_string "neon_mla_<V_elem_ch><q>")))]
693 ;; Fused multiply-accumulate
694 ;; We define each insn twice here:
695 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
696 ;; to be able to use when converting to FMA.
697 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Fused multiply-accumulate (vfma) and -subtract (vfms).  Each exists
;; in two flavors per the file comment above: a standard-named pattern
;; gated on unsafe-math, and an *_intrinsic twin without that gate.
698 (define_insn "fma<VCVTF:mode>4"
699 [(set (match_operand:VCVTF 0 "register_operand" "=w")
700 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
701 (match_operand:VCVTF 2 "register_operand" "w")
702 (match_operand:VCVTF 3 "register_operand" "0")))]
703 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
704 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
705 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic twin: no unsafe-math requirement.
708 (define_insn "fma<VCVTF:mode>4_intrinsic"
709 [(set (match_operand:VCVTF 0 "register_operand" "=w")
710 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
711 (match_operand:VCVTF 2 "register_operand" "w")
712 (match_operand:VCVTF 3 "register_operand" "0")))]
713 "TARGET_NEON && TARGET_FMA"
714 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
715 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Half-float fma.
718 (define_insn "fma<VH:mode>4"
719 [(set (match_operand:VH 0 "register_operand" "=w")
721 (match_operand:VH 1 "register_operand" "w")
722 (match_operand:VH 2 "register_operand" "w")
723 (match_operand:VH 3 "register_operand" "0")))]
724 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
725 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
726 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Half-float fma, intrinsic twin.
729 (define_insn "fma<VH:mode>4_intrinsic"
730 [(set (match_operand:VH 0 "register_operand" "=w")
732 (match_operand:VH 1 "register_operand" "w")
733 (match_operand:VH 2 "register_operand" "w")
734 (match_operand:VH 3 "register_operand" "0")))]
735 "TARGET_NEON_FP16INST"
736 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
737 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract: fma with a negated multiplicand -> vfms.
740 (define_insn "*fmsub<VCVTF:mode>4"
741 [(set (match_operand:VCVTF 0 "register_operand" "=w")
742 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
743 (match_operand:VCVTF 2 "register_operand" "w")
744 (match_operand:VCVTF 3 "register_operand" "0")))]
745 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
746 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
747 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Intrinsic twin of vfms.
750 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
751 [(set (match_operand:VCVTF 0 "register_operand" "=w")
753 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
754 (match_operand:VCVTF 2 "register_operand" "w")
755 (match_operand:VCVTF 3 "register_operand" "0")))]
756 "TARGET_NEON && TARGET_FMA"
757 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
758 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Half-float vfms intrinsic.
761 (define_insn "fmsub<VH:mode>4_intrinsic"
762 [(set (match_operand:VH 0 "register_operand" "=w")
764 (neg:VH (match_operand:VH 1 "register_operand" "w"))
765 (match_operand:VH 2 "register_operand" "w")
766 (match_operand:VH 3 "register_operand" "0")))]
767 "TARGET_NEON_FP16INST"
768 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
769 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Rounding (vrint<variant>.f32) and round-with-convert
;; (vcvt<variant>.<su>32.f32) patterns; both require ARMv8 FP (VFP5).
772 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
773 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
774 (unspec:VCVTF [(match_operand:VCVTF 1
775 "s_register_operand" "w")]
777 "TARGET_NEON && TARGET_VFP5"
778 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
779 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Combined round-and-convert to signed/unsigned integer.
782 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
783 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
784 (FIXUORS:<V_cmp_result> (unspec:VCVTF
785 [(match_operand:VCVTF 1 "register_operand" "w")]
787 "TARGET_NEON && TARGET_VFP5"
788 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
789 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
790 (set_attr "predicable" "no")]
;; Bitwise OR: register form (vorr) or OR with an encodable immediate,
;; rendered by neon_output_logic_immediate.
793 (define_insn "ior<mode>3"
794 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
795 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
796 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
799 switch (which_alternative)
801 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
802 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
803 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
804 default: gcc_unreachable ();
807 [(set_attr "type" "neon_logic<q>")]
810 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
811 ;; vorr. We support the pseudo-instruction vand instead, because that
812 ;; corresponds to the canonical form the middle-end expects to use for
813 ;; immediate bitwise-ANDs.
;; Bitwise AND: vand register form, or immediate form (inverted
;; immediate predicate "DL", emitted as vbic by the helper).
815 (define_insn "and<mode>3"
816 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
817 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
818 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
821 switch (which_alternative)
823 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
824 case 1: return neon_output_logic_immediate ("vand", &operands[2],
825 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
826 default: gcc_unreachable ();
829 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT: op1 | ~op2 -> vorn (note operand 2 is the inverted input).
832 (define_insn "orn<mode>3_neon"
833 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
834 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
835 (match_operand:VDQ 1 "s_register_operand" "w")))]
837 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
838 [(set_attr "type" "neon_logic<q>")]
;; Bit clear: op1 & ~op2 -> vbic.
841 (define_insn "bic<mode>3_neon"
842 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
843 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
844 (match_operand:VDQ 1 "s_register_operand" "w")))]
846 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
847 [(set_attr "type" "neon_logic<q>")]
;; Exclusive OR -> veor.
850 (define_insn "xor<mode>3"
851 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
852 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
853 (match_operand:VDQ 2 "s_register_operand" "w")))]
855 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
856 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement -> vmvn.
859 (define_insn "one_cmpl<mode>2"
860 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
861 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
863 "vmvn\t%<V_reg>0, %<V_reg>1"
864 [(set_attr "type" "neon_move<q>")]
;; Absolute value -> vabs (signed element suffix).
867 (define_insn "abs<mode>2"
868 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
869 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
871 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
873 (if_then_else (match_test "<Is_float_mode>")
874 (const_string "neon_fp_abs_s<q>")
875 (const_string "neon_abs<q>")))]
;; Negation -> vneg.
878 (define_insn "neg<mode>2"
879 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
880 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
882 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
884 (if_then_else (match_test "<Is_float_mode>")
885 (const_string "neon_fp_neg_s<q>")
886 (const_string "neon_neg<q>")))]
;; DImode negation.  Kept as one insn until after reload, then split
;; below into either a VFP/NEON sequence (0 - op1 via a zeroed scratch)
;; or a core-register negdi, depending on where op0 was allocated.
889 (define_insn "negdi2_neon"
890 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
891 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
892 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
893 (clobber (reg:CC CC_REGNUM))]
896 [(set_attr "length" "8")
897 (set_attr "type" "multiple")]
900 ; Split negdi2_neon for vfp registers
902 [(set (match_operand:DI 0 "s_register_operand" "")
903 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
904 (clobber (match_scratch:DI 2 ""))
905 (clobber (reg:CC CC_REGNUM))]
906 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
907 [(set (match_dup 2) (const_int 0))
908 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
909 (clobber (reg:CC CC_REGNUM))])]
;; If no scratch register was allocated, reuse the destination.
911 if (!REG_P (operands[2]))
912 operands[2] = operands[0];
916 ; Split negdi2_neon for core registers
918 [(set (match_operand:DI 0 "s_register_operand" "")
919 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
920 (clobber (match_scratch:DI 2 ""))
921 (clobber (reg:CC CC_REGNUM))]
922 "TARGET_32BIT && reload_completed
923 && arm_general_register_operand (operands[0], DImode)"
924 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
925 (clobber (reg:CC CC_REGNUM))])]
;; Half-float unary operations (ARMv8.2 FP16): abs/neg, the rounding
;; family, and reciprocal-square-root estimate.
929 (define_insn "<absneg_str><mode>2"
930 [(set (match_operand:VH 0 "s_register_operand" "=w")
931 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
932 "TARGET_NEON_FP16INST"
933 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
934 [(set_attr "type" "neon_abs<q>")]
;; Intrinsic-facing expander that forwards to the insn above.
937 (define_expand "neon_v<absneg_str><mode>"
939 (match_operand:VH 0 "s_register_operand")
940 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
941 "TARGET_NEON_FP16INST"
943 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
;; FP16 rounding variants (insn name chosen by the iterator).
947 (define_insn "neon_v<fp16_rnd_str><mode>"
948 [(set (match_operand:VH 0 "s_register_operand" "=w")
950 [(match_operand:VH 1 "s_register_operand" "w")]
952 "TARGET_NEON_FP16INST"
953 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
954 [(set_attr "type" "neon_fp_round_s<q>")]
;; Reciprocal square-root estimate.
957 (define_insn "neon_vrsqrte<mode>"
958 [(set (match_operand:VH 0 "s_register_operand" "=w")
960 [(match_operand:VH 1 "s_register_operand" "w")]
962 "TARGET_NEON_FP16INST"
963 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
964 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
967 (define_insn "*umin<mode>3_neon"
968 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
969 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
970 (match_operand:VDQIW 2 "s_register_operand" "w")))]
972 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
973 [(set_attr "type" "neon_minmax<q>")]
976 (define_insn "*umax<mode>3_neon"
977 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
978 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
979 (match_operand:VDQIW 2 "s_register_operand" "w")))]
981 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
982 [(set_attr "type" "neon_minmax<q>")]
;; Signed element-wise minimum (vmin.s<size> / vmin.f32); the type attribute
;; distinguishes float from integer variants for scheduling.
985 (define_insn "*smin<mode>3_neon"
986 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
987 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
988 (match_operand:VDQW 2 "s_register_operand" "w")))]
990 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
992 (if_then_else (match_test "<Is_float_mode>")
993 (const_string "neon_fp_minmax_s<q>")
994 (const_string "neon_minmax<q>")))]
;; Signed element-wise maximum (vmax.s<size> / vmax.f32); mirrors the
;; *smin<mode>3_neon pattern.
997 (define_insn "*smax<mode>3_neon"
998 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
999 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1000 (match_operand:VDQW 2 "s_register_operand" "w")))]
1002 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1004 (if_then_else (match_test "<Is_float_mode>")
1005 (const_string "neon_fp_minmax_s<q>")
1006 (const_string "neon_minmax<q>")))]
1009 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1010 ; generic vectorizer code. It ends up creating a V2DI constructor with
;; Vector left shift: alternative 0 is a register shift count (vshl with a
;; vector of per-lane counts), alternative 1 an immediate count rendered via
;; neon_output_shift_immediate.
1013 (define_insn "vashl<mode>3"
1014 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1015 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1016 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
1019 switch (which_alternative)
1021 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1022 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1024 VALID_NEON_QREG_MODE (<MODE>mode),
1026 default: gcc_unreachable ();
1029 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
;; Arithmetic right shift by an immediate ('s' = signed vshr variant).
1032 (define_insn "vashr<mode>3_imm"
1033 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1034 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1035 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1038 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1039 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1042 [(set_attr "type" "neon_shift_imm<q>")]
;; Logical right shift by an immediate ('u' = unsigned vshr variant).
1045 (define_insn "vlshr<mode>3_imm"
1046 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1047 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1048 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1051 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1052 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1055 [(set_attr "type" "neon_shift_imm<q>")]
1058 ; Used for implementing logical shift-right, which is a left-shift by a negative
1059 ; amount, with signed operands. This is essentially the same as ashl<mode>3
1060 ; above, but using an unspec in case GCC tries anything tricky with negative
;; Signed vshl wrapped in an unspec: used to express right shifts as left
;; shifts by a negated count without letting GCC rewrite the arithmetic.
1063 (define_insn "ashl<mode>3_signed"
1064 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1065 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1066 (match_operand:VDQI 2 "s_register_operand" "w")]
1067 UNSPEC_ASHIFT_SIGNED))]
1069 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1070 [(set_attr "type" "neon_shift_reg<q>")]
1073 ; Used for implementing logical shift-right, which is a left-shift by a negative
1074 ; amount, with unsigned operands.
;; Unsigned counterpart of ashl<mode>3_signed (vshl.u<size> via unspec).
1076 (define_insn "ashl<mode>3_unsigned"
1077 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1078 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1079 (match_operand:VDQI 2 "s_register_operand" "w")]
1080 UNSPEC_ASHIFT_UNSIGNED))]
1082 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1083 [(set_attr "type" "neon_shift_reg<q>")]
;; Arithmetic right shift expander: a register count is negated and fed to
;; the signed left-shift unspec; an immediate count goes to vashr<mode>3_imm.
1086 (define_expand "vashr<mode>3"
1087 [(set (match_operand:VDQIW 0 "s_register_operand")
1088 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1089 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1092 if (s_register_operand (operands[2], <MODE>mode))
1094 rtx neg = gen_reg_rtx (<MODE>mode);
1095 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1096 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1099 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
;; Logical right shift expander; same strategy as vashr<mode>3 but using the
;; unsigned left-shift unspec / vlshr<mode>3_imm.
1103 (define_expand "vlshr<mode>3"
1104 [(set (match_operand:VDQIW 0 "s_register_operand")
1105 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1106 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1109 if (s_register_operand (operands[2], <MODE>mode))
1111 rtx neg = gen_reg_rtx (<MODE>mode);
1112 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1113 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1116 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1122 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1123 ;; leaving the upper half uninitialized. This is OK since the shift
1124 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1125 ;; data flow analysis however, we pretend the full register is set
;; Load a 32-bit shift count into lane 0 of a D register, from memory (vld1)
;; or from a core register (vmov); modelled as setting the whole DI value.
1127 (define_insn "neon_load_count"
1128 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1129 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1130 UNSPEC_LOAD_COUNT))]
1133 vld1.32\t{%P0[0]}, %A1
1134 vmov.32\t%P0[0], %1"
1135 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1138 ;; Widening operations
;; Signed widening sum of a full quad-int vector: copies the accumulator into
;; the destination, then accumulates the low and high halves of operand 1 via
;; the vec_sel_widen_ssum_{lo,hi} patterns.
1140 (define_expand "widen_ssum<mode>3"
1141 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1142 (plus:<V_double_width>
1143 (sign_extend:<V_double_width>
1144 (match_operand:VQI 1 "s_register_operand"))
1145 (match_operand:<V_double_width> 2 "s_register_operand")))]
1148 machine_mode mode = GET_MODE (operands[1]);
1151 p1 = arm_simd_vect_par_cnst_half (mode, false);
1152 p2 = arm_simd_vect_par_cnst_half (mode, true);
1154 if (operands[0] != operands[2])
1155 emit_move_insn (operands[0], operands[2]);
1157 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1161 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
;; vaddw on the sign-extended low half of operand 1; the %e/%f register-half
;; selector is swapped on big-endian because lane numbering is reversed.
1169 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1170 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1171 (plus:<V_double_width>
1172 (sign_extend:<V_double_width>
1173 (vec_select:<V_HALF>
1174 (match_operand:VQI 1 "s_register_operand" "%w")
1175 (match_operand:VQI 2 "vect_par_constant_low" "")))
1176 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1179 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1180 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1182 [(set_attr "type" "neon_add_widen")])
;; High-half counterpart of vec_sel_widen_ssum_lo (endian selection mirrored).
1184 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1185 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1186 (plus:<V_double_width>
1187 (sign_extend:<V_double_width>
1188 (vec_select:<V_HALF>
1189 (match_operand:VQI 1 "s_register_operand" "%w")
1190 (match_operand:VQI 2 "vect_par_constant_high" "")))
1191 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1194 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1195 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1197 [(set_attr "type" "neon_add_widen")])
;; Direct signed widening accumulate for double-word (VW) inputs: one vaddw.
1199 (define_insn "widen_ssum<mode>3"
1200 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1202 (sign_extend:<V_widen>
1203 (match_operand:VW 1 "s_register_operand" "%w"))
1204 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1206 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1207 [(set_attr "type" "neon_add_widen")]
;; Unsigned widening sum of a quad-int vector; structure parallels
;; widen_ssum<mode>3 but uses zero extension and the usum helpers.
1210 (define_expand "widen_usum<mode>3"
1211 [(set (match_operand:<V_double_width> 0 "s_register_operand")
1212 (plus:<V_double_width>
1213 (zero_extend:<V_double_width>
1214 (match_operand:VQI 1 "s_register_operand"))
1215 (match_operand:<V_double_width> 2 "s_register_operand")))]
1218 machine_mode mode = GET_MODE (operands[1]);
1221 p1 = arm_simd_vect_par_cnst_half (mode, false);
1222 p2 = arm_simd_vect_par_cnst_half (mode, true);
1224 if (operands[0] != operands[2])
1225 emit_move_insn (operands[0], operands[2]);
1227 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1231 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
;; vaddw.u<size> on the zero-extended low half; endian-dependent %e/%f choice.
1239 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1240 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1241 (plus:<V_double_width>
1242 (zero_extend:<V_double_width>
1243 (vec_select:<V_HALF>
1244 (match_operand:VQI 1 "s_register_operand" "%w")
1245 (match_operand:VQI 2 "vect_par_constant_low" "")))
1246 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1249 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1250 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1252 [(set_attr "type" "neon_add_widen")])
;; High-half counterpart of vec_sel_widen_usum_lo.
1254 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1255 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1256 (plus:<V_double_width>
1257 (zero_extend:<V_double_width>
1258 (vec_select:<V_HALF>
1259 (match_operand:VQI 1 "s_register_operand" "%w")
1260 (match_operand:VQI 2 "vect_par_constant_high" "")))
1261 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1264 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1265 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1267 [(set_attr "type" "neon_add_widen")])
;; Direct unsigned widening accumulate for double-word (VW) inputs.
1269 (define_insn "widen_usum<mode>3"
1270 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1271 (plus:<V_widen> (zero_extend:<V_widen>
1272 (match_operand:VW 1 "s_register_operand" "%w"))
1273 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1275 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1276 [(set_attr "type" "neon_add_widen")]
1279 ;; Helpers for quad-word reduction operations
1281 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1282 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1283 ; N/2-element vector.
;; Combine (add/min/max per <VQH_mnem>) the low and high V2SI halves of a
;; V4SI operand into a single D register.
1285 (define_insn "quad_halves_<code>v4si"
1286 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1288 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1289 (parallel [(const_int 0) (const_int 1)]))
1290 (vec_select:V2SI (match_dup 1)
1291 (parallel [(const_int 2) (const_int 3)]))))]
1293 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1294 [(set_attr "vqh_mnem" "<VQH_mnem>")
1295 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Float variant of quad_halves; requires -funsafe-math-optimizations since
;; NEON float ops flush denormals to zero.
1298 (define_insn "quad_halves_<code>v4sf"
1299 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1301 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1302 (parallel [(const_int 0) (const_int 1)]))
1303 (vec_select:V2SF (match_dup 1)
1304 (parallel [(const_int 2) (const_int 3)]))))]
1305 "TARGET_NEON && flag_unsafe_math_optimizations"
1306 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1307 [(set_attr "vqh_mnem" "<VQH_mnem>")
1308 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
;; V8HI variant of quad_halves.
;; NOTE(review): operand 0 uses "+w" here while the v4si variant uses "=w";
;; the output is write-only, so "=w" looks intended — confirm.
1311 (define_insn "quad_halves_<code>v8hi"
1312 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1314 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1315 (parallel [(const_int 0) (const_int 1)
1316 (const_int 2) (const_int 3)]))
1317 (vec_select:V4HI (match_dup 1)
1318 (parallel [(const_int 4) (const_int 5)
1319 (const_int 6) (const_int 7)]))))]
1321 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1322 [(set_attr "vqh_mnem" "<VQH_mnem>")
1323 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; V16QI variant of quad_halves.
;; NOTE(review): "+w" vs "=w" inconsistency as in the v8hi variant — confirm.
1326 (define_insn "quad_halves_<code>v16qi"
1327 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1329 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1330 (parallel [(const_int 0) (const_int 1)
1331 (const_int 2) (const_int 3)
1332 (const_int 4) (const_int 5)
1333 (const_int 6) (const_int 7)]))
1334 (vec_select:V8QI (match_dup 1)
1335 (parallel [(const_int 8) (const_int 9)
1336 (const_int 10) (const_int 11)
1337 (const_int 12) (const_int 13)
1338 (const_int 14) (const_int 15)]))))]
1340 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1341 [(set_attr "vqh_mnem" "<VQH_mnem>")
1342 (set_attr "type" "neon_reduc_<VQH_type>_q")]
;; Move a half-width vector into the high half of a quad register, expressed
;; as a plain move to the upper subreg of operand 0.
1345 (define_expand "move_hi_quad_<mode>"
1346 [(match_operand:ANY128 0 "s_register_operand")
1347 (match_operand:<V_HALF> 1 "s_register_operand")]
1350 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1351 GET_MODE_SIZE (<V_HALF>mode)),
;; Move a half-width vector into the low half of a quad register.
1356 (define_expand "move_lo_quad_<mode>"
1357 [(match_operand:ANY128 0 "s_register_operand")
1358 (match_operand:<V_HALF> 1 "s_register_operand")]
1361 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1367 ;; Reduction operations
;; Scalar sum reduction of a double-word vector: repeated pairwise vpadd
;; leaves the total in every lane, then lane 0 is extracted.
1369 (define_expand "reduc_plus_scal_<mode>"
1370 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1371 (match_operand:VD 1 "s_register_operand")]
1372 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1374 rtx vec = gen_reg_rtx (<MODE>mode);
1375 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1376 &gen_neon_vpadd_internal<mode>);
1377 /* The same result is actually computed into every element.  */
1378 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Quad-word sum reduction: first fold high half onto low half, then reuse
;; the half-width reduction.  Disabled on big-endian (lane order issues).
1382 (define_expand "reduc_plus_scal_<mode>"
1383 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1384 (match_operand:VQ 1 "s_register_operand")]
1385 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1386 && !BYTES_BIG_ENDIAN"
1388 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1390 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]))
1391 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
;; V2DI sum reduction: single vadd.i64 of the two halves, then extract lane 0.
1396 (define_expand "reduc_plus_scal_v2di"
1397 [(match_operand:DI 0 "nonimmediate_operand")
1398 (match_operand:V2DI 1 "s_register_operand")]
1399 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1401 rtx vec = gen_reg_rtx (V2DImode);
1403 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1404 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
;; Helper insn: add the two DI halves of a V2DI into the low D register.
1409 (define_insn "arm_reduc_plus_internal_v2di"
1410 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1411 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1413 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1414 "vadd.i64\t%e0, %e1, %f1"
1415 [(set_attr "type" "neon_add_q")]
;; Scalar signed-minimum reduction of a double-word vector via pairwise vpmin.
1418 (define_expand "reduc_smin_scal_<mode>"
1419 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1420 (match_operand:VD 1 "s_register_operand")]
1421 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1423 rtx vec = gen_reg_rtx (<MODE>mode);
1425 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1426 &gen_neon_vpsmin<mode>);
1427 /* The result is computed into every element of the vector.  */
1428 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Quad-word signed-minimum reduction: halve with quad_halves_smin, then
;; reduce the half-width result.  Little-endian only.
1432 (define_expand "reduc_smin_scal_<mode>"
1433 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1434 (match_operand:VQ 1 "s_register_operand")]
1435 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1436 && !BYTES_BIG_ENDIAN"
1438 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1440 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1441 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
;; Scalar signed-maximum reduction of a double-word vector via pairwise vpmax.
1446 (define_expand "reduc_smax_scal_<mode>"
1447 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1448 (match_operand:VD 1 "s_register_operand")]
1449 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1451 rtx vec = gen_reg_rtx (<MODE>mode);
1452 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1453 &gen_neon_vpsmax<mode>);
1454 /* The result is computed into every element of the vector.  */
1455 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Quad-word signed-maximum reduction; structure mirrors reduc_smin_scal.
1459 (define_expand "reduc_smax_scal_<mode>"
1460 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1461 (match_operand:VQ 1 "s_register_operand")]
1462 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1463 && !BYTES_BIG_ENDIAN"
1465 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1467 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1468 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
;; Scalar unsigned-minimum reduction of a double-word integer vector.
1473 (define_expand "reduc_umin_scal_<mode>"
1474 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1475 (match_operand:VDI 1 "s_register_operand")]
1478 rtx vec = gen_reg_rtx (<MODE>mode);
1479 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1480 &gen_neon_vpumin<mode>);
1481 /* The result is computed into every element of the vector.  */
1482 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Quad-word unsigned-minimum reduction (little-endian only).
1486 (define_expand "reduc_umin_scal_<mode>"
1487 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1488 (match_operand:VQI 1 "s_register_operand")]
1489 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1491 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1493 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1494 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
;; Scalar unsigned-maximum reduction of a double-word integer vector.
1499 (define_expand "reduc_umax_scal_<mode>"
1500 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1501 (match_operand:VDI 1 "s_register_operand")]
1504 rtx vec = gen_reg_rtx (<MODE>mode);
1505 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1506 &gen_neon_vpumax<mode>);
1507 /* The result is computed into every element of the vector.  */
1508 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
;; Quad-word unsigned-maximum reduction (little-endian only).
1512 (define_expand "reduc_umax_scal_<mode>"
1513 [(match_operand:<V_elem> 0 "nonimmediate_operand")
1514 (match_operand:VQI 1 "s_register_operand")]
1515 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1517 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1519 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1520 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
;; Pairwise add (vpadd) on double-word vectors; used as the step function by
;; the reduc_plus expanders.
1525 (define_insn "neon_vpadd_internal<mode>"
1526 [(set (match_operand:VD 0 "s_register_operand" "=w")
1527 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1528 (match_operand:VD 2 "s_register_operand" "w")]
1531 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1532 ;; Assume this schedules like vadd.
1534 (if_then_else (match_test "<Is_float_mode>")
1535 (const_string "neon_fp_reduc_add_s<q>")
1536 (const_string "neon_reduc_add<q>")))]
;; Pairwise add for half-float V4HF (vpadd.f16); FP16-instruction targets only.
1539 (define_insn "neon_vpaddv4hf"
1541 (match_operand:V4HF 0 "s_register_operand" "=w")
1542 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1543 (match_operand:V4HF 2 "s_register_operand" "w")]
1545 "TARGET_NEON_FP16INST"
1546 "vpadd.f16\t%P0, %P1, %P2"
1547 [(set_attr "type" "neon_reduc_add")]
;; Pairwise signed minimum (vpmin.s<size> / vpmin.f32) on double-word vectors.
1550 (define_insn "neon_vpsmin<mode>"
1551 [(set (match_operand:VD 0 "s_register_operand" "=w")
1552 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1553 (match_operand:VD 2 "s_register_operand" "w")]
1556 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1558 (if_then_else (match_test "<Is_float_mode>")
1559 (const_string "neon_fp_reduc_minmax_s<q>")
1560 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise signed maximum (vpmax.s<size> / vpmax.f32) on double-word vectors.
1563 (define_insn "neon_vpsmax<mode>"
1564 [(set (match_operand:VD 0 "s_register_operand" "=w")
1565 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1566 (match_operand:VD 2 "s_register_operand" "w")]
1569 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1571 (if_then_else (match_test "<Is_float_mode>")
1572 (const_string "neon_fp_reduc_minmax_s<q>")
1573 (const_string "neon_reduc_minmax<q>")))]
;; Pairwise unsigned minimum (vpmin.u<size>) on double-word integer vectors.
1576 (define_insn "neon_vpumin<mode>"
1577 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1578 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1579 (match_operand:VDI 2 "s_register_operand" "w")]
1582 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1583 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise unsigned maximum (vpmax.u<size>) on double-word integer vectors.
1586 (define_insn "neon_vpumax<mode>"
1587 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1588 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1589 (match_operand:VDI 2 "s_register_operand" "w")]
1592 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1593 [(set_attr "type" "neon_reduc_minmax<q>")]
1596 ;; Saturating arithmetic
1598 ; NOTE: Neon supports many more saturating variants of instructions than the
1599 ; following, but these are all GCC currently understands.
1600 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1601 ; yet either, although these patterns may be used by intrinsics when they're
;; Signed saturating addition (vqadd.s<size>) on double-word vectors.
1604 (define_insn "*ss_add<mode>_neon"
1605 [(set (match_operand:VD 0 "s_register_operand" "=w")
1606 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1607 (match_operand:VD 2 "s_register_operand" "w")))]
1609 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1610 [(set_attr "type" "neon_qadd<q>")]
;; Unsigned saturating addition (vqadd.u<size>) on double-word vectors.
1613 (define_insn "*us_add<mode>_neon"
1614 [(set (match_operand:VD 0 "s_register_operand" "=w")
1615 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1616 (match_operand:VD 2 "s_register_operand" "w")))]
1618 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1619 [(set_attr "type" "neon_qadd<q>")]
;; Signed saturating subtraction (vqsub.s<size>) on double-word vectors.
1622 (define_insn "*ss_sub<mode>_neon"
1623 [(set (match_operand:VD 0 "s_register_operand" "=w")
1624 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1625 (match_operand:VD 2 "s_register_operand" "w")))]
1627 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1628 [(set_attr "type" "neon_qsub<q>")]
;; Unsigned saturating subtraction (vqsub.u<size>) on double-word vectors.
1631 (define_insn "*us_sub<mode>_neon"
1632 [(set (match_operand:VD 0 "s_register_operand" "=w")
1633 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1634 (match_operand:VD 2 "s_register_operand" "w")))]
1636 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1637 [(set_attr "type" "neon_qsub<q>")]
1640 ;; Conditional instructions. These are comparisons with conditional moves for
1641 ;; vectors. They perform the assignment:
1643 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1645 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
;; Vector conditional select: build a per-lane mask with vcge/vcgt/vceq (or
;; their inverses / combinations for unordered float comparisons), then pick
;; between operands 1 and 2 with vbsl.  swap_bsl_operands inverts the select
;; when the inverse comparison was emitted; zero-comparison forms are caught
;; as a special case.
;; NOTE(review): the variable spelled "complimentary_comparison" below means
;; "complementary" (the reversed comparison) — identifier kept as-is.
1648 (define_expand "vcond<mode><mode>"
1649 [(set (match_operand:VDQW 0 "s_register_operand")
1651 (match_operator 3 "comparison_operator"
1652 [(match_operand:VDQW 4 "s_register_operand")
1653 (match_operand:VDQW 5 "nonmemory_operand")])
1654 (match_operand:VDQW 1 "s_register_operand")
1655 (match_operand:VDQW 2 "s_register_operand")))]
1656 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1659 int use_zero_form = 0;
1660 int swap_bsl_operands = 0;
1661 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1662 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1664 rtx (*base_comparison) (rtx, rtx, rtx);
1665 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1667 switch (GET_CODE (operands[3]))
1674 if (operands[5] == CONST0_RTX (<MODE>mode))
1681 if (!REG_P (operands[5]))
1682 operands[5] = force_reg (<MODE>mode, operands[5]);
1685 switch (GET_CODE (operands[3]))
1695 base_comparison = gen_neon_vcge<mode>;
1696 complimentary_comparison = gen_neon_vcgt<mode>;
1704 base_comparison = gen_neon_vcgt<mode>;
1705 complimentary_comparison = gen_neon_vcge<mode>;
1710 base_comparison = gen_neon_vceq<mode>;
1711 complimentary_comparison = gen_neon_vceq<mode>;
1717 switch (GET_CODE (operands[3]))
1724 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1725 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
1731 Note that there also exist direct comparison against 0 forms,
1732 so catch those as a special case. */
1736 switch (GET_CODE (operands[3]))
1739 base_comparison = gen_neon_vclt<mode>;
1742 base_comparison = gen_neon_vcle<mode>;
1745 /* Do nothing, other zero form cases already have the correct
1752 emit_insn (base_comparison (mask, operands[4], operands[5]));
1754 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1761 /* Vector compare returns false for lanes which are unordered, so if we use
1762 the inverse of the comparison we actually want to emit, then
1763 swap the operands to BSL, we will end up with the correct result.
1764 Note that a NE NaN and NaN NE b are true for all a, b.
1766 Our transformations are:
1771 a NE b -> !(a EQ b) */
1774 emit_insn (base_comparison (mask, operands[4], operands[5]));
1776 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1778 swap_bsl_operands = 1;
1781 /* We check (a > b || b > a). combining these comparisons give us
1782 true iff !(a != b && a ORDERED b), swapping the operands to BSL
1783 will then give us (a == b || a UNORDERED b) as intended. */
1785 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1786 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1787 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1788 swap_bsl_operands = 1;
1791 /* Operands are ORDERED iff (a > b || b >= a).
1792 Swapping the operands to BSL will give the UNORDERED case. */
1793 swap_bsl_operands = 1;
1796 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1797 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1798 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1804 if (swap_bsl_operands)
1805 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1808 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
;; Unsigned vector conditional select: like vcond but uses the unsigned
;; comparisons (vcgeu/vcgtu); reversed conditions compare with swapped
;; operands, and comparisons against zero use the direct vcle/vclt forms.
1813 (define_expand "vcondu<mode><mode>"
1814 [(set (match_operand:VDQIW 0 "s_register_operand")
1816 (match_operator 3 "arm_comparison_operator"
1817 [(match_operand:VDQIW 4 "s_register_operand")
1818 (match_operand:VDQIW 5 "s_register_operand")])
1819 (match_operand:VDQIW 1 "s_register_operand")
1820 (match_operand:VDQIW 2 "s_register_operand")))]
1824 int inverse = 0, immediate_zero = 0;
1826 mask = gen_reg_rtx (<V_cmp_result>mode);
1828 if (operands[5] == CONST0_RTX (<MODE>mode))
1830 else if (!REG_P (operands[5]))
1831 operands[5] = force_reg (<MODE>mode, operands[5]);
1833 switch (GET_CODE (operands[3]))
1836 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1840 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1844 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1849 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1851 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1856 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1858 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1862 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1871 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1874 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1880 ;; Patterns for builtins.
1882 ; good for plain vadd, vaddq.
;; vadd/vaddq intrinsic: canonical plus RTL when flush-to-zero semantics are
;; acceptable (integer, or -funsafe-math-optimizations), else an unspec.
1884 (define_expand "neon_vadd<mode>"
1885 [(match_operand:VCVTF 0 "s_register_operand")
1886 (match_operand:VCVTF 1 "s_register_operand")
1887 (match_operand:VCVTF 2 "s_register_operand")]
1890 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1891 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1893 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
;; FP16 vadd intrinsic; forwards to the add<mode>3_fp16 pattern.
1898 (define_expand "neon_vadd<mode>"
1899 [(match_operand:VH 0 "s_register_operand")
1900 (match_operand:VH 1 "s_register_operand")
1901 (match_operand:VH 2 "s_register_operand")]
1902 "TARGET_NEON_FP16INST"
1904 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; FP16 vsub intrinsic; forwards to the sub<mode>3_fp16 pattern.
1908 (define_expand "neon_vsub<mode>"
1909 [(match_operand:VH 0 "s_register_operand")
1910 (match_operand:VH 1 "s_register_operand")
1911 (match_operand:VH 2 "s_register_operand")]
1912 "TARGET_NEON_FP16INST"
1914 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
1918 ; Note that NEON operations don't support the full IEEE 754 standard: in
1919 ; particular, denormal values are flushed to zero. This means that GCC cannot
1920 ; use those instructions for autovectorization, etc. unless
1921 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1922 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
1923 ; header) must work in either case: if -funsafe-math-optimizations is given,
1924 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1925 ; expand to unspecs (which may potentially limit the extent to which they might
1926 ; be optimized by generic code).
1928 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd used by intrinsics when flag_unsafe_math_optimizations
;; is off, so generic code cannot apply IEEE-invalid simplifications.
1930 (define_insn "neon_vadd<mode>_unspec"
1931 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1932 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1933 (match_operand:VCVTF 2 "s_register_operand" "w")]
1936 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1938 (if_then_else (match_test "<Is_float_mode>")
1939 (const_string "neon_fp_addsub_s<q>")
1940 (const_string "neon_add<q>")))]
;; Widening add (vaddl.s/u<size>): two D-register inputs, Q-register result.
1943 (define_insn "neon_vaddl<sup><mode>"
1944 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1945 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1946 (match_operand:VDI 2 "s_register_operand" "w")]
1949 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1950 [(set_attr "type" "neon_add_long")]
;; Widening accumulate (vaddw.s/u<size>): Q + widened D -> Q.
1953 (define_insn "neon_vaddw<sup><mode>"
1954 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1955 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1956 (match_operand:VDI 2 "s_register_operand" "w")]
1959 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1960 [(set_attr "type" "neon_add_widen")]
;; Halving add, optionally rounding (vhadd/vrhadd, signed or unsigned).
1965 (define_insn "neon_v<r>hadd<sup><mode>"
1966 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1967 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1968 (match_operand:VDQIW 2 "s_register_operand" "w")]
1971 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1972 [(set_attr "type" "neon_add_halve_q")]
;; Saturating add intrinsic (vqadd.s/u<size>) as an unspec.
1975 (define_insn "neon_vqadd<sup><mode>"
1976 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1977 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1978 (match_operand:VDQIX 2 "s_register_operand" "w")]
1981 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1982 [(set_attr "type" "neon_qadd<q>")]
;; Narrowing add returning the high halves (vaddhn/vraddhn): Q + Q -> D.
1985 (define_insn "neon_v<r>addhn<mode>"
1986 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1987 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1988 (match_operand:VN 2 "s_register_operand" "w")]
1991 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1992 [(set_attr "type" "neon_add_halve_narrow_q")]
1995 ;; Polynomial and Float multiplication.
;; Polynomial / float multiply intrinsic (vmul.p8 / vmul.f32) as an unspec.
1996 (define_insn "neon_vmul<pf><mode>"
1997 [(set (match_operand:VPF 0 "s_register_operand" "=w")
1998 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1999 (match_operand:VPF 2 "s_register_operand" "w")]
2002 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2004 (if_then_else (match_test "<Is_float_mode>")
2005 (const_string "neon_fp_mul_s<q>")
2006 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Canonical FP16 vector multiply; only when -funsafe-math-optimizations
;; permits flush-to-zero semantics.
2009 (define_insn "mul<mode>3"
2011 (match_operand:VH 0 "s_register_operand" "=w")
2013 (match_operand:VH 1 "s_register_operand" "w")
2014 (match_operand:VH 2 "s_register_operand" "w")))]
2015 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2016 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2017 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Intrinsic FP16 vector multiply, available without unsafe-math.
2020 (define_insn "neon_vmulf<mode>"
2022 (match_operand:VH 0 "s_register_operand" "=w")
2024 (match_operand:VH 1 "s_register_operand" "w")
2025 (match_operand:VH 2 "s_register_operand" "w")))]
2026 "TARGET_NEON_FP16INST"
2027 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2028 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmla intrinsic: canonical mul+add RTL when safe, otherwise the unspec form.
2031 (define_expand "neon_vmla<mode>"
2032 [(match_operand:VDQW 0 "s_register_operand")
2033 (match_operand:VDQW 1 "s_register_operand")
2034 (match_operand:VDQW 2 "s_register_operand")
2035 (match_operand:VDQW 3 "s_register_operand")]
2038 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2039 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2040 operands[2], operands[3]));
2042 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2043 operands[2], operands[3]));
;; vfma intrinsic (fused multiply-add); operand 1 is the accumulator, so the
;; multiplicands 2 and 3 come first in the fma pattern call.
2047 (define_expand "neon_vfma<VCVTF:mode>"
2048 [(match_operand:VCVTF 0 "s_register_operand")
2049 (match_operand:VCVTF 1 "s_register_operand")
2050 (match_operand:VCVTF 2 "s_register_operand")
2051 (match_operand:VCVTF 3 "s_register_operand")]
2052 "TARGET_NEON && TARGET_FMA"
2054 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; FP16 vfma intrinsic; same operand ordering as the VCVTF variant.
2059 (define_expand "neon_vfma<VH:mode>"
2060 [(match_operand:VH 0 "s_register_operand")
2061 (match_operand:VH 1 "s_register_operand")
2062 (match_operand:VH 2 "s_register_operand")
2063 (match_operand:VH 3 "s_register_operand")]
2064 "TARGET_NEON_FP16INST"
2066 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; vfms intrinsic (fused multiply-subtract) for single-float vectors.
2071 (define_expand "neon_vfms<VCVTF:mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand")
2073 (match_operand:VCVTF 1 "s_register_operand")
2074 (match_operand:VCVTF 2 "s_register_operand")
2075 (match_operand:VCVTF 3 "s_register_operand")]
2076 "TARGET_NEON && TARGET_FMA"
2078 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; FP16 vfms intrinsic; same structure as the VCVTF variant.
2083 (define_expand "neon_vfms<VH:mode>"
2084 [(match_operand:VH 0 "s_register_operand")
2085 (match_operand:VH 1 "s_register_operand")
2086 (match_operand:VH 2 "s_register_operand")
2087 (match_operand:VH 3 "s_register_operand")]
2088 "TARGET_NEON_FP16INST"
2090 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2095 ;; The expand RTL structure here is not important.
2096 ;; We use the gen_* functions anyway.
2097 ;; We just need something to wrap the iterators around.
2099 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2100 [(set (match_operand:VCVTF 0 "s_register_operand")
2102 [(match_operand:VCVTF 1 "s_register_operand")
2104 (match_operand:<VFML> 2 "s_register_operand")
2105 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2108 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2109 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2117 (define_insn "vfmal_low<mode>_intrinsic"
2118 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2121 (vec_select:<VFMLSEL>
2122 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2123 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2125 (vec_select:<VFMLSEL>
2126 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2127 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2128 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2130 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2131 [(set_attr "type" "neon_fp_mla_s<q>")]
2134 (define_insn "vfmsl_high<mode>_intrinsic"
2135 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2139 (vec_select:<VFMLSEL>
2140 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2141 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2143 (vec_select:<VFMLSEL>
2144 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2145 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2146 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2148 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2149 [(set_attr "type" "neon_fp_mla_s<q>")]
2152 (define_insn "vfmal_high<mode>_intrinsic"
2153 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2156 (vec_select:<VFMLSEL>
2157 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2158 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2160 (vec_select:<VFMLSEL>
2161 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2162 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2163 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2165 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2166 [(set_attr "type" "neon_fp_mla_s<q>")]
2169 (define_insn "vfmsl_low<mode>_intrinsic"
2170 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2174 (vec_select:<VFMLSEL>
2175 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2176 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2178 (vec_select:<VFMLSEL>
2179 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2180 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2181 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2183 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2184 [(set_attr "type" "neon_fp_mla_s<q>")]
2187 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2188 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2190 [(match_operand:VCVTF 1 "s_register_operand")
2192 (match_operand:<VFML> 2 "s_register_operand")
2193 (match_operand:<VFML> 3 "s_register_operand"))
2194 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2197 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2198 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2199 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2200 (operands[0], operands[1],
2201 operands[2], operands[3],
2206 (define_insn "vfmal_lane_low<mode>_intrinsic"
2207 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2210 (vec_select:<VFMLSEL>
2211 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2212 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2214 (vec_duplicate:<VFMLSEL>
2216 (match_operand:<VFML> 3 "s_register_operand" "x")
2217 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2218 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2221 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2222 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2224 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2225 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2229 operands[5] = GEN_INT (lane);
2230 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2233 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane-indexed VFMLAL/VFMLSL expander for the mixed-size variants
;; (operand 3 is in the <VFMLSEL2> mode, a different width from operand 2):
;; endian-normalises the lane against <VFMLSEL2>mode, builds the half
;; selector, and emits the *_intrinsic insn.
2236 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2237 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2239 [(match_operand:VCVTF 1 "s_register_operand")
2241 (match_operand:<VFML> 2 "s_register_operand")
2242 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2243 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2247 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2248 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2249 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2250 (operands[0], operands[1], operands[2], operands[3],
2255 ;; Used to implement the intrinsics:
2256 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2257 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2258 ;; Needs a bit of care to get the modes of the different sub-expressions right
2259 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2260 ;; S or D subregister to select the appropriate lane from.
2262 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2263 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2266 (vec_select:<VFMLSEL>
2267 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2268 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2270 (vec_duplicate:<VFMLSEL>
2272 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2273 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2274 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2277 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2278 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2279 int new_lane = lane % elts_per_reg;
2280 int regdiff = lane / elts_per_reg;
2281 operands[5] = GEN_INT (new_lane);
2282 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2283 because we want the print_operand code to print the appropriate
2284 S or D register prefix. */
2285 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2286 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2287 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2289 [(set_attr "type" "neon_fp_mla_s<q>")]
2292 ;; Used to implement the intrinsics:
2293 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2294 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2295 ;; Needs a bit of care to get the modes of the different sub-expressions right
2296 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2297 ;; S or D subregister to select the appropriate lane from.
2299 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2300 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2303 (vec_select:<VFMLSEL>
2304 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2305 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2307 (vec_duplicate:<VFMLSEL>
2309 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2310 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2311 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2314 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2315 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2316 int new_lane = lane % elts_per_reg;
2317 int regdiff = lane / elts_per_reg;
2318 operands[5] = GEN_INT (new_lane);
2319 /* We re-create operands[3] in the halved VFMLSEL mode
2320 because we've calculated the correct half-width subreg to extract
2321 the lane from and we want to print *that* subreg instead. */
2322 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2323 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2325 [(set_attr "type" "neon_fp_mla_s<q>")]
2328 (define_insn "vfmal_lane_high<mode>_intrinsic"
2329 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2332 (vec_select:<VFMLSEL>
2333 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2334 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2336 (vec_duplicate:<VFMLSEL>
2338 (match_operand:<VFML> 3 "s_register_operand" "x")
2339 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2340 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2343 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2344 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2346 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2347 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2351 operands[5] = GEN_INT (lane);
2352 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2355 [(set_attr "type" "neon_fp_mla_s<q>")]
2358 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2359 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2363 (vec_select:<VFMLSEL>
2364 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2365 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2367 (vec_duplicate:<VFMLSEL>
2369 (match_operand:<VFML> 3 "s_register_operand" "x")
2370 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2371 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2374 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2375 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2377 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2378 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2382 operands[5] = GEN_INT (lane);
2383 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2386 [(set_attr "type" "neon_fp_mla_s<q>")]
2389 ;; Used to implement the intrinsics:
2390 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2391 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2392 ;; Needs a bit of care to get the modes of the different sub-expressions right
2393 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2394 ;; S or D subregister to select the appropriate lane from.
2396 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2397 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2401 (vec_select:<VFMLSEL>
2402 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2403 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2405 (vec_duplicate:<VFMLSEL>
2407 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2408 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2409 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2412 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2413 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2414 int new_lane = lane % elts_per_reg;
2415 int regdiff = lane / elts_per_reg;
2416 operands[5] = GEN_INT (new_lane);
2417 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2418 because we want the print_operand code to print the appropriate
2419 S or D register prefix. */
2420 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2421 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2422 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2424 [(set_attr "type" "neon_fp_mla_s<q>")]
2427 ;; Used to implement the intrinsics:
2428 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2429 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2430 ;; Needs a bit of care to get the modes of the different sub-expressions right
2431 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2432 ;; S or D subregister to select the appropriate lane from.
2434 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2435 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2439 (vec_select:<VFMLSEL>
2440 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2441 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2443 (vec_duplicate:<VFMLSEL>
2445 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2446 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2447 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2450 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2451 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2452 int new_lane = lane % elts_per_reg;
2453 int regdiff = lane / elts_per_reg;
2454 operands[5] = GEN_INT (new_lane);
2455 /* We re-create operands[3] in the halved VFMLSEL mode
2456 because we've calculated the correct half-width subreg to extract
2457 the lane from and we want to print *that* subreg instead. */
2458 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2459 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2461 [(set_attr "type" "neon_fp_mla_s<q>")]
2464 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2465 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2469 (vec_select:<VFMLSEL>
2470 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2471 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2473 (vec_duplicate:<VFMLSEL>
2475 (match_operand:<VFML> 3 "s_register_operand" "x")
2476 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2477 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2480 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2481 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2483 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2484 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2488 operands[5] = GEN_INT (lane);
2489 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2492 [(set_attr "type" "neon_fp_mla_s<q>")]
2495 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2497 (define_insn "neon_vmla<mode>_unspec"
2498 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2499 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2500 (match_operand:VDQW 2 "s_register_operand" "w")
2501 (match_operand:VDQW 3 "s_register_operand" "w")]
2504 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2506 (if_then_else (match_test "<Is_float_mode>")
2507 (const_string "neon_fp_mla_s<q>")
2508 (const_string "neon_mla_<V_elem_ch><q>")))]
2511 (define_insn "neon_vmlal<sup><mode>"
2512 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2513 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2514 (match_operand:VW 2 "s_register_operand" "w")
2515 (match_operand:VW 3 "s_register_operand" "w")]
2518 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2519 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2522 (define_expand "neon_vmls<mode>"
2523 [(match_operand:VDQW 0 "s_register_operand")
2524 (match_operand:VDQW 1 "s_register_operand")
2525 (match_operand:VDQW 2 "s_register_operand")
2526 (match_operand:VDQW 3 "s_register_operand")]
2529 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2530 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2531 operands[1], operands[2], operands[3]));
2533 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2534 operands[2], operands[3]));
2538 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2540 (define_insn "neon_vmls<mode>_unspec"
2541 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2542 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2543 (match_operand:VDQW 2 "s_register_operand" "w")
2544 (match_operand:VDQW 3 "s_register_operand" "w")]
2547 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2549 (if_then_else (match_test "<Is_float_mode>")
2550 (const_string "neon_fp_mla_s<q>")
2551 (const_string "neon_mla_<V_elem_ch><q>")))]
2554 (define_insn "neon_vmlsl<sup><mode>"
2555 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2556 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2557 (match_operand:VW 2 "s_register_operand" "w")
2558 (match_operand:VW 3 "s_register_operand" "w")]
2561 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2562 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2565 ;; vqdmulh, vqrdmulh
;; Saturating doubling multiply returning high half; <r> selects the
;; rounding (vqrdmulh) or non-rounding (vqdmulh) form.
2566 (define_insn "neon_vq<r>dmulh<mode>"
2567 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2568 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2569 (match_operand:VMDQI 2 "s_register_operand" "w")]
2572 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2573 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2576 ;; vqrdmlah, vqrdmlsh
;; Rounding saturating doubling multiply accumulate/subtract; the
;; VQRDMLH_AS iterator selects the "a" (accumulate) or "s" (subtract) form.
;; Operand 1 is the accumulator (tied to the output).
2577 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2578 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2579 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2580 (match_operand:VMDQI 2 "s_register_operand" "w")
2581 (match_operand:VMDQI 3 "s_register_operand" "w")]
2584 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2585 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]

;; Saturating doubling widening multiply-accumulate.
2588 (define_insn "neon_vqdmlal<mode>"
2589 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2590 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2591 (match_operand:VMDI 2 "s_register_operand" "w")
2592 (match_operand:VMDI 3 "s_register_operand" "w")]
2595 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2596 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]

;; Saturating doubling widening multiply-subtract.
2599 (define_insn "neon_vqdmlsl<mode>"
2600 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2601 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2602 (match_operand:VMDI 2 "s_register_operand" "w")
2603 (match_operand:VMDI 3 "s_register_operand" "w")]
2606 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2607 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]

;; Widening multiply (vmull), signed/unsigned via <sup>.
2610 (define_insn "neon_vmull<sup><mode>"
2611 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2612 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2613 (match_operand:VW 2 "s_register_operand" "w")]
2616 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2617 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]

;; Saturating doubling widening multiply (vqdmull).
2620 (define_insn "neon_vqdmull<mode>"
2621 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2622 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2623 (match_operand:VMDI 2 "s_register_operand" "w")]
2626 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2627 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2630 (define_expand "neon_vsub<mode>"
2631 [(match_operand:VCVTF 0 "s_register_operand")
2632 (match_operand:VCVTF 1 "s_register_operand")
2633 (match_operand:VCVTF 2 "s_register_operand")]
2636 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2637 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2639 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2644 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2646 (define_insn "neon_vsub<mode>_unspec"
2647 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2648 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2649 (match_operand:VCVTF 2 "s_register_operand" "w")]
2652 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2654 (if_then_else (match_test "<Is_float_mode>")
2655 (const_string "neon_fp_addsub_s<q>")
2656 (const_string "neon_sub<q>")))]
2659 (define_insn "neon_vsubl<sup><mode>"
2660 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2661 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2662 (match_operand:VDI 2 "s_register_operand" "w")]
2665 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2666 [(set_attr "type" "neon_sub_long")]
2669 (define_insn "neon_vsubw<sup><mode>"
2670 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2671 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2672 (match_operand:VDI 2 "s_register_operand" "w")]
2675 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2676 [(set_attr "type" "neon_sub_widen")]
2679 (define_insn "neon_vqsub<sup><mode>"
2680 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2681 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2682 (match_operand:VDQIX 2 "s_register_operand" "w")]
2685 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2686 [(set_attr "type" "neon_qsub<q>")]
2689 (define_insn "neon_vhsub<sup><mode>"
2690 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2691 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2692 (match_operand:VDQIW 2 "s_register_operand" "w")]
2695 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2696 [(set_attr "type" "neon_sub_halve<q>")]
2699 (define_insn "neon_v<r>subhn<mode>"
2700 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2701 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2702 (match_operand:VN 2 "s_register_operand" "w")]
2705 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2706 [(set_attr "type" "neon_sub_halve_narrow_q")]
2709 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2710 ;; without unsafe math optimizations.
;; Comparison intrinsic expander over both integer and float vector modes.
2711 (define_expand "neon_vc<cmp_op><mode>"
2712 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2714 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2715 (match_operand:VDQW 2 "reg_or_zero_operand")))]
2718 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2720 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2721 && !flag_unsafe_math_optimizations)
2723 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2724 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2725 whereas this expander iterates over the integer modes as well,
2726 but we will never expand to UNSPECs for the integer comparisons. */
2730 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2735 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2744 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],

;; Native comparison insn; alternative 1 compares against #0 (Dz constraint).
;; Disabled for float modes unless -funsafe-math-optimizations.
2751 (define_insn "neon_vc<cmp_op><mode>_insn"
2752 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2754 (COMPARISONS:<V_cmp_result>
2755 (match_operand:VDQW 1 "s_register_operand" "w,w")
2756 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2757 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2758 && !flag_unsafe_math_optimizations)"
;; Build the template at output time: ".f" suffix for float element types,
;; and "#0" as the second source for the compare-against-zero alternative.
2761 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2763 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2764 ? "f" : "<cmp_type>",
2765 which_alternative == 0
2766 ? "%<V_reg>2" : "#0");
2767 output_asm_insn (pattern, operands);
2771 (if_then_else (match_operand 2 "zero_operand")
2772 (const_string "neon_compare_zero<q>")
2773 (const_string "neon_compare<q>")))]

;; UNSPEC form of the float comparisons, used when unsafe-math is off.
2776 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2777 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2778 (unspec:<V_cmp_result>
2779 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2780 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2785 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2787 which_alternative == 0
2788 ? "%<V_reg>2" : "#0");
2789 output_asm_insn (pattern, operands);
2792 [(set_attr "type" "neon_fp_compare_s<q>")]
2795 (define_expand "neon_vc<cmp_op><mode>"
2796 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2799 (match_operand:VH 1 "s_register_operand")
2800 (match_operand:VH 2 "reg_or_zero_operand")))]
2801 "TARGET_NEON_FP16INST"
2803 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2805 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2806 && !flag_unsafe_math_optimizations)
2808 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2809 (operands[0], operands[1], operands[2]));
2812 (gen_neon_vc<cmp_op><mode>_fp16insn
2813 (operands[0], operands[1], operands[2]));
2817 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2818 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2820 (COMPARISONS:<V_cmp_result>
2821 (match_operand:VH 1 "s_register_operand" "w,w")
2822 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2823 "TARGET_NEON_FP16INST
2824 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2825 && !flag_unsafe_math_optimizations)"
2828 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2830 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2831 ? "f" : "<cmp_type>",
2832 which_alternative == 0
2833 ? "%<V_reg>2" : "#0");
2834 output_asm_insn (pattern, operands);
2838 (if_then_else (match_operand 2 "zero_operand")
2839 (const_string "neon_compare_zero<q>")
2840 (const_string "neon_compare<q>")))])
2842 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2844 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2845 (unspec:<V_cmp_result>
2846 [(match_operand:VH 1 "s_register_operand" "w,w")
2847 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2849 "TARGET_NEON_FP16INST"
2852 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2854 which_alternative == 0
2855 ? "%<V_reg>2" : "#0");
2856 output_asm_insn (pattern, operands);
2859 [(set_attr "type" "neon_fp_compare_s<q>")])
2861 (define_insn "neon_vc<cmp_op>u<mode>"
2862 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2864 (GTUGEU:<V_cmp_result>
2865 (match_operand:VDQIW 1 "s_register_operand" "w")
2866 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2868 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2869 [(set_attr "type" "neon_compare<q>")]
2872 (define_expand "neon_vca<cmp_op><mode>"
2873 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2875 (GTGE:<V_cmp_result>
2876 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2877 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2880 if (flag_unsafe_math_optimizations)
2881 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2884 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2891 (define_insn "neon_vca<cmp_op><mode>_insn"
2892 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2894 (GTGE:<V_cmp_result>
2895 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2896 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2897 "TARGET_NEON && flag_unsafe_math_optimizations"
2898 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2899 [(set_attr "type" "neon_fp_compare_s<q>")]
2902 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2903 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2904 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2905 (match_operand:VCVTF 2 "s_register_operand" "w")]
2908 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2909 [(set_attr "type" "neon_fp_compare_s<q>")]
2912 (define_expand "neon_vca<cmp_op><mode>"
2914 (match_operand:<V_cmp_result> 0 "s_register_operand")
2916 (GLTE:<V_cmp_result>
2917 (abs:VH (match_operand:VH 1 "s_register_operand"))
2918 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2919 "TARGET_NEON_FP16INST"
2921 if (flag_unsafe_math_optimizations)
2922 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2923 (operands[0], operands[1], operands[2]));
2925 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2926 (operands[0], operands[1], operands[2]));
2930 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2932 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2934 (GLTE:<V_cmp_result>
2935 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2936 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2937 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2938 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2939 [(set_attr "type" "neon_fp_compare_s<q>")]
2942 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2943 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2944 (unspec:<V_cmp_result>
2945 [(match_operand:VH 1 "s_register_operand" "w")
2946 (match_operand:VH 2 "s_register_operand" "w")]
2949 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2950 [(set_attr "type" "neon_fp_compare_s<q>")]
2953 (define_expand "neon_vc<cmp_op>z<mode>"
2955 (match_operand:<V_cmp_result> 0 "s_register_operand")
2956 (COMPARISONS:<V_cmp_result>
2957 (match_operand:VH 1 "s_register_operand")
2959 "TARGET_NEON_FP16INST"
2961 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2962 CONST0_RTX (<MODE>mode)));
2966 (define_insn "neon_vtst<mode>"
2967 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2968 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2969 (match_operand:VDQIW 2 "s_register_operand" "w")]
2972 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2973 [(set_attr "type" "neon_tst<q>")]
2976 (define_insn "neon_vabd<sup><mode>"
2977 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2978 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2979 (match_operand:VDQIW 2 "s_register_operand" "w")]
2982 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2983 [(set_attr "type" "neon_abd<q>")]
2986 (define_insn "neon_vabd<mode>"
2987 [(set (match_operand:VH 0 "s_register_operand" "=w")
2988 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2989 (match_operand:VH 2 "s_register_operand" "w")]
2991 "TARGET_NEON_FP16INST"
2992 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2993 [(set_attr "type" "neon_abd<q>")]
2996 (define_insn "neon_vabdf<mode>"
2997 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2998 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2999 (match_operand:VCVTF 2 "s_register_operand" "w")]
3002 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3003 [(set_attr "type" "neon_fp_abd_s<q>")]
3006 (define_insn "neon_vabdl<sup><mode>"
3007 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3008 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3009 (match_operand:VW 2 "s_register_operand" "w")]
3012 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3013 [(set_attr "type" "neon_abd_long")]
3016 (define_insn "neon_vaba<sup><mode>"
3017 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3018 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3019 (match_operand:VDQIW 3 "s_register_operand" "w")]
3021 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3023 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3024 [(set_attr "type" "neon_arith_acc<q>")]
3027 (define_insn "neon_vabal<sup><mode>"
3028 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3029 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3030 (match_operand:VW 3 "s_register_operand" "w")]
3032 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3034 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3035 [(set_attr "type" "neon_arith_acc<q>")]
;; Sum-of-absolute-differences expander for the vectorizer ([us]sadv16qi):
;; vabdl on the low V8QI halves, vabal accumulating the high halves into the
;; same V8HI reduction, then vpadal folds that into the V4SI accumulator
;; (operand 3), which is copied to the result (operand 0).
3038 (define_expand "<sup>sadv16qi"
3039 [(use (match_operand:V4SI 0 "register_operand"))
3040 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
3041 (use (match_operand:V16QI 2 "register_operand"))] VABAL)
3042 (use (match_operand:V4SI 3 "register_operand"))]
3045 rtx reduc = gen_reg_rtx (V8HImode);
3046 rtx op1_highpart = gen_reg_rtx (V8QImode);
3047 rtx op2_highpart = gen_reg_rtx (V8QImode);
3049 emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
3050 gen_lowpart (V8QImode, operands[1]),
3051 gen_lowpart (V8QImode, operands[2])));
3053 emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
3054 emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
3055 emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
3056 op1_highpart, op2_highpart));
3057 emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
3059 emit_move_insn (operands[0], operands[3]);
;; Element-wise max/min family.  NOTE(review): UNSPEC selector and some
;; condition lines are missing from this extract.
;; Integer vmax/vmin (signed/unsigned via <sup>).
3064 (define_insn "neon_v<maxmin><sup><mode>"
3065 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3066 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3067 (match_operand:VDQIW 2 "s_register_operand" "w")]
3070 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3071 [(set_attr "type" "neon_minmax<q>")]
;; Single-precision float vmax/vmin.
3074 (define_insn "neon_v<maxmin>f<mode>"
3075 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3076 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3077 (match_operand:VCVTF 2 "s_register_operand" "w")]
3080 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3081 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Half-precision float vmax/vmin; gated on the FP16 instruction set.
3084 (define_insn "neon_v<maxmin>f<mode>"
3085 [(set (match_operand:VH 0 "s_register_operand" "=w")
3087 [(match_operand:VH 1 "s_register_operand" "w")
3088 (match_operand:VH 2 "s_register_operand" "w")]
3090 "TARGET_NEON_FP16INST"
3091 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3092 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Pairwise f16 max/min on V4HF (D registers only).
3095 (define_insn "neon_vp<maxmin>fv4hf"
3096 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3098 [(match_operand:V4HF 1 "s_register_operand" "w")
3099 (match_operand:V4HF 2 "s_register_operand" "w")]
3101 "TARGET_NEON_FP16INST"
3102 "vp<maxmin>.f16\t%P0, %P1, %P2"
3103 [(set_attr "type" "neon_reduc_minmax")]
;; vmaxnm/vminnm on half-precision vectors.
3106 (define_insn "neon_<fmaxmin_op><mode>"
3108 (match_operand:VH 0 "s_register_operand" "=w")
3110 [(match_operand:VH 1 "s_register_operand" "w")
3111 (match_operand:VH 2 "s_register_operand" "w")]
3113 "TARGET_NEON_FP16INST"
3114 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3115 [(set_attr "type" "neon_fp_minmax_s<q>")]
3118 ;; v<maxmin>nm intrinsics.
3119 (define_insn "neon_<fmaxmin_op><mode>"
3120 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3121 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3122 (match_operand:VCVTF 2 "s_register_operand" "w")]
3124 "TARGET_NEON && TARGET_VFP5"
3125 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3126 [(set_attr "type" "neon_fp_minmax_s<q>")]
3129 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
3130 (define_insn "<fmaxmin><mode>3"
3131 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3132 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3133 (match_operand:VCVTF 2 "s_register_operand" "w")]
3135 "TARGET_NEON && TARGET_VFP5"
3136 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Pairwise add / pairwise accumulate / pairwise max-min patterns.
;; NOTE(review): several condition and UNSPEC lines are missing here.
3140 (define_expand "neon_vpadd<mode>"
3141 [(match_operand:VD 0 "s_register_operand")
3142 (match_operand:VD 1 "s_register_operand")
3143 (match_operand:VD 2 "s_register_operand")]
3146 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: pairwise add long — result elements are twice as wide.
3151 (define_insn "neon_vpaddl<sup><mode>"
3152 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3153 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3156 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3157 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: pairwise add long and accumulate; op 1 (accumulator) is tied
;; to the destination by the "0" constraint.
3160 (define_insn "neon_vpadal<sup><mode>"
3161 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3162 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3163 (match_operand:VDQIW 2 "s_register_operand" "w")]
3166 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3167 [(set_attr "type" "neon_reduc_add_acc")]
;; Pairwise integer max/min (D registers, VDI modes).
3170 (define_insn "neon_vp<maxmin><sup><mode>"
3171 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3172 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3173 (match_operand:VDI 2 "s_register_operand" "w")]
3176 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3177 [(set_attr "type" "neon_reduc_minmax<q>")]
;; Pairwise float max/min.
3180 (define_insn "neon_vp<maxmin>f<mode>"
3181 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3182 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3183 (match_operand:VCVTF 2 "s_register_operand" "w")]
3186 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3187 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; Newton-Raphson step instructions for reciprocal (vrecps) and
;; reciprocal square root (vrsqrts), in f32 and f16 variants.
3190 (define_insn "neon_vrecps<mode>"
3191 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3192 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3193 (match_operand:VCVTF 2 "s_register_operand" "w")]
3196 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3197 [(set_attr "type" "neon_fp_recps_s<q>")]
;; Half-precision variant, gated on the FP16 instruction set.
3200 (define_insn "neon_vrecps<mode>"
3202 (match_operand:VH 0 "s_register_operand" "=w")
3203 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3204 (match_operand:VH 2 "s_register_operand" "w")]
3206 "TARGET_NEON_FP16INST"
3207 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3208 [(set_attr "type" "neon_fp_recps_s<q>")]
3211 (define_insn "neon_vrsqrts<mode>"
3212 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3213 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3214 (match_operand:VCVTF 2 "s_register_operand" "w")]
3217 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3218 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3221 (define_insn "neon_vrsqrts<mode>"
3223 (match_operand:VH 0 "s_register_operand" "=w")
3224 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3225 (match_operand:VH 2 "s_register_operand" "w")]
3227 "TARGET_NEON_FP16INST"
3228 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3229 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Unary intrinsics that forward to generic named patterns, plus the
;; saturating-abs and byte-swap insns.
;; vabs intrinsic expands to the generic abs<mode>2 pattern.
3232 (define_expand "neon_vabs<mode>"
3233 [(match_operand:VDQW 0 "s_register_operand")
3234 (match_operand:VDQW 1 "s_register_operand")]
3237 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs: saturating absolute value (kept as UNSPEC).
3241 (define_insn "neon_vqabs<mode>"
3242 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3243 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3246 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3247 [(set_attr "type" "neon_qabs<q>")]
;; bswap implemented with vrev: reverse bytes within each element.
3250 (define_insn "neon_bswap<mode>"
3251 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3252 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3254 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3255 [(set_attr "type" "neon_rev<q>")]
;; vneg intrinsic expands to the generic neg<mode>2 pattern.
3258 (define_expand "neon_vneg<mode>"
3259 [(match_operand:VDQW 0 "s_register_operand")
3260 (match_operand:VDQW 1 "s_register_operand")]
3263 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3268 ;; The vcadd and vcmla patterns are deliberately written as UNSPECs because
3269 ;; their use must guarantee that the source vectors are contiguous.  It would
3270 ;; be wrong to describe the operation without also being able to describe the
3271 ;; permute that is required, but even if that were done the permute would have
3272 ;; been created as a LOAD_LANES, which means the values in the registers are
3273 ;; in the wrong order.
;; Complex arithmetic: vcadd (rotate-and-add) and vcmla (fused complex
;; multiply-accumulate), including by-lane variants.  NOTE(review): the
;; UNSPEC selectors and condition strings are missing from this extract.
3274 (define_insn "neon_vcadd<rot><mode>"
3275 [(set (match_operand:VF 0 "register_operand" "=w")
3276 (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3277 (match_operand:VF 2 "register_operand" "w")]
3280 "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3281 [(set_attr "type" "neon_fcadd")]
;; vcmla: operand 1 is the accumulator, tied to the destination.
3284 (define_insn "neon_vcmla<rot><mode>"
3285 [(set (match_operand:VF 0 "register_operand" "=w")
3286 (plus:VF (match_operand:VF 1 "register_operand" "0")
3287 (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3288 (match_operand:VF 3 "register_operand" "w")]
3291 "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3292 [(set_attr "type" "neon_fcmla")]
;; By-lane vcmla; neon_vcmla_lane_prepare_operands rewrites the operands
;; before the template is emitted (helper defined elsewhere in the backend).
3295 (define_insn "neon_vcmla_lane<rot><mode>"
3296 [(set (match_operand:VF 0 "s_register_operand" "=w")
3297 (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3298 (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3299 (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3300 (match_operand:SI 4 "const_int_operand" "n")]
3304 operands = neon_vcmla_lane_prepare_operands (operands);
3305 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3307 [(set_attr "type" "neon_fcmla")]
;; laneq variant: the lane vector (op 3) is the double-width mode.
3310 (define_insn "neon_vcmla_laneq<rot><mode>"
3311 [(set (match_operand:VDF 0 "s_register_operand" "=w")
3312 (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3313 (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3314 (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3315 (match_operand:SI 4 "const_int_operand" "n")]
3319 operands = neon_vcmla_lane_prepare_operands (operands);
3320 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3322 [(set_attr "type" "neon_fcmla")]
;; Q-register destination with a half-width lane vector.
3325 (define_insn "neon_vcmlaq_lane<rot><mode>"
3326 [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3327 (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3328 (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3329 (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3330 (match_operand:SI 4 "const_int_operand" "n")]
3334 operands = neon_vcmla_lane_prepare_operands (operands);
3335 return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3337 [(set_attr "type" "neon_fcmla")]
3341 ;; These instructions map to the __builtins for the Dot Product operations.
3342 (define_insn "neon_<sup>dot<vsi2qi>"
3343 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3344 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3345 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3346 "register_operand" "w")
3347 (match_operand:<VSI2QI> 3
3348 "register_operand" "w")]
3351 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3352 [(set_attr "type" "neon_dot<q>")]
3355 ;; These instructions map to the __builtins for the Dot Product
3356 ;; indexed operations.
3357 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3358 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3359 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3360 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3361 "register_operand" "w")
3362 (match_operand:V8QI 3 "register_operand" "t")
3363 (match_operand:SI 4 "immediate_operand" "i")]
;; Lane number is remapped for big-endian before emitting the template.
3368 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3369 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3371 [(set_attr "type" "neon_dot<q>")]
3374 ;; These expands map to the Dot Product optab the vectorizer checks for.
3375 ;; The auto-vectorizer expects a dot product builtin that also does an
3376 ;; accumulation into the provided register.
3377 ;; Given the following pattern
3379 ;; for (i=0; i<len; i++) {
3385 ;; This can be auto-vectorized to
3386 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3388 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3389 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3390 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3393 ;; and so the vectorizer provides r, in which the result has to be accumulated.
3394 (define_expand "<sup>dot_prod<vsi2qi>"
3395 [(set (match_operand:VCVTI 0 "register_operand")
3396 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3398 (match_operand:<VSI2QI> 2
3399 "register_operand")]
3401 (match_operand:VCVTI 3 "register_operand")))]
;; Accumulate into operand 3, then copy the result to operand 0.
3405 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3407 emit_insn (gen_rtx_SET (operands[0], operands[3]));
;; copysign: build an all-0x80000000 sign-bit mask and use vbsl to select
;; the sign bit from operand 1 and the remaining bits from operand 2.
;; NOTE(review): the declaration of v_bitmask_cast and the tail of the
;; gen_neon_vbsl call are missing from this extract.
3411 (define_expand "neon_copysignf<mode>"
3412 [(match_operand:VCVTF 0 "register_operand")
3413 (match_operand:VCVTF 1 "register_operand")
3414 (match_operand:VCVTF 2 "register_operand")]
3418 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3419 rtx c = gen_int_mode (0x80000000, SImode);
3421 emit_move_insn (v_bitmask,
3422 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3423 emit_move_insn (operands[0], operands[2]);
3424 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3425 <VCVTF:V_cmp_result>mode, 0);
3426 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; Miscellaneous unary operations: saturating negate, count leading sign
;; bits, count leading zeros, population count, reciprocal estimates and
;; bitwise NOT.
3433 (define_insn "neon_vqneg<mode>"
3434 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3435 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3438 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3439 [(set_attr "type" "neon_qneg<q>")]
;; vcls: count leading sign bits.
3442 (define_insn "neon_vcls<mode>"
3443 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3444 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3447 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3448 [(set_attr "type" "neon_cls<q>")]
;; Generic clz optab pattern, plus the intrinsic expand that reuses it.
3451 (define_insn "clz<mode>2"
3452 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3453 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3455 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3456 [(set_attr "type" "neon_cnt<q>")]
3459 (define_expand "neon_vclz<mode>"
3460 [(match_operand:VDQIW 0 "s_register_operand")
3461 (match_operand:VDQIW 1 "s_register_operand")]
3464 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; Generic popcount optab pattern (vcnt operates on byte elements only),
;; plus the intrinsic expand that reuses it.
3468 (define_insn "popcount<mode>2"
3469 [(set (match_operand:VE 0 "s_register_operand" "=w")
3470 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3472 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3473 [(set_attr "type" "neon_cnt<q>")]
3476 (define_expand "neon_vcnt<mode>"
3477 [(match_operand:VE 0 "s_register_operand")
3478 (match_operand:VE 1 "s_register_operand")]
3481 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe/vrsqrte estimates: f16 variant gated on FP16 instructions,
;; V32 variants cover the 32-bit element modes.
3485 (define_insn "neon_vrecpe<mode>"
3486 [(set (match_operand:VH 0 "s_register_operand" "=w")
3487 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3489 "TARGET_NEON_FP16INST"
3490 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3491 [(set_attr "type" "neon_fp_recpe_s<q>")]
3494 (define_insn "neon_vrecpe<mode>"
3495 [(set (match_operand:V32 0 "s_register_operand" "=w")
3496 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3499 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3500 [(set_attr "type" "neon_fp_recpe_s<q>")]
3503 (define_insn "neon_vrsqrte<mode>"
3504 [(set (match_operand:V32 0 "s_register_operand" "=w")
3505 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3508 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3509 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; vmvn intrinsic expands to the generic one's-complement pattern.
3512 (define_expand "neon_vmvn<mode>"
3513 [(match_operand:VDQIW 0 "s_register_operand")
3514 (match_operand:VDQIW 1 "s_register_operand")]
3517 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Lane-extract internals.  D-register forms remap the lane index for
;; big-endian; Q-register forms additionally pick the containing D
;; register at output time.  NOTE(review): the sign_extend/zero_extend
;; wrapper lines and some braces are missing from this extract.
3521 (define_insn "neon_vget_lane<mode>_sext_internal"
3522 [(set (match_operand:SI 0 "s_register_operand" "=r")
3524 (vec_select:<V_elem>
3525 (match_operand:VD 1 "s_register_operand" "w")
3526 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
;; Big-endian: mirror the lane number within the D register.
3529 if (BYTES_BIG_ENDIAN)
3531 int elt = INTVAL (operands[2]);
3532 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3533 operands[2] = GEN_INT (elt);
3535 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3537 [(set_attr "type" "neon_to_gp")]
3540 (define_insn "neon_vget_lane<mode>_zext_internal"
3541 [(set (match_operand:SI 0 "s_register_operand" "=r")
3543 (vec_select:<V_elem>
3544 (match_operand:VD 1 "s_register_operand" "w")
3545 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3548 if (BYTES_BIG_ENDIAN)
3550 int elt = INTVAL (operands[2]);
3551 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3552 operands[2] = GEN_INT (elt);
3554 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3556 [(set_attr "type" "neon_to_gp")]
;; Q-register form: compute which D half holds the lane, then extract
;; from that half with the lane index reduced modulo the half length.
3559 (define_insn "neon_vget_lane<mode>_sext_internal"
3560 [(set (match_operand:SI 0 "s_register_operand" "=r")
3562 (vec_select:<V_elem>
3563 (match_operand:VQ2 1 "s_register_operand" "w")
3564 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3568 int regno = REGNO (operands[1]);
3569 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3570 unsigned int elt = INTVAL (operands[2]);
3571 unsigned int elt_adj = elt % halfelts;
3573 if (BYTES_BIG_ENDIAN)
3574 elt_adj = halfelts - 1 - elt_adj;
3576 ops[0] = operands[0];
3577 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3578 ops[2] = GEN_INT (elt_adj);
3579 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3583 [(set_attr "type" "neon_to_gp_q")]
3586 (define_insn "neon_vget_lane<mode>_zext_internal"
3587 [(set (match_operand:SI 0 "s_register_operand" "=r")
3589 (vec_select:<V_elem>
3590 (match_operand:VQ2 1 "s_register_operand" "w")
3591 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3595 int regno = REGNO (operands[1]);
3596 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3597 unsigned int elt = INTVAL (operands[2]);
3598 unsigned int elt_adj = elt % halfelts;
3600 if (BYTES_BIG_ENDIAN)
3601 elt_adj = halfelts - 1 - elt_adj;
3603 ops[0] = operands[0];
3604 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3605 ops[2] = GEN_INT (elt_adj);
3606 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3610 [(set_attr "type" "neon_to_gp_q")]
;; Lane-extract expanders.  On big-endian targets the intrinsic lane
;; number (vldm order) is XORed into array order before use; 32-bit
;; elements go through the generic vec_extract pattern, narrower
;; elements through the sign/zero-extending internals above.
3613 (define_expand "neon_vget_lane<mode>"
3614 [(match_operand:<V_ext> 0 "s_register_operand")
3615 (match_operand:VDQW 1 "s_register_operand")
3616 (match_operand:SI 2 "immediate_operand")]
3619 if (BYTES_BIG_ENDIAN)
3621 /* The intrinsics are defined in terms of a model where the
3622 element ordering in memory is vldm order, whereas the generic
3623 RTL is defined in terms of a model where the element ordering
3624 in memory is array order. Convert the lane number to conform
3626 unsigned int elt = INTVAL (operands[2]);
3627 unsigned int reg_nelts
3628 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3629 elt ^= reg_nelts - 1;
3630 operands[2] = GEN_INT (elt);
3633 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3634 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3637 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; Unsigned variant: identical lane remap, zero-extending internal.
3643 (define_expand "neon_vget_laneu<mode>"
3644 [(match_operand:<V_ext> 0 "s_register_operand")
3645 (match_operand:VDQIW 1 "s_register_operand")
3646 (match_operand:SI 2 "immediate_operand")]
3649 if (BYTES_BIG_ENDIAN)
3651 /* The intrinsics are defined in terms of a model where the
3652 element ordering in memory is vldm order, whereas the generic
3653 RTL is defined in terms of a model where the element ordering
3654 in memory is array order. Convert the lane number to conform
3656 unsigned int elt = INTVAL (operands[2]);
3657 unsigned int reg_nelts
3658 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3659 elt ^= reg_nelts - 1;
3660 operands[2] = GEN_INT (elt);
3663 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3664 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3667 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DI: a single 64-bit element, so the lane index is ignored and the
;; extract degenerates to a move.
3673 (define_expand "neon_vget_lanedi"
3674 [(match_operand:DI 0 "s_register_operand")
3675 (match_operand:DI 1 "s_register_operand")
3676 (match_operand:SI 2 "immediate_operand")]
3679 emit_move_insn (operands[0], operands[1]);
;; V2DI: pick the low or high 64-bit half after the big-endian remap.
3683 (define_expand "neon_vget_lanev2di"
3684 [(match_operand:DI 0 "s_register_operand")
3685 (match_operand:V2DI 1 "s_register_operand")
3686 (match_operand:SI 2 "immediate_operand")]
3691 if (BYTES_BIG_ENDIAN)
3693 /* The intrinsics are defined in terms of a model where the
3694 element ordering in memory is vldm order, whereas the generic
3695 RTL is defined in terms of a model where the element ordering
3696 in memory is array order. Convert the lane number to conform
3698 unsigned int elt = INTVAL (operands[2]);
3699 unsigned int reg_nelts = 2;
3700 elt ^= reg_nelts - 1;
3701 operands[2] = GEN_INT (elt);
3704 lane = INTVAL (operands[2]);
3705 gcc_assert ((lane ==0) || (lane == 1));
3706 emit_move_insn (operands[0], lane == 0
3707 ? gen_lowpart (DImode, operands[1])
3708 : gen_highpart (DImode, operands[1]));
;; Lane-insert expanders and vcreate.
;; vset_lane: remap the lane for big-endian, then use the generic
;; vec_set pattern with a one-hot element mask (1 << elt).
3712 (define_expand "neon_vset_lane<mode>"
3713 [(match_operand:VDQ 0 "s_register_operand")
3714 (match_operand:<V_elem> 1 "s_register_operand")
3715 (match_operand:VDQ 2 "s_register_operand")
3716 (match_operand:SI 3 "immediate_operand")]
3719 unsigned int elt = INTVAL (operands[3]);
3721 if (BYTES_BIG_ENDIAN)
3723 unsigned int reg_nelts
3724 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3725 elt ^= reg_nelts - 1;
3728 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3729 GEN_INT (1 << elt), operands[2]));
3733 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; DI has a single element, so setting "the lane" is just a move.
3735 (define_expand "neon_vset_lanedi"
3736 [(match_operand:DI 0 "s_register_operand")
3737 (match_operand:DI 1 "s_register_operand")
3738 (match_operand:DI 2 "s_register_operand")
3739 (match_operand:SI 3 "immediate_operand")]
3742 emit_move_insn (operands[0], operands[1]);
;; vcreate: reinterpret a 64-bit scalar as the requested vector mode.
3746 (define_expand "neon_vcreate<mode>"
3747 [(match_operand:VD_RE 0 "s_register_operand")
3748 (match_operand:DI 1 "general_operand")]
3751 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3752 emit_move_insn (operands[0], src);
;; Duplicate-scalar (vdup_n) family.
;; Integer element modes: duplicate from a core register.
3756 (define_insn "neon_vdup_n<mode>"
3757 [(set (match_operand:VX 0 "s_register_operand" "=w")
3758 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3760 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3761 [(set_attr "type" "neon_from_gp<q>")]
;; HF duplicates.  NOTE(review): the condition and template lines for the
;; V4HF/V8HF patterns are missing from this extract.
3764 (define_insn "neon_vdup_nv4hf"
3765 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3766 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3769 [(set_attr "type" "neon_from_gp")]
3772 (define_insn "neon_vdup_nv8hf"
3773 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3774 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3777 [(set_attr "type" "neon_from_gp_q")]
;; 32-bit element modes: source may be a core register (r) or an FP
;; scalar register (t), with a different template for each alternative.
3780 (define_insn "neon_vdup_n<mode>"
3781 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3782 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3785 vdup.<V_sz_elem>\t%<V_reg>0, %1
3786 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3787 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DI "dup" is a plain move.
3790 (define_expand "neon_vdup_ndi"
3791 [(match_operand:DI 0 "s_register_operand")
3792 (match_operand:DI 1 "s_register_operand")]
3795 emit_move_insn (operands[0], operands[1]);
;; V2DI: two vmov pairs (length 8) to fill both 64-bit halves.
3800 (define_insn "neon_vdup_nv2di"
3801 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3802 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3805 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3806 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3807 [(set_attr "length" "8")
3808 (set_attr "type" "multiple")]
;; Duplicate-lane (vdup_lane) internals and expanders.  The source is
;; always the double-register (64-bit) vector mode; the internals mirror
;; the lane index for big-endian before emitting the template.
3811 (define_insn "neon_vdup_lane<mode>_internal"
3812 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3814 (vec_select:<V_elem>
3815 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3816 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3819 if (BYTES_BIG_ENDIAN)
3821 int elt = INTVAL (operands[2]);
3822 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3823 operands[2] = GEN_INT (elt);
;; D-register vs Q-register destination templates (the selecting
;; condition line is missing from this extract).
3826 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3828 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3830 [(set_attr "type" "neon_dup<q>")]
;; Half-precision variant, gated on TARGET_FP16.
3833 (define_insn "neon_vdup_lane<mode>_internal"
3834 [(set (match_operand:VH 0 "s_register_operand" "=w")
3836 (vec_select:<V_elem>
3837 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3838 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3839 "TARGET_NEON && TARGET_FP16"
3841 if (BYTES_BIG_ENDIAN)
3843 int elt = INTVAL (operands[2]);
3844 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3845 operands[2] = GEN_INT (elt);
3848 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3850 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3852 [(set_attr "type" "neon_dup<q>")]
;; Expanders: convert the intrinsic (vldm-order) lane number to array
;; order on big-endian, then defer to the internal pattern.
3855 (define_expand "neon_vdup_lane<mode>"
3856 [(match_operand:VDQW 0 "s_register_operand")
3857 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3858 (match_operand:SI 2 "immediate_operand")]
3861 if (BYTES_BIG_ENDIAN)
3863 unsigned int elt = INTVAL (operands[2]);
3864 unsigned int reg_nelts
3865 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3866 elt ^= reg_nelts - 1;
3867 operands[2] = GEN_INT (elt);
3869 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3874 (define_expand "neon_vdup_lane<mode>"
3875 [(match_operand:VH 0 "s_register_operand")
3876 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3877 (match_operand:SI 2 "immediate_operand")]
3878 "TARGET_NEON && TARGET_FP16"
3880 if (BYTES_BIG_ENDIAN)
3882 unsigned int elt = INTVAL (operands[2]);
3883 unsigned int reg_nelts
3884 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3885 elt ^= reg_nelts - 1;
3886 operands[2] = GEN_INT (elt);
3888 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3904 ; Likewise for v2di, as the DImode second operand has only a single element.
3905 (define_expand "neon_vdup_lanev2di"
3906 [(match_operand:V2DI 0 "s_register_operand")
3907 (match_operand:DI 1 "s_register_operand")
3908 (match_operand:SI 2 "immediate_operand")]
3911 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3915 ; Disabled before reload because we don't want combine doing something silly,
3916 ; but used by the post-reload expansion of neon_vcombine.
3917 (define_insn "*neon_vswp<mode>"
3918 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3919 (match_operand:VDQX 1 "s_register_operand" "+w"))
3920 (set (match_dup 1) (match_dup 0))]
3921 "TARGET_NEON && reload_completed"
3922 "vswp\t%<V_reg>0, %<V_reg>1"
3923 [(set_attr "type" "neon_permute<q>")]
3926 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3928 ;; FIXME: A different implementation of this builtin could make it much
3929 ;; more likely that we wouldn't actually need to output anything (we could make
3930 ;; it so that the reg allocator puts things in the right places magically
3931 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3933 (define_insn_and_split "neon_vcombine<mode>"
3934 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3935 (vec_concat:<V_DOUBLE>
3936 (match_operand:VDX 1 "s_register_operand" "w")
3937 (match_operand:VDX 2 "s_register_operand" "w")))]
3940 "&& reload_completed"
3943 neon_split_vcombine (operands);
3946 [(set_attr "type" "multiple")]
;; Extract the high or low D-register half of a Q vector as a subreg
;; move (byte offset GET_MODE_SIZE(<V_HALF>mode) for the high half,
;; 0 for the low half).
3949 (define_expand "neon_vget_high<mode>"
3950 [(match_operand:<V_HALF> 0 "s_register_operand")
3951 (match_operand:VQX 1 "s_register_operand")]
3954 emit_move_insn (operands[0],
3955 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3956 GET_MODE_SIZE (<V_HALF>mode)));
3960 (define_expand "neon_vget_low<mode>"
3961 [(match_operand:<V_HALF> 0 "s_register_operand")
3962 (match_operand:VQX 1 "s_register_operand")]
3965 emit_move_insn (operands[0],
3966 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Integer <-> floating-point conversions.  The float optab patterns are
;; disabled under -frounding-math because vcvt uses a fixed rounding mode.
3971 (define_insn "float<mode><V_cvtto>2"
3972 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3973 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3974 "TARGET_NEON && !flag_rounding_math"
3975 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3976 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3979 (define_insn "floatuns<mode><V_cvtto>2"
3980 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3981 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3982 "TARGET_NEON && !flag_rounding_math"
3983 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3984 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Truncating float-to-int conversions (round toward zero).
3987 (define_insn "fix_trunc<mode><V_cvtto>2"
3988 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3989 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3991 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3992 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3995 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3996 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3997 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3999 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4000 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Intrinsic vcvt patterns; <sup> selects signed/unsigned.
4003 (define_insn "neon_vcvt<sup><mode>"
4004 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4005 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4008 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4009 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4012 (define_insn "neon_vcvt<sup><mode>"
4013 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4014 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4017 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4018 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; f16 <-> f32 widen/narrow conversions (TARGET_FP16).
4021 (define_insn "neon_vcvtv4sfv4hf"
4022 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4023 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4025 "TARGET_NEON && TARGET_FP16"
4026 "vcvt.f32.f16\t%q0, %P1"
4027 [(set_attr "type" "neon_fp_cvt_widen_h")]
4030 (define_insn "neon_vcvtv4hfv4sf"
4031 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4032 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4034 "TARGET_NEON && TARGET_FP16"
4035 "vcvt.f16.f32\t%P0, %q1"
4036 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; f16 <-> int16 conversions (FP16 instruction set).
4039 (define_insn "neon_vcvt<sup><mode>"
4041 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4043 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4045 "TARGET_NEON_FP16INST"
4046 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4047 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4050 (define_insn "neon_vcvt<sup><mode>"
4052 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4054 [(match_operand:VH 1 "s_register_operand" "w")]
4056 "TARGET_NEON_FP16INST"
4057 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4058 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Fixed-point conversions: vcvt with an immediate number of fraction
;; bits (operand 2), range-checked by arm_const_bounds at output time.
4061 (define_insn "neon_vcvt<sup>_n<mode>"
4062 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4063 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4064 (match_operand:SI 2 "immediate_operand" "i")]
;; f32 -> int: fraction bits must lie in [1, 32].
4068 arm_const_bounds (operands[2], 1, 33);
4069 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4071 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; f16 -> int: fraction bits must lie in [0, 16].
4074 (define_insn "neon_vcvt<sup>_n<mode>"
4075 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4077 [(match_operand:VH 1 "s_register_operand" "w")
4078 (match_operand:SI 2 "immediate_operand" "i")]
4080 "TARGET_NEON_FP16INST"
4082 arm_const_bounds (operands[2], 0, 17);
4083 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4085 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; int -> f32.
4088 (define_insn "neon_vcvt<sup>_n<mode>"
4089 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4090 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4091 (match_operand:SI 2 "immediate_operand" "i")]
4095 arm_const_bounds (operands[2], 1, 33);
4096 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4098 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; int -> f16.
4101 (define_insn "neon_vcvt<sup>_n<mode>"
4102 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4104 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4105 (match_operand:SI 2 "immediate_operand" "i")]
4107 "TARGET_NEON_FP16INST"
4109 arm_const_bounds (operands[2], 0, 17);
4110 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4112 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Rounding conversions: <vcvth_op> selects the rounding variant
;; (e.g. vcvta/vcvtm/vcvtn/vcvtp — confirm against the iterator).
4115 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4117 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4119 [(match_operand:VH 1 "s_register_operand" "w")]
4121 "TARGET_NEON_FP16INST"
4122 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4123 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Narrowing and widening moves: Q -> D (vmovn and saturating variants)
;; and D -> Q (vmovl).
4126 (define_insn "neon_vmovn<mode>"
4127 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4128 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4131 "vmovn.<V_if_elem>\t%P0, %q1"
4132 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; vqmovn: saturating narrow, signed or unsigned via <sup>.
4135 (define_insn "neon_vqmovn<sup><mode>"
4136 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4137 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4140 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4141 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vqmovun: signed input narrowed to unsigned with saturation.
4144 (define_insn "neon_vqmovun<mode>"
4145 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4146 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4149 "vqmovun.<V_s_elem>\t%P0, %q1"
4150 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vmovl: widen each element to double width.
4153 (define_insn "neon_vmovl<sup><mode>"
4154 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4155 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4158 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4159 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by a scalar selected from a vector lane (vmul_lane).  The
;; scalar operand uses <scalar_mul_constraint> to restrict which D
;; registers may hold it; the lane index is an immediate (%c-printed).
;; The "type" attribute distinguishes float vs integer scheduling classes
;; via <Is_float_mode>.
;; D-register (64-bit vector) variant.
4162 (define_insn "neon_vmul_lane<mode>"
4163 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4164 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4165 (match_operand:VMD 2 "s_register_operand"
4166 "<scalar_mul_constraint>")
4167 (match_operand:SI 3 "immediate_operand" "i")]
4171 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4174 (if_then_else (match_test "<Is_float_mode>")
4175 (const_string "neon_fp_mul_s_scalar<q>")
4176 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Q-register variant: the scalar comes from the half-width mode <V_HALF>.
4179 (define_insn "neon_vmul_lane<mode>"
4180 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4181 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4182 (match_operand:<V_HALF> 2 "s_register_operand"
4183 "<scalar_mul_constraint>")
4184 (match_operand:SI 3 "immediate_operand" "i")]
4188 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4191 (if_then_else (match_test "<Is_float_mode>")
4192 (const_string "neon_fp_mul_s_scalar<q>")
4193 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; fp16 variant: scalar always taken from a V4HF register.
4196 (define_insn "neon_vmul_lane<mode>"
4197 [(set (match_operand:VH 0 "s_register_operand" "=w")
4198 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4199 (match_operand:V4HF 2 "s_register_operand"
4200 "<scalar_mul_constraint>")
4201 (match_operand:SI 3 "immediate_operand" "i")]
4203 "TARGET_NEON_FP16INST"
4204 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4205 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Widening and saturating-high multiplies by a vector lane.
;; vmull_lane: widening multiply, D x scalar -> Q.
4208 (define_insn "neon_vmull<sup>_lane<mode>"
4209 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4210 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4211 (match_operand:VMDI 2 "s_register_operand"
4212 "<scalar_mul_constraint>")
4213 (match_operand:SI 3 "immediate_operand" "i")]
4217 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4219 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull_lane: saturating doubling widening multiply by lane.
4222 (define_insn "neon_vqdmull_lane<mode>"
4223 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4224 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4225 (match_operand:VMDI 2 "s_register_operand"
4226 "<scalar_mul_constraint>")
4227 (match_operand:SI 3 "immediate_operand" "i")]
4228 UNSPEC_VQDMULL_LANE))]
4231 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4233 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vq(r)dmulh_lane: saturating (rounding via <r>) doubling multiply
;; returning the high half.  Q-register variant; scalar from <V_HALF>.
4236 (define_insn "neon_vq<r>dmulh_lane<mode>"
4237 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4238 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4239 (match_operand:<V_HALF> 2 "s_register_operand"
4240 "<scalar_mul_constraint>")
4241 (match_operand:SI 3 "immediate_operand" "i")]
4245 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4247 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; D-register variant of the same.
4250 (define_insn "neon_vq<r>dmulh_lane<mode>"
4251 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4252 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4253 (match_operand:VMDI 2 "s_register_operand"
4254 "<scalar_mul_constraint>")
4255 (match_operand:SI 3 "immediate_operand" "i")]
4259 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4261 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4264 ;; vqrdmlah_lane, vqrdmlsh_lane
;; Rounding saturating doubling multiply accumulate/subtract by lane.
;; Operand 1 is the accumulator, tied to the output ("0" constraint);
;; <VQRDMLH_AS:neon_rdma_as> selects the "a"/"s" mnemonic letter.
;; Q-register variant; scalar from the half-width mode.
4265 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4266 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4267 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4268 (match_operand:VMQI 2 "s_register_operand" "w")
4269 (match_operand:<V_HALF> 3 "s_register_operand"
4270 "<scalar_mul_constraint>")
4271 (match_operand:SI 4 "immediate_operand" "i")]
4276 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4278 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; D-register variant.
4281 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4282 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4283 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4284 (match_operand:VMDI 2 "s_register_operand" "w")
4285 (match_operand:VMDI 3 "s_register_operand"
4286 "<scalar_mul_constraint>")
4287 (match_operand:SI 4 "immediate_operand" "i")]
4292 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4294 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate by a vector lane.  Operand 1 is the accumulator
;; (tied to the output via the "0" constraint); operand 2 is the vector
;; multiplicand; operand 3 the scalar-holding register; operand 4 the
;; lane index.
;; D-register variant.
4297 (define_insn "neon_vmla_lane<mode>"
4298 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4299 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4300 (match_operand:VMD 2 "s_register_operand" "w")
4301 (match_operand:VMD 3 "s_register_operand"
4302 "<scalar_mul_constraint>")
4303 (match_operand:SI 4 "immediate_operand" "i")]
4307 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4310 (if_then_else (match_test "<Is_float_mode>")
4311 (const_string "neon_fp_mla_s_scalar<q>")
4312 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Q-register variant; scalar from the half-width mode.
4315 (define_insn "neon_vmla_lane<mode>"
4316 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4317 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4318 (match_operand:VMQ 2 "s_register_operand" "w")
4319 (match_operand:<V_HALF> 3 "s_register_operand"
4320 "<scalar_mul_constraint>")
4321 (match_operand:SI 4 "immediate_operand" "i")]
4325 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4328 (if_then_else (match_test "<Is_float_mode>")
4329 (const_string "neon_fp_mla_s_scalar<q>")
4330 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by lane (accumulator is already wide).
4333 (define_insn "neon_vmlal<sup>_lane<mode>"
4334 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4335 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4336 (match_operand:VMDI 2 "s_register_operand" "w")
4337 (match_operand:VMDI 3 "s_register_operand"
4338 "<scalar_mul_constraint>")
4339 (match_operand:SI 4 "immediate_operand" "i")]
4343 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4345 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-accumulate by lane.
4348 (define_insn "neon_vqdmlal_lane<mode>"
4349 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4350 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4351 (match_operand:VMDI 2 "s_register_operand" "w")
4352 (match_operand:VMDI 3 "s_register_operand"
4353 "<scalar_mul_constraint>")
4354 (match_operand:SI 4 "immediate_operand" "i")]
4355 UNSPEC_VQDMLAL_LANE))]
4358 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4360 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by a vector lane — mirror images of the vmla_lane
;; patterns above: operand 1 is the tied accumulator, operand 3 holds the
;; scalar, operand 4 the lane index.
;; D-register variant.
4363 (define_insn "neon_vmls_lane<mode>"
4364 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4365 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4366 (match_operand:VMD 2 "s_register_operand" "w")
4367 (match_operand:VMD 3 "s_register_operand"
4368 "<scalar_mul_constraint>")
4369 (match_operand:SI 4 "immediate_operand" "i")]
4373 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4376 (if_then_else (match_test "<Is_float_mode>")
4377 (const_string "neon_fp_mla_s_scalar<q>")
4378 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Q-register variant; scalar from the half-width mode.
4381 (define_insn "neon_vmls_lane<mode>"
4382 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4383 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4384 (match_operand:VMQ 2 "s_register_operand" "w")
4385 (match_operand:<V_HALF> 3 "s_register_operand"
4386 "<scalar_mul_constraint>")
4387 (match_operand:SI 4 "immediate_operand" "i")]
4391 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4394 (if_then_else (match_test "<Is_float_mode>")
4395 (const_string "neon_fp_mla_s_scalar<q>")
4396 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by lane.
4399 (define_insn "neon_vmlsl<sup>_lane<mode>"
4400 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4401 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4402 (match_operand:VMDI 2 "s_register_operand" "w")
4403 (match_operand:VMDI 3 "s_register_operand"
4404 "<scalar_mul_constraint>")
4405 (match_operand:SI 4 "immediate_operand" "i")]
4409 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4411 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; Saturating doubling widening multiply-subtract by lane.
4414 (define_insn "neon_vqdmlsl_lane<mode>"
4415 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4416 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4417 (match_operand:VMDI 2 "s_register_operand" "w")
4418 (match_operand:VMDI 3 "s_register_operand"
4419 "<scalar_mul_constraint>")
4420 (match_operand:SI 4 "immediate_operand" "i")]
4421 UNSPEC_VQDMLSL_LANE))]
4424 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4426 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4429 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4430 ; core register into a temp register, then use a scalar taken from that. This
4431 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4432 ; or extracted from another vector. In the latter case it's currently better to
4433 ; use the "_lane" variant, and the former case can probably be implemented
4434 ; using vld1_lane, but that hasn't been done yet.
;; "_n" (multiply-by-scalar) expanders.  Each one materializes the core
;; scalar into lane 0 of a fresh vector temporary via vset_lane, then
;; reuses the corresponding "_lane" insn with lane index 0 — see the
;; FIXME above about this not being optimal for memory-sourced scalars.
4436 (define_expand "neon_vmul_n<mode>"
4437 [(match_operand:VMD 0 "s_register_operand")
4438 (match_operand:VMD 1 "s_register_operand")
4439 (match_operand:<V_elem> 2 "s_register_operand")]
4442 rtx tmp = gen_reg_rtx (<MODE>mode);
4443 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4444 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Q-register variant: the scalar temporary lives in the half-width mode.
4449 (define_expand "neon_vmul_n<mode>"
4450 [(match_operand:VMQ 0 "s_register_operand")
4451 (match_operand:VMQ 1 "s_register_operand")
4452 (match_operand:<V_elem> 2 "s_register_operand")]
4455 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4456 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4457 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; fp16 variant: scalar temporary is always V4HF.
4462 (define_expand "neon_vmul_n<mode>"
4463 [(match_operand:VH 0 "s_register_operand")
4464 (match_operand:VH 1 "s_register_operand")
4465 (match_operand:<V_elem> 2 "s_register_operand")]
4466 "TARGET_NEON_FP16INST"
4468 rtx tmp = gen_reg_rtx (V4HFmode);
4469 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4470 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; Signed widening multiply by scalar.
4475 (define_expand "neon_vmulls_n<mode>"
4476 [(match_operand:<V_widen> 0 "s_register_operand")
4477 (match_operand:VMDI 1 "s_register_operand")
4478 (match_operand:<V_elem> 2 "s_register_operand")]
4481 rtx tmp = gen_reg_rtx (<MODE>mode);
4482 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4483 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; Unsigned widening multiply by scalar.
4488 (define_expand "neon_vmullu_n<mode>"
4489 [(match_operand:<V_widen> 0 "s_register_operand")
4490 (match_operand:VMDI 1 "s_register_operand")
4491 (match_operand:<V_elem> 2 "s_register_operand")]
4494 rtx tmp = gen_reg_rtx (<MODE>mode);
4495 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4496 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling widening multiply by scalar.
4501 (define_expand "neon_vqdmull_n<mode>"
4502 [(match_operand:<V_widen> 0 "s_register_operand")
4503 (match_operand:VMDI 1 "s_register_operand")
4504 (match_operand:<V_elem> 2 "s_register_operand")]
4507 rtx tmp = gen_reg_rtx (<MODE>mode);
4508 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4509 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; Saturating doubling multiply-high by scalar, D registers.
4514 (define_expand "neon_vqdmulh_n<mode>"
4515 [(match_operand:VMDI 0 "s_register_operand")
4516 (match_operand:VMDI 1 "s_register_operand")
4517 (match_operand:<V_elem> 2 "s_register_operand")]
4520 rtx tmp = gen_reg_rtx (<MODE>mode);
4521 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4522 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Rounding variant, D registers.
4527 (define_expand "neon_vqrdmulh_n<mode>"
4528 [(match_operand:VMDI 0 "s_register_operand")
4529 (match_operand:VMDI 1 "s_register_operand")
4530 (match_operand:<V_elem> 2 "s_register_operand")]
4533 rtx tmp = gen_reg_rtx (<MODE>mode);
4534 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4535 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; Q-register variants: scalar temporary in the half-width mode.
4540 (define_expand "neon_vqdmulh_n<mode>"
4541 [(match_operand:VMQI 0 "s_register_operand")
4542 (match_operand:VMQI 1 "s_register_operand")
4543 (match_operand:<V_elem> 2 "s_register_operand")]
4546 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4547 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4548 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4553 (define_expand "neon_vqrdmulh_n<mode>"
4554 [(match_operand:VMQI 0 "s_register_operand")
4555 (match_operand:VMQI 1 "s_register_operand")
4556 (match_operand:<V_elem> 2 "s_register_operand")]
4559 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4560 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4561 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; "_n" multiply-accumulate/subtract expanders.  Same strategy as the
;; vmul_n expanders above: broadcast the scalar (operand 3) into lane 0
;; of a temporary, then defer to the matching "_lane" insn.
4566 (define_expand "neon_vmla_n<mode>"
4567 [(match_operand:VMD 0 "s_register_operand")
4568 (match_operand:VMD 1 "s_register_operand")
4569 (match_operand:VMD 2 "s_register_operand")
4570 (match_operand:<V_elem> 3 "s_register_operand")]
4573 rtx tmp = gen_reg_rtx (<MODE>mode);
4574 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4575 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Q-register variant: temporary in the half-width mode.
4580 (define_expand "neon_vmla_n<mode>"
4581 [(match_operand:VMQ 0 "s_register_operand")
4582 (match_operand:VMQ 1 "s_register_operand")
4583 (match_operand:VMQ 2 "s_register_operand")
4584 (match_operand:<V_elem> 3 "s_register_operand")]
4587 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4588 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4589 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; Signed widening multiply-accumulate by scalar.
4594 (define_expand "neon_vmlals_n<mode>"
4595 [(match_operand:<V_widen> 0 "s_register_operand")
4596 (match_operand:<V_widen> 1 "s_register_operand")
4597 (match_operand:VMDI 2 "s_register_operand")
4598 (match_operand:<V_elem> 3 "s_register_operand")]
4601 rtx tmp = gen_reg_rtx (<MODE>mode);
4602 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4603 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; Unsigned widening multiply-accumulate by scalar.
4608 (define_expand "neon_vmlalu_n<mode>"
4609 [(match_operand:<V_widen> 0 "s_register_operand")
4610 (match_operand:<V_widen> 1 "s_register_operand")
4611 (match_operand:VMDI 2 "s_register_operand")
4612 (match_operand:<V_elem> 3 "s_register_operand")]
4615 rtx tmp = gen_reg_rtx (<MODE>mode);
4616 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4617 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; Saturating doubling widening multiply-accumulate by scalar.
4622 (define_expand "neon_vqdmlal_n<mode>"
4623 [(match_operand:<V_widen> 0 "s_register_operand")
4624 (match_operand:<V_widen> 1 "s_register_operand")
4625 (match_operand:VMDI 2 "s_register_operand")
4626 (match_operand:<V_elem> 3 "s_register_operand")]
4629 rtx tmp = gen_reg_rtx (<MODE>mode);
4630 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4631 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; Multiply-subtract counterparts.
4636 (define_expand "neon_vmls_n<mode>"
4637 [(match_operand:VMD 0 "s_register_operand")
4638 (match_operand:VMD 1 "s_register_operand")
4639 (match_operand:VMD 2 "s_register_operand")
4640 (match_operand:<V_elem> 3 "s_register_operand")]
4643 rtx tmp = gen_reg_rtx (<MODE>mode);
4644 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4645 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4650 (define_expand "neon_vmls_n<mode>"
4651 [(match_operand:VMQ 0 "s_register_operand")
4652 (match_operand:VMQ 1 "s_register_operand")
4653 (match_operand:VMQ 2 "s_register_operand")
4654 (match_operand:<V_elem> 3 "s_register_operand")]
4657 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4658 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4659 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4664 (define_expand "neon_vmlsls_n<mode>"
4665 [(match_operand:<V_widen> 0 "s_register_operand")
4666 (match_operand:<V_widen> 1 "s_register_operand")
4667 (match_operand:VMDI 2 "s_register_operand")
4668 (match_operand:<V_elem> 3 "s_register_operand")]
4671 rtx tmp = gen_reg_rtx (<MODE>mode);
4672 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4673 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4678 (define_expand "neon_vmlslu_n<mode>"
4679 [(match_operand:<V_widen> 0 "s_register_operand")
4680 (match_operand:<V_widen> 1 "s_register_operand")
4681 (match_operand:VMDI 2 "s_register_operand")
4682 (match_operand:<V_elem> 3 "s_register_operand")]
4685 rtx tmp = gen_reg_rtx (<MODE>mode);
4686 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4687 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4692 (define_expand "neon_vqdmlsl_n<mode>"
4693 [(match_operand:<V_widen> 0 "s_register_operand")
4694 (match_operand:<V_widen> 1 "s_register_operand")
4695 (match_operand:VMDI 2 "s_register_operand")
4696 (match_operand:<V_elem> 3 "s_register_operand")]
4699 rtx tmp = gen_reg_rtx (<MODE>mode);
4700 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4701 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; Byte-extract and element-reverse permutes.
;; vext: concatenate operands 1 and 2 and extract a vector starting at
;; element index operand 3; the index is bounded by the element count.
4706 (define_insn "@neon_vext<mode>"
4707 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4708 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4709 (match_operand:VDQX 2 "s_register_operand" "w")
4710 (match_operand:SI 3 "immediate_operand" "i")]
4714 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4715 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4717 [(set_attr "type" "neon_ext<q>")]
;; Reverse elements within 64-bit, 32-bit and 16-bit groups respectively.
4720 (define_insn "@neon_vrev64<mode>"
4721 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4722 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4725 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4726 [(set_attr "type" "neon_rev<q>")]
4729 (define_insn "@neon_vrev32<mode>"
4730 [(set (match_operand:VX 0 "s_register_operand" "=w")
4731 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4734 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4735 [(set_attr "type" "neon_rev<q>")]
4738 (define_insn "@neon_vrev16<mode>"
4739 [(set (match_operand:VE 0 "s_register_operand" "=w")
4740 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4743 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4744 [(set_attr "type" "neon_rev<q>")]
4747 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4748 ; allocation. For an intrinsic of form:
4749 ; rD = vbsl_* (rS, rN, rM)
4750 ; We can use any of:
4751 ; vbsl rS, rN, rM (if D = S)
4752 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4753 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select.  The three alternatives correspond to which input the
;; register allocator ties to the destination, selecting vbsl/vbit/vbif
;; as explained in the comment above.
4755 (define_insn "neon_vbsl<mode>_internal"
4756 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4757 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4758 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4759 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4763 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4764 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4765 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4766 [(set_attr "type" "neon_bsl<q>")]
;; Expander: the selector (operand 1) arrives in <V_cmp_result> mode and
;; is re-viewed in <MODE>mode so the internal pattern can tie operands.
4769 (define_expand "neon_vbsl<mode>"
4770 [(set (match_operand:VDQX 0 "s_register_operand")
4771 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4772 (match_operand:VDQX 2 "s_register_operand")
4773 (match_operand:VDQX 3 "s_register_operand")]
4777 /* We can't alias operands together if they have different modes. */
4778 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Vector shifts.  The first two patterns take the (per-element) shift
;; amount from a second vector register; the distinct "type" attributes
;; suggest a plain and a saturating family — the unspec tags that would
;; confirm which mnemonics <shift_op> expands to are elided from this
;; listing.
4782 (define_insn "neon_v<shift_op><sup><mode>"
4783 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4784 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4785 (match_operand:VDQIX 2 "s_register_operand" "w")]
4788 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4789 [(set_attr "type" "neon_shift_imm<q>")]
4793 (define_insn "neon_v<shift_op><sup><mode>"
4794 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4795 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4796 (match_operand:VDQIX 2 "s_register_operand" "w")]
4799 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4800 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Immediate shift; amount bounded by the element width.
4804 (define_insn "neon_v<shift_op><sup>_n<mode>"
4805 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4806 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4807 (match_operand:SI 2 "immediate_operand" "i")]
4811 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4812 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4814 [(set_attr "type" "neon_shift_imm<q>")]
4817 ;; vshrn_n, vrshrn_n
;; Narrowing right shift by immediate: Q source, D destination; the
;; shift amount is bounded by half the source element width.
4818 (define_insn "neon_v<shift_op>_n<mode>"
4819 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4820 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4821 (match_operand:SI 2 "immediate_operand" "i")]
4825 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4826 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4828 [(set_attr "type" "neon_shift_imm_narrow_q")]
4831 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing right shift (signed/unsigned via <sup>).
4832 (define_insn "neon_v<shift_op><sup>_n<mode>"
4833 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4834 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4835 (match_operand:SI 2 "immediate_operand" "i")]
4839 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4840 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4842 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4845 ;; vqshrun_n, vqrshrun_n
;; Saturating narrowing right shift, signed source to unsigned result.
4846 (define_insn "neon_v<shift_op>_n<mode>"
4847 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4848 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4849 (match_operand:SI 2 "immediate_operand" "i")]
4853 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4854 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4856 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Left shifts by immediate.  Note the differing bounds: plain/saturating
;; left shifts allow 0 <= imm < element bits, while vshll allows a shift
;; equal to the element width (see the explicit comment in that pattern).
4859 (define_insn "neon_vshl_n<mode>"
4860 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4861 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4862 (match_operand:SI 2 "immediate_operand" "i")]
4866 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4867 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4869 [(set_attr "type" "neon_shift_imm<q>")]
;; Saturating left shift by immediate (signed/unsigned via <sup>).
4872 (define_insn "neon_vqshl_<sup>_n<mode>"
4873 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4874 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4875 (match_operand:SI 2 "immediate_operand" "i")]
4879 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4880 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4882 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Saturating left shift of a signed source into an unsigned result.
4885 (define_insn "neon_vqshlu_n<mode>"
4886 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4887 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4888 (match_operand:SI 2 "immediate_operand" "i")]
4892 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4893 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4895 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening left shift by immediate: D source, Q destination.
4898 (define_insn "neon_vshll<sup>_n<mode>"
4899 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4900 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4901 (match_operand:SI 2 "immediate_operand" "i")]
4905 /* The boundaries are: 0 < imm <= size. */
4906 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4907 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4909 [(set_attr "type" "neon_shift_imm_long")]
;; Shift with accumulate/insert.  Operand 1 is the destination's prior
;; value (tied via the "0" constraint); operand 2 is shifted by the
;; immediate in operand 3 and combined into it.  The first pattern's
;; neon_shift_acc type and elided unspec suggest the vsra/vrsra family —
;; confirm against the missing unspec tag.
4913 (define_insn "neon_v<shift_op><sup>_n<mode>"
4914 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4915 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4916 (match_operand:VDQIX 2 "s_register_operand" "w")
4917 (match_operand:SI 3 "immediate_operand" "i")]
4921 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4922 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4924 [(set_attr "type" "neon_shift_acc<q>")]
;; Shift right and insert; shift amount in 1..element-bits.
4927 (define_insn "neon_vsri_n<mode>"
4928 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4929 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4930 (match_operand:VDQIX 2 "s_register_operand" "w")
4931 (match_operand:SI 3 "immediate_operand" "i")]
4935 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4936 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4938 [(set_attr "type" "neon_shift_reg<q>")]
;; Shift left and insert; shift amount in 0..element-bits-1.
4941 (define_insn "neon_vsli_n<mode>"
4942 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4943 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4944 (match_operand:VDQIX 2 "s_register_operand" "w")
4945 (match_operand:SI 3 "immediate_operand" "i")]
4949 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4950 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4952 [(set_attr "type" "neon_shift_reg<q>")]
;; Table lookups (vtbl) with 1-4 table registers.  The multi-register
;; tables arrive as one wide value (TI/EI/OI); REGNO of that value is the
;; first D register, and consecutive table D regs are built at +2 hard-reg
;; steps (D registers are register-number pairs) for the {Dn, Dn+1, ...}
;; register-list syntax.
4955 (define_insn "neon_vtbl1v8qi"
4956 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4957 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4958 (match_operand:V8QI 2 "s_register_operand" "w")]
4961 "vtbl.8\t%P0, {%P1}, %P2"
4962 [(set_attr "type" "neon_tbl1")]
;; Two-register table held in a TI-mode value.
4965 (define_insn "neon_vtbl2v8qi"
4966 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4967 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4968 (match_operand:V8QI 2 "s_register_operand" "w")]
4973 int tabbase = REGNO (operands[1]);
4975 ops[0] = operands[0];
4976 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4977 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4978 ops[3] = operands[2];
4979 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4983 [(set_attr "type" "neon_tbl2")]
;; Three-register table held in an EI-mode value.
4986 (define_insn "neon_vtbl3v8qi"
4987 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4988 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4989 (match_operand:V8QI 2 "s_register_operand" "w")]
4994 int tabbase = REGNO (operands[1]);
4996 ops[0] = operands[0];
4997 ops[1] = gen_rtx_REG (V8QImode, tabbase);
4998 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4999 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5000 ops[4] = operands[2];
5001 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5005 [(set_attr "type" "neon_tbl3")]
;; Four-register table held in an OI-mode value.
5008 (define_insn "neon_vtbl4v8qi"
5009 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5010 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5011 (match_operand:V8QI 2 "s_register_operand" "w")]
5016 int tabbase = REGNO (operands[1]);
5018 ops[0] = operands[0];
5019 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5020 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5021 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5022 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5023 ops[5] = operands[2];
5024 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5028 [(set_attr "type" "neon_tbl4")]
5031 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; Post-reload splitters: a V16QI table lookup is split into two
;; vtbl2v8qi insns, one for the low half and one for the high half of
;; the destination/index operands (hence the earlyclobber "=&w" output).
5032 (define_insn_and_split "neon_vtbl1v16qi"
5033 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5034 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5035 (match_operand:V16QI 2 "s_register_operand" "w")]
5039 "&& reload_completed"
5042 rtx op0, op1, op2, part0, part2;
;; The V16QI table is re-viewed as TI so it matches vtbl2v8qi's operand 1.
5046 op1 = gen_lowpart (TImode, operands[1]);
5049 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5050 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5051 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5052 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5054 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5055 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5056 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5057 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5060 [(set_attr "type" "multiple")]
;; Same splitting strategy with a two-Q-register (OI) table.
5063 (define_insn_and_split "neon_vtbl2v16qi"
5064 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5065 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5066 (match_operand:V16QI 2 "s_register_operand" "w")]
5070 "&& reload_completed"
5073 rtx op0, op1, op2, part0, part2;
5080 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5081 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5082 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5083 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5085 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5086 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5087 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5088 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5091 [(set_attr "type" "multiple")]
5094 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5095 ;; handle quad-word input modes, producing octa-word output modes. But
5096 ;; that requires us to add support for octa-word vector modes in moves.
5097 ;; That seems overkill for this one use in vec_perm.
;; Combine two V16QI values into one OI value; actual work is done by
;; neon_split_vcombine after reload.
5098 (define_insn_and_split "neon_vcombinev16qi"
5099 [(set (match_operand:OI 0 "s_register_operand" "=w")
5100 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5101 (match_operand:V16QI 2 "s_register_operand" "w")]
5105 "&& reload_completed"
5108 neon_split_vcombine (operands);
5111 [(set_attr "type" "multiple")]
;; Table lookups with extension (vtbx): like vtbl, but operand 1 supplies
;; the prior destination contents (tied via "0") which are kept for
;; out-of-range indices.  Multi-register tables follow the same REGNO/+2
;; hard-register scheme as the vtbl patterns above.
5114 (define_insn "neon_vtbx1v8qi"
5115 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5116 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5117 (match_operand:V8QI 2 "s_register_operand" "w")
5118 (match_operand:V8QI 3 "s_register_operand" "w")]
5121 "vtbx.8\t%P0, {%P2}, %P3"
5122 [(set_attr "type" "neon_tbl1")]
;; Two-register table (TI-mode operand 2).
5125 (define_insn "neon_vtbx2v8qi"
5126 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5127 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5128 (match_operand:TI 2 "s_register_operand" "w")
5129 (match_operand:V8QI 3 "s_register_operand" "w")]
5134 int tabbase = REGNO (operands[2]);
5136 ops[0] = operands[0];
5137 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5138 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5139 ops[3] = operands[3];
5140 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5144 [(set_attr "type" "neon_tbl2")]
;; Three-register table (EI-mode operand 2).
5147 (define_insn "neon_vtbx3v8qi"
5148 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5149 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5150 (match_operand:EI 2 "s_register_operand" "w")
5151 (match_operand:V8QI 3 "s_register_operand" "w")]
5156 int tabbase = REGNO (operands[2]);
5158 ops[0] = operands[0];
5159 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5160 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5161 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5162 ops[4] = operands[3];
5163 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5167 [(set_attr "type" "neon_tbl3")]
;; Four-register table (OI-mode operand 2).
5170 (define_insn "neon_vtbx4v8qi"
5171 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5172 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5173 (match_operand:OI 2 "s_register_operand" "w")
5174 (match_operand:V8QI 3 "s_register_operand" "w")]
5179 int tabbase = REGNO (operands[2]);
5181 ops[0] = operands[0];
5182 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5183 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5184 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5185 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5186 ops[5] = operands[3];
5187 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5191 [(set_attr "type" "neon_tbl4")]
;; Two-destination permutes.  Each expander emits a parallel with both
;; halves of the result (UNSPEC_*1 / UNSPEC_*2); the matching insn below
;; uses renumbered operands so the register allocator can tie inputs to
;; the two early-clobber outputs.
5194 (define_expand "@neon_vtrn<mode>_internal"
5196 [(set (match_operand:VDQWH 0 "s_register_operand")
5197 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5198 (match_operand:VDQWH 2 "s_register_operand")]
5200 (set (match_operand:VDQWH 3 "s_register_operand")
5201 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5206 ;; Note: Different operand numbering to handle tied registers correctly.
5207 (define_insn "*neon_vtrn<mode>_insn"
5208 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5209 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5210 (match_operand:VDQWH 3 "s_register_operand" "2")]
5212 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5213 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5216 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5217 [(set_attr "type" "neon_permute<q>")]
5220 (define_expand "@neon_vzip<mode>_internal"
5222 [(set (match_operand:VDQWH 0 "s_register_operand")
5223 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5224 (match_operand:VDQWH 2 "s_register_operand")]
5226 (set (match_operand:VDQWH 3 "s_register_operand")
5227 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5232 ;; Note: Different operand numbering to handle tied registers correctly.
5233 (define_insn "*neon_vzip<mode>_insn"
5234 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5235 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5236 (match_operand:VDQWH 3 "s_register_operand" "2")]
5238 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5239 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5242 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5243 [(set_attr "type" "neon_zip<q>")]
5246 (define_expand "@neon_vuzp<mode>_internal"
5248 [(set (match_operand:VDQWH 0 "s_register_operand")
5249 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5250 (match_operand:VDQWH 2 "s_register_operand")]
5252 (set (match_operand:VDQWH 3 "s_register_operand")
5253 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5258 ;; Note: Different operand numbering to handle tied registers correctly.
;; NOTE(review): vuzp reuses the "neon_zip<q>" type attribute rather than a
;; dedicated unzip type -- presumably zip/unzip share scheduling cost, but
;; confirm against the type definitions in the scheduling descriptions.
5259 (define_insn "*neon_vuzp<mode>_insn"
5260 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5261 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5262 (match_operand:VDQWH 3 "s_register_operand" "2")]
5264 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5265 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5268 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5269 [(set_attr "type" "neon_zip<q>")]
;; Single-structure loads: vld1 whole register(s), vld1 to one lane, and
;; vld1 all-lanes (dup) variants, for D- and Q-sized modes.
5272 (define_expand "vec_load_lanes<mode><mode>"
5273 [(set (match_operand:VDQX 0 "s_register_operand")
5274 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5278 (define_insn "neon_vld1<mode>"
5279 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5280 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5283 "vld1.<V_sz_elem>\t%h0, %A1"
5284 [(set_attr "type" "neon_load1_1reg<q>")]
5287 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5288 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; D-register lane load: NEON_ENDIAN_LANE_N converts the GCC lane number
;; back to the architectural lane before printing.
5290 (define_insn "neon_vld1_lane<mode>"
5291 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5292 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5293 (match_operand:VDX 2 "s_register_operand" "0")
5294 (match_operand:SI 3 "immediate_operand" "i")]
5298 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5299 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5300 operands[3] = GEN_INT (lane);
5302 return "vld1.<V_sz_elem>\t%P0, %A1";
5304 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5306 [(set_attr "type" "neon_load1_one_lane<q>")]
5309 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5310 ;; here on big endian targets.
;; Q-register lane load: a lane in the upper half is loaded by narrowing
;; operands[0] to the half-mode register covering that half.
5311 (define_insn "neon_vld1_lane<mode>"
5312 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5313 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5314 (match_operand:VQX 2 "s_register_operand" "0")
5315 (match_operand:SI 3 "immediate_operand" "i")]
5319 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5320 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5321 operands[3] = GEN_INT (lane);
5322 int regno = REGNO (operands[0]);
5323 if (lane >= max / 2)
5327 operands[3] = GEN_INT (lane);
5329 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5331 return "vld1.<V_sz_elem>\t%P0, %A1";
5333 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5335 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one element and duplicate it to every lane of a D register.
5338 (define_insn "neon_vld1_dup<mode>"
5339 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5340 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5342 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5343 [(set_attr "type" "neon_load1_all_lanes<q>")]
5346 ;; Special case for DImode. Treat it exactly like a simple load.
5347 (define_expand "neon_vld1_dupdi"
5348 [(set (match_operand:DI 0 "s_register_operand")
5349 (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
;; Q-register dup: duplicate into both D halves (%e0 and %f0).
5355 (define_insn "neon_vld1_dup<mode>"
5356 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5357 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5360 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5362 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; V2DI dup: no single-instruction form, so split after reload into a DI
;; load of the low half followed by a move into the high half.
5365 (define_insn_and_split "neon_vld1_dupv2di"
5366 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5367 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5370 "&& reload_completed"
5373 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5374 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5375 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5378 [(set_attr "length" "8")
5379 (set_attr "type" "neon_load1_all_lanes_q")]
;; Single-structure stores: vst1 whole register(s) and vst1 from one lane,
;; mirroring the vld1 patterns above.
5382 (define_expand "vec_store_lanes<mode><mode>"
5383 [(set (match_operand:VDQX 0 "neon_struct_operand")
5384 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5388 (define_insn "neon_vst1<mode>"
5389 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5390 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5393 "vst1.<V_sz_elem>\t%h1, %A0"
5394 [(set_attr "type" "neon_store1_1reg<q>")])
5396 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5397 ;; here on big endian targets.
5398 (define_insn "neon_vst1_lane<mode>"
5399 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5401 [(match_operand:VDX 1 "s_register_operand" "w")
5402 (match_operand:SI 2 "immediate_operand" "i")]
5406 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5407 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5408 operands[2] = GEN_INT (lane);
5410 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5412 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5414 [(set_attr "type" "neon_store1_one_lane<q>")]
5417 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5418 ;; here on big endian targets.
;; Q-register lane store: as in vld1_lane/VQX, lanes in the upper half are
;; handled by narrowing operands[1] to the covering half-mode register.
5419 (define_insn "neon_vst1_lane<mode>"
5420 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5422 [(match_operand:VQX 1 "s_register_operand" "w")
5423 (match_operand:SI 2 "immediate_operand" "i")]
5427 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5428 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5429 int regno = REGNO (operands[1]);
5430 if (lane >= max / 2)
5435 operands[2] = GEN_INT (lane);
5436 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5438 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5440 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5442 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Two-structure loads.  The D-register result lives in a TI value, the
;; Q-register result in an OI value; the dummy VDX/VQ2 unspec operand only
;; carries the element mode for the iterator.
5445 (define_expand "vec_load_lanesti<mode>"
5446 [(set (match_operand:TI 0 "s_register_operand")
5447 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5448 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; 64-bit elements have no vld2 form, so fall back to vld1.64 (and use the
;; load1 type attribute in that case).
5452 (define_insn "neon_vld2<mode>"
5453 [(set (match_operand:TI 0 "s_register_operand" "=w")
5454 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5455 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5459 if (<V_sz_elem> == 64)
5460 return "vld1.64\t%h0, %A1";
5462 return "vld2.<V_sz_elem>\t%h0, %A1";
5465 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5466 (const_string "neon_load1_2reg<q>")
5467 (const_string "neon_load2_2reg<q>")))]
5470 (define_expand "vec_load_lanesoi<mode>"
5471 [(set (match_operand:OI 0 "s_register_operand")
5472 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5473 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5477 (define_insn "neon_vld2<mode>"
5478 [(set (match_operand:OI 0 "s_register_operand" "=w")
5479 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5480 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5483 "vld2.<V_sz_elem>\t%h0, %A1"
5484 [(set_attr "type" "neon_load2_2reg_q")])
5486 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5487 ;; here on big endian targets.
;; D-register lane load: the two destination D regs are adjacent
;; (regno, regno + 2).
5488 (define_insn "neon_vld2_lane<mode>"
5489 [(set (match_operand:TI 0 "s_register_operand" "=w")
5490 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5491 (match_operand:TI 2 "s_register_operand" "0")
5492 (match_operand:SI 3 "immediate_operand" "i")
5493 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5497 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5498 int regno = REGNO (operands[0]);
5500 ops[0] = gen_rtx_REG (DImode, regno);
5501 ops[1] = gen_rtx_REG (DImode, regno + 2);
5502 ops[2] = operands[1];
5503 ops[3] = GEN_INT (lane);
5504 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5507 [(set_attr "type" "neon_load2_one_lane<q>")]
5510 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5511 ;; here on big endian targets.
;; Q-register lane load: the two D regs touched are regno and regno + 4
;; (the matching halves of the two Q registers); upper-half lanes are
;; remapped by the elided branch after the max/2 test.
5512 (define_insn "neon_vld2_lane<mode>"
5513 [(set (match_operand:OI 0 "s_register_operand" "=w")
5514 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5515 (match_operand:OI 2 "s_register_operand" "0")
5516 (match_operand:SI 3 "immediate_operand" "i")
5517 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5521 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5522 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5523 int regno = REGNO (operands[0]);
5525 if (lane >= max / 2)
5530 ops[0] = gen_rtx_REG (DImode, regno);
5531 ops[1] = gen_rtx_REG (DImode, regno + 4);
5532 ops[2] = operands[1];
5533 ops[3] = GEN_INT (lane);
5534 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5537 [(set_attr "type" "neon_load2_one_lane<q>")]
;; vld2 all-lanes (dup); single-element modes degenerate to a plain vld1.
5540 (define_insn "neon_vld2_dup<mode>"
5541 [(set (match_operand:TI 0 "s_register_operand" "=w")
5542 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5543 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5547 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5548 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5550 return "vld1.<V_sz_elem>\t%h0, %A1";
5553 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5554 (const_string "neon_load2_all_lanes<q>")
5555 (const_string "neon_load1_1reg<q>")))]
;; Two-structure stores, mirroring the vld2 patterns.
5558 (define_expand "vec_store_lanesti<mode>"
5559 [(set (match_operand:TI 0 "neon_struct_operand")
5560 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5561 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; NOTE(review): the non-64-bit branch of the type attribute below is
;; "neon_store2_one_lane<q>" although the template stores full registers;
;; "neon_store2_2reg<q>" would appear more accurate -- confirm against
;; upstream before changing.
5565 (define_insn "neon_vst2<mode>"
5566 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5567 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5568 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5572 if (<V_sz_elem> == 64)
5573 return "vst1.64\t%h1, %A0";
5575 return "vst2.<V_sz_elem>\t%h1, %A0";
5578 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5579 (const_string "neon_store1_2reg<q>")
5580 (const_string "neon_store2_one_lane<q>")))]
5583 (define_expand "vec_store_lanesoi<mode>"
5584 [(set (match_operand:OI 0 "neon_struct_operand")
5585 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5586 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5590 (define_insn "neon_vst2<mode>"
5591 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5592 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5593 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5596 "vst2.<V_sz_elem>\t%h1, %A0"
5597 [(set_attr "type" "neon_store2_4reg<q>")]
5600 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5601 ;; here on big endian targets.
;; D-register lane store: source D regs at regno and regno + 2.
5602 (define_insn "neon_vst2_lane<mode>"
5603 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5604 (unspec:<V_two_elem>
5605 [(match_operand:TI 1 "s_register_operand" "w")
5606 (match_operand:SI 2 "immediate_operand" "i")
5607 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5611 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5612 int regno = REGNO (operands[1]);
5614 ops[0] = operands[0];
5615 ops[1] = gen_rtx_REG (DImode, regno);
5616 ops[2] = gen_rtx_REG (DImode, regno + 2);
5617 ops[3] = GEN_INT (lane);
5618 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5621 [(set_attr "type" "neon_store2_one_lane<q>")]
5624 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5625 ;; here on big endian targets.
;; Q-register lane store: source D regs at regno and regno + 4, with the
;; upper-half lane remap handled by the elided branch of the max/2 test.
5626 (define_insn "neon_vst2_lane<mode>"
5627 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5628 (unspec:<V_two_elem>
5629 [(match_operand:OI 1 "s_register_operand" "w")
5630 (match_operand:SI 2 "immediate_operand" "i")
5631 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5635 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5636 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5637 int regno = REGNO (operands[1]);
5639 if (lane >= max / 2)
5644 ops[0] = operands[0];
5645 ops[1] = gen_rtx_REG (DImode, regno);
5646 ops[2] = gen_rtx_REG (DImode, regno + 4);
5647 ops[3] = GEN_INT (lane);
5648 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5651 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Three-structure loads.  D-register results live in an EI (3 x D) value;
;; Q-register loads need two vld3 instructions (qa loads the even-numbered
;; D halves, qb the odd ones) over a CI (6 x D) value.
5654 (define_expand "vec_load_lanesei<mode>"
5655 [(set (match_operand:EI 0 "s_register_operand")
5656 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5657 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; 64-bit elements fall back to vld1.64, as in neon_vld2.
5661 (define_insn "neon_vld3<mode>"
5662 [(set (match_operand:EI 0 "s_register_operand" "=w")
5663 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5664 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5668 if (<V_sz_elem> == 64)
5669 return "vld1.64\t%h0, %A1";
5671 return "vld3.<V_sz_elem>\t%h0, %A1";
5674 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5675 (const_string "neon_load1_3reg<q>")
5676 (const_string "neon_load3_3reg<q>")))]
5679 (define_expand "vec_load_lanesci<mode>"
5680 [(match_operand:CI 0 "s_register_operand")
5681 (match_operand:CI 1 "neon_struct_operand")
5682 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5685 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Q-register vld3: two EI-sized memory chunks loaded by the qa/qb insns.
5689 (define_expand "neon_vld3<mode>"
5690 [(match_operand:CI 0 "s_register_operand")
5691 (match_operand:CI 1 "neon_struct_operand")
5692 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5697 mem = adjust_address (operands[1], EImode, 0);
5698 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5699 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5700 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; qa: even D halves (regno, +4, +8).
5704 (define_insn "neon_vld3qa<mode>"
5705 [(set (match_operand:CI 0 "s_register_operand" "=w")
5706 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5707 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5711 int regno = REGNO (operands[0]);
5713 ops[0] = gen_rtx_REG (DImode, regno);
5714 ops[1] = gen_rtx_REG (DImode, regno + 4);
5715 ops[2] = gen_rtx_REG (DImode, regno + 8);
5716 ops[3] = operands[1];
5717 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5720 [(set_attr "type" "neon_load3_3reg<q>")]
;; qb: odd D halves (regno + 2, +6, +10); operand 2 ties in the partial
;; result from qa so it is not clobbered.
5723 (define_insn "neon_vld3qb<mode>"
5724 [(set (match_operand:CI 0 "s_register_operand" "=w")
5725 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5726 (match_operand:CI 2 "s_register_operand" "0")
5727 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5731 int regno = REGNO (operands[0]);
5733 ops[0] = gen_rtx_REG (DImode, regno + 2);
5734 ops[1] = gen_rtx_REG (DImode, regno + 6);
5735 ops[2] = gen_rtx_REG (DImode, regno + 10);
5736 ops[3] = operands[1];
5737 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5740 [(set_attr "type" "neon_load3_3reg<q>")]
5743 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5744 ;; here on big endian targets.
;; NOTE(review): the templates below print the memory operand as "%3"/"%A3"
;; inconsistently with the vld2/vld4 siblings, which use %A<n>; verify the
;; intended operand modifier against the elided lines.
5745 (define_insn "neon_vld3_lane<mode>"
5746 [(set (match_operand:EI 0 "s_register_operand" "=w")
5747 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5748 (match_operand:EI 2 "s_register_operand" "0")
5749 (match_operand:SI 3 "immediate_operand" "i")
5750 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5754 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5755 int regno = REGNO (operands[0]);
5757 ops[0] = gen_rtx_REG (DImode, regno);
5758 ops[1] = gen_rtx_REG (DImode, regno + 2);
5759 ops[2] = gen_rtx_REG (DImode, regno + 4);
5760 ops[3] = operands[1];
5761 ops[4] = GEN_INT (lane);
5762 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5766 [(set_attr "type" "neon_load3_one_lane<q>")]
5769 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5770 ;; here on big endian targets.
5771 (define_insn "neon_vld3_lane<mode>"
5772 [(set (match_operand:CI 0 "s_register_operand" "=w")
5773 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5774 (match_operand:CI 2 "s_register_operand" "0")
5775 (match_operand:SI 3 "immediate_operand" "i")
5776 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5780 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5781 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5782 int regno = REGNO (operands[0]);
5784 if (lane >= max / 2)
5789 ops[0] = gen_rtx_REG (DImode, regno);
5790 ops[1] = gen_rtx_REG (DImode, regno + 4);
5791 ops[2] = gen_rtx_REG (DImode, regno + 8);
5792 ops[3] = operands[1];
5793 ops[4] = GEN_INT (lane);
5794 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5798 [(set_attr "type" "neon_load3_one_lane<q>")]
;; vld3 all-lanes (dup); single-element modes degenerate to vld1.
5801 (define_insn "neon_vld3_dup<mode>"
5802 [(set (match_operand:EI 0 "s_register_operand" "=w")
5803 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5804 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5808 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5810 int regno = REGNO (operands[0]);
5812 ops[0] = gen_rtx_REG (DImode, regno);
5813 ops[1] = gen_rtx_REG (DImode, regno + 2);
5814 ops[2] = gen_rtx_REG (DImode, regno + 4);
5815 ops[3] = operands[1];
5816 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5820 return "vld1.<V_sz_elem>\t%h0, %A1";
5823 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5824 (const_string "neon_load3_all_lanes<q>")
5825 (const_string "neon_load1_1reg<q>")))])
;; Three-structure stores, mirroring the vld3 patterns (qa stores even D
;; halves, qb odd D halves, for Q-register modes).
5827 (define_expand "vec_store_lanesei<mode>"
5828 [(set (match_operand:EI 0 "neon_struct_operand")
5829 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5830 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; NOTE(review): as with neon_vst2, the non-64-bit type branch is
;; "neon_store3_one_lane<q>" for a full-register store; confirm against
;; upstream before changing.
5834 (define_insn "neon_vst3<mode>"
5835 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5836 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5837 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5841 if (<V_sz_elem> == 64)
5842 return "vst1.64\t%h1, %A0";
5844 return "vst3.<V_sz_elem>\t%h1, %A0";
5847 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5848 (const_string "neon_store1_3reg<q>")
5849 (const_string "neon_store3_one_lane<q>")))])
5851 (define_expand "vec_store_lanesci<mode>"
5852 [(match_operand:CI 0 "neon_struct_operand")
5853 (match_operand:CI 1 "s_register_operand")
5854 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5857 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Q-register vst3: two EI-sized memory chunks stored by the qa/qb insns.
5861 (define_expand "neon_vst3<mode>"
5862 [(match_operand:CI 0 "neon_struct_operand")
5863 (match_operand:CI 1 "s_register_operand")
5864 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5869 mem = adjust_address (operands[0], EImode, 0);
5870 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5871 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5872 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; qa: even D halves (regno, +4, +8).
5876 (define_insn "neon_vst3qa<mode>"
5877 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5878 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5879 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5883 int regno = REGNO (operands[1]);
5885 ops[0] = operands[0];
5886 ops[1] = gen_rtx_REG (DImode, regno);
5887 ops[2] = gen_rtx_REG (DImode, regno + 4);
5888 ops[3] = gen_rtx_REG (DImode, regno + 8);
5889 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5892 [(set_attr "type" "neon_store3_3reg<q>")]
;; qb: odd D halves (regno + 2, +6, +10).
5895 (define_insn "neon_vst3qb<mode>"
5896 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5897 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5898 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5902 int regno = REGNO (operands[1]);
5904 ops[0] = operands[0];
5905 ops[1] = gen_rtx_REG (DImode, regno + 2);
5906 ops[2] = gen_rtx_REG (DImode, regno + 6);
5907 ops[3] = gen_rtx_REG (DImode, regno + 10);
5908 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5911 [(set_attr "type" "neon_store3_3reg<q>")]
5914 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5915 ;; here on big endian targets.
;; NOTE(review): the templates below print the memory operand as "%0" where
;; vst2/vst4 siblings use "%A0"; verify the intended modifier against the
;; elided lines.
5916 (define_insn "neon_vst3_lane<mode>"
5917 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5918 (unspec:<V_three_elem>
5919 [(match_operand:EI 1 "s_register_operand" "w")
5920 (match_operand:SI 2 "immediate_operand" "i")
5921 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5925 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5926 int regno = REGNO (operands[1]);
5928 ops[0] = operands[0];
5929 ops[1] = gen_rtx_REG (DImode, regno);
5930 ops[2] = gen_rtx_REG (DImode, regno + 2);
5931 ops[3] = gen_rtx_REG (DImode, regno + 4);
5932 ops[4] = GEN_INT (lane);
5933 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5937 [(set_attr "type" "neon_store3_one_lane<q>")]
5940 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5941 ;; here on big endian targets.
5942 (define_insn "neon_vst3_lane<mode>"
5943 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5944 (unspec:<V_three_elem>
5945 [(match_operand:CI 1 "s_register_operand" "w")
5946 (match_operand:SI 2 "immediate_operand" "i")
5947 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5951 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5952 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5953 int regno = REGNO (operands[1]);
5955 if (lane >= max / 2)
5960 ops[0] = operands[0];
5961 ops[1] = gen_rtx_REG (DImode, regno);
5962 ops[2] = gen_rtx_REG (DImode, regno + 4);
5963 ops[3] = gen_rtx_REG (DImode, regno + 8);
5964 ops[4] = GEN_INT (lane);
5965 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5969 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Four-structure loads.  D-register results live in an OI (4 x D) value;
;; Q-register loads need two vld4 instructions (qa/qb) over an XI (8 x D)
;; value, same even/odd-halves scheme as vld3.
5972 (define_expand "vec_load_lanesoi<mode>"
5973 [(set (match_operand:OI 0 "s_register_operand")
5974 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5975 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; 64-bit elements fall back to vld1.64, as in neon_vld2/neon_vld3.
5979 (define_insn "neon_vld4<mode>"
5980 [(set (match_operand:OI 0 "s_register_operand" "=w")
5981 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5982 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5986 if (<V_sz_elem> == 64)
5987 return "vld1.64\t%h0, %A1";
5989 return "vld4.<V_sz_elem>\t%h0, %A1";
5992 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5993 (const_string "neon_load1_4reg<q>")
5994 (const_string "neon_load4_4reg<q>")))]
5997 (define_expand "vec_load_lanesxi<mode>"
5998 [(match_operand:XI 0 "s_register_operand")
5999 (match_operand:XI 1 "neon_struct_operand")
6000 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6003 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Q-register vld4: two OI-sized memory chunks loaded by the qa/qb insns.
6007 (define_expand "neon_vld4<mode>"
6008 [(match_operand:XI 0 "s_register_operand")
6009 (match_operand:XI 1 "neon_struct_operand")
6010 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6015 mem = adjust_address (operands[1], OImode, 0);
6016 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6017 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6018 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; qa: even D halves (regno, +4, +8, +12).
6022 (define_insn "neon_vld4qa<mode>"
6023 [(set (match_operand:XI 0 "s_register_operand" "=w")
6024 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6025 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6029 int regno = REGNO (operands[0]);
6031 ops[0] = gen_rtx_REG (DImode, regno);
6032 ops[1] = gen_rtx_REG (DImode, regno + 4);
6033 ops[2] = gen_rtx_REG (DImode, regno + 8);
6034 ops[3] = gen_rtx_REG (DImode, regno + 12);
6035 ops[4] = operands[1];
6036 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6039 [(set_attr "type" "neon_load4_4reg<q>")]
;; qb: odd D halves (regno + 2, +6, +10, +14); operand 2 ties in the qa
;; partial result.
6042 (define_insn "neon_vld4qb<mode>"
6043 [(set (match_operand:XI 0 "s_register_operand" "=w")
6044 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6045 (match_operand:XI 2 "s_register_operand" "0")
6046 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6050 int regno = REGNO (operands[0]);
6052 ops[0] = gen_rtx_REG (DImode, regno + 2);
6053 ops[1] = gen_rtx_REG (DImode, regno + 6);
6054 ops[2] = gen_rtx_REG (DImode, regno + 10);
6055 ops[3] = gen_rtx_REG (DImode, regno + 14);
6056 ops[4] = operands[1];
6057 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6060 [(set_attr "type" "neon_load4_4reg<q>")]
6063 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6064 ;; here on big endian targets.
;; D-register lane load: destination D regs at regno, +2, +4, +6.
6065 (define_insn "neon_vld4_lane<mode>"
6066 [(set (match_operand:OI 0 "s_register_operand" "=w")
6067 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6068 (match_operand:OI 2 "s_register_operand" "0")
6069 (match_operand:SI 3 "immediate_operand" "i")
6070 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6074 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6075 int regno = REGNO (operands[0]);
6077 ops[0] = gen_rtx_REG (DImode, regno);
6078 ops[1] = gen_rtx_REG (DImode, regno + 2);
6079 ops[2] = gen_rtx_REG (DImode, regno + 4);
6080 ops[3] = gen_rtx_REG (DImode, regno + 6);
6081 ops[4] = operands[1];
6082 ops[5] = GEN_INT (lane);
6083 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6087 [(set_attr "type" "neon_load4_one_lane<q>")]
6090 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6091 ;; here on big endian targets.
;; Q-register lane load: matching D halves at regno, +4, +8, +12.
6092 (define_insn "neon_vld4_lane<mode>"
6093 [(set (match_operand:XI 0 "s_register_operand" "=w")
6094 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6095 (match_operand:XI 2 "s_register_operand" "0")
6096 (match_operand:SI 3 "immediate_operand" "i")
6097 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6101 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6102 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6103 int regno = REGNO (operands[0]);
6105 if (lane >= max / 2)
6110 ops[0] = gen_rtx_REG (DImode, regno);
6111 ops[1] = gen_rtx_REG (DImode, regno + 4);
6112 ops[2] = gen_rtx_REG (DImode, regno + 8);
6113 ops[3] = gen_rtx_REG (DImode, regno + 12);
6114 ops[4] = operands[1];
6115 ops[5] = GEN_INT (lane);
6116 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6120 [(set_attr "type" "neon_load4_one_lane<q>")]
;; vld4 all-lanes (dup); single-element modes degenerate to vld1.
6123 (define_insn "neon_vld4_dup<mode>"
6124 [(set (match_operand:OI 0 "s_register_operand" "=w")
6125 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6126 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6130 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6132 int regno = REGNO (operands[0]);
6134 ops[0] = gen_rtx_REG (DImode, regno);
6135 ops[1] = gen_rtx_REG (DImode, regno + 2);
6136 ops[2] = gen_rtx_REG (DImode, regno + 4);
6137 ops[3] = gen_rtx_REG (DImode, regno + 6);
6138 ops[4] = operands[1];
6139 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6144 return "vld1.<V_sz_elem>\t%h0, %A1";
6147 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6148 (const_string "neon_load4_all_lanes<q>")
6149 (const_string "neon_load1_1reg<q>")))]
6152 (define_expand "vec_store_lanesoi<mode>"
6153 [(set (match_operand:OI 0 "neon_struct_operand")
6154 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6155 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6159 (define_insn "neon_vst4<mode>"
6160 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6161 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6162 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6166 if (<V_sz_elem> == 64)
6167 return "vst1.64\t%h1, %A0";
6169 return "vst4.<V_sz_elem>\t%h1, %A0";
6172 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6173 (const_string "neon_store1_4reg<q>")
6174 (const_string "neon_store4_4reg<q>")))]
6177 (define_expand "vec_store_lanesxi<mode>"
6178 [(match_operand:XI 0 "neon_struct_operand")
6179 (match_operand:XI 1 "s_register_operand")
6180 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6183 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6187 (define_expand "neon_vst4<mode>"
6188 [(match_operand:XI 0 "neon_struct_operand")
6189 (match_operand:XI 1 "s_register_operand")
6190 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6195 mem = adjust_address (operands[0], OImode, 0);
6196 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6197 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6198 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
6202 (define_insn "neon_vst4qa<mode>"
6203 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6204 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6205 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6209 int regno = REGNO (operands[1]);
6211 ops[0] = operands[0];
6212 ops[1] = gen_rtx_REG (DImode, regno);
6213 ops[2] = gen_rtx_REG (DImode, regno + 4);
6214 ops[3] = gen_rtx_REG (DImode, regno + 8);
6215 ops[4] = gen_rtx_REG (DImode, regno + 12);
6216 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6219 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second ("B") half of a quad-vector vst4: the companion of neon_vst4qa,
;; storing the odd-numbered D registers of the tuple (base regno + 2, + 6,
;; + 10, + 14 in DImode units).
6222 (define_insn "neon_vst4qb<mode>"
6223 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6224 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6225 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6229 int regno = REGNO (operands[1]);
6231 ops[0] = operands[0];
6232 ops[1] = gen_rtx_REG (DImode, regno + 2);
6233 ops[2] = gen_rtx_REG (DImode, regno + 6);
6234 ops[3] = gen_rtx_REG (DImode, regno + 10);
6235 ops[4] = gen_rtx_REG (DImode, regno + 14);
6236 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6239 [(set_attr "type" "neon_store4_4reg<q>")]
;; Single-lane vst4 for double-word element vectors (VD_LANE): stores lane
;; <operand 2> from four consecutive D registers of an OImode tuple
;; (base regno + 0, + 2, + 4, + 6).  The lane number is remapped via
;; NEON_ENDIAN_LANE_N for big-endian targets, as the pre-existing comment
;; below explains.
6242 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6243 ;; here on big endian targets.
6244 (define_insn "neon_vst4_lane<mode>"
6245 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6246 (unspec:<V_four_elem>
6247 [(match_operand:OI 1 "s_register_operand" "w")
6248 (match_operand:SI 2 "immediate_operand" "i")
6249 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6253 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6254 int regno = REGNO (operands[1]);
6256 ops[0] = operands[0];
6257 ops[1] = gen_rtx_REG (DImode, regno);
6258 ops[2] = gen_rtx_REG (DImode, regno + 2);
6259 ops[3] = gen_rtx_REG (DImode, regno + 4);
6260 ops[4] = gen_rtx_REG (DImode, regno + 6);
6261 ops[5] = GEN_INT (lane);
6262 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6266 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Single-lane vst4 for quad-word element vectors (VQ_HS): stores one lane
;; from an XImode tuple, addressing every other D register (base regno + 0,
;; + 4, + 8, + 12).  The lane index is endian-remapped first.
;; NOTE(review): the body of the "if (lane >= max / 2)" branch was dropped by
;; extraction -- presumably it rebases regno/lane onto the upper D registers
;; when the lane lies in the high half of the Q vector; verify against
;; upstream neon.md.
6269 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6270 ;; here on big endian targets.
6271 (define_insn "neon_vst4_lane<mode>"
6272 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6273 (unspec:<V_four_elem>
6274 [(match_operand:XI 1 "s_register_operand" "w")
6275 (match_operand:SI 2 "immediate_operand" "i")
6276 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6280 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6281 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6282 int regno = REGNO (operands[1]);
6284 if (lane >= max / 2)
6289 ops[0] = operands[0];
6290 ops[1] = gen_rtx_REG (DImode, regno);
6291 ops[2] = gen_rtx_REG (DImode, regno + 4);
6292 ops[3] = gen_rtx_REG (DImode, regno + 8);
6293 ops[4] = gen_rtx_REG (DImode, regno + 12);
6294 ops[5] = GEN_INT (lane);
6295 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6299 [(set_attr "type" "neon_store4_4reg<q>")]
;; Widen (sign- or zero-extend, per the SE iterator) the LOW half of a quad
;; vector with vmovl; %e1 selects the low D register.  Little-endian only:
;; the vec_select PARALLEL indices would not match the in-register element
;; order on big-endian.
6302 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6303 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6304 (SE:<V_unpack> (vec_select:<V_HALF>
6305 (match_operand:VU 1 "register_operand" "w")
6306 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6307 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6308 "vmovl.<US><V_sz_elem> %q0, %e1"
6309 [(set_attr "type" "neon_shift_imm_long")]
;; Widen the HIGH half of a quad vector with vmovl; %f1 selects the high
;; D register.  Mirror of neon_vec_unpack<US>_lo_<mode>, matching a
;; vect_par_constant_high selector instead.  Little-endian only.
6312 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6313 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6314 (SE:<V_unpack> (vec_select:<V_HALF>
6315 (match_operand:VU 1 "register_operand" "w")
6316 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6317 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6318 "vmovl.<US><V_sz_elem> %q0, %f1"
6319 [(set_attr "type" "neon_shift_imm_long")]
;; Standard-named expander: build the PARALLEL selecting the upper-half
;; element indices (n/2 .. n-1) and hand it to the neon_vec_unpack<US>_hi
;; insn above.
6322 (define_expand "vec_unpack<US>_hi_<mode>"
6323 [(match_operand:<V_unpack> 0 "register_operand")
6324 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6325 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6327 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6330 for (i = 0; i < (<V_mode_nunits>/2); i++)
6331 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6333 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6334 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Standard-named expander: build the PARALLEL selecting the lower-half
;; element indices (0 .. n/2-1) and hand it to the neon_vec_unpack<US>_lo
;; insn above.
6341 (define_expand "vec_unpack<US>_lo_<mode>"
6342 [(match_operand:<V_unpack> 0 "register_operand")
6343 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6344 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6346 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6349 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6350 RTVEC_ELT (v, i) = GEN_INT (i);
6351 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6352 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of the LOW halves of two quad vectors: vmull on the low
;; D registers (%e1, %e3), extended per the SE iterator.  Little-endian only.
6359 (define_insn "neon_vec_<US>mult_lo_<mode>"
6360 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6361 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6362 (match_operand:VU 1 "register_operand" "w")
6363 (match_operand:VU 2 "vect_par_constant_low" "")))
6364 (SE:<V_unpack> (vec_select:<V_HALF>
6365 (match_operand:VU 3 "register_operand" "w")
6367 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6368 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6369 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Standard-named widening-multiply-low expander: build the low-half index
;; PARALLEL (0 .. n/2-1) and emit neon_vec_<US>mult_lo_<mode>.
6372 (define_expand "vec_widen_<US>mult_lo_<mode>"
6373 [(match_operand:<V_unpack> 0 "register_operand")
6374 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6375 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6376 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6378 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6381 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6382 RTVEC_ELT (v, i) = GEN_INT (i);
6383 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6385 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of the HIGH halves of two quad vectors: vmull on the
;; high D registers (%f1, %f3).  Mirror of neon_vec_<US>mult_lo_<mode>.
;; Little-endian only.
6393 (define_insn "neon_vec_<US>mult_hi_<mode>"
6394 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6395 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6396 (match_operand:VU 1 "register_operand" "w")
6397 (match_operand:VU 2 "vect_par_constant_high" "")))
6398 (SE:<V_unpack> (vec_select:<V_HALF>
6399 (match_operand:VU 3 "register_operand" "w")
6401 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6402 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6403 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Standard-named widening-multiply-high expander: build the high-half index
;; PARALLEL (n/2 .. n-1) and emit neon_vec_<US>mult_hi_<mode>.
6406 (define_expand "vec_widen_<US>mult_hi_<mode>"
6407 [(match_operand:<V_unpack> 0 "register_operand")
6408 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6409 (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6410 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6412 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6415 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6416 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6417 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6419 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening shift-left: vshll extends a double-word vector to quad width
;; while shifting by the constant in operand 2 (restricted by the
;; const_neon_scalar_shift_amount_operand predicate).
6428 (define_insn "neon_vec_<US>shiftl_<mode>"
6429 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6430 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6431 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6434 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6436 [(set_attr "type" "neon_shift_imm_long")]
;; Widening shift-left of the LOW half of a quad vector: take a half-mode
;; subreg at byte offset 0 of operand 1 and feed it to neon_vec_<US>shiftl.
6439 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6440 [(match_operand:<V_unpack> 0 "register_operand")
6441 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6442 (match_operand:SI 2 "immediate_operand")]
6443 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6445 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6446 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Widening shift-left of the HIGH half of a quad vector: the subreg offset
;; is GET_MODE_SIZE of the half mode, i.e. the upper D register.
6452 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6453 [(match_operand:<V_unpack> 0 "register_operand")
6454 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6455 (match_operand:SI 2 "immediate_operand")]
6456 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6458 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6459 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6460 GET_MODE_SIZE (<V_HALF>mode)),
;; Widen an ENTIRE double-word (VDI) vector to quad width with a single
;; vmovl -- the building block for the non-quad unpack expanders below.
6466 ;; Vectorize for non-neon-quad case
6467 (define_insn "neon_unpack<US>_<mode>"
6468 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6469 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6471 "vmovl.<US><V_sz_elem> %q0, %P1"
6472 [(set_attr "type" "neon_move")]
;; Non-quad unpack-low: widen the whole VDI vector into a quad temporary,
;; then extract its low D register with vget_low.
6475 (define_expand "vec_unpack<US>_lo_<mode>"
6476 [(match_operand:<V_double_width> 0 "register_operand")
6477 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6480 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6481 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6482 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Non-quad unpack-high: same widening into a temporary as unpack-low, but
;; the result comes from vget_high.
6488 (define_expand "vec_unpack<US>_hi_<mode>"
6489 [(match_operand:<V_double_width> 0 "register_operand")
6490 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6493 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6494 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6495 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two full double-word (VDI) vectors: a single vmull
;; producing a quad result -- building block for the non-quad widen-mult
;; expanders below.
6501 (define_insn "neon_vec_<US>mult_<mode>"
6502 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6503 (mult:<V_widen> (SE:<V_widen>
6504 (match_operand:VDI 1 "register_operand" "w"))
6506 (match_operand:VDI 2 "register_operand" "w"))))]
6508 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6509 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Non-quad widen-mult-high: vmull both VDI inputs into a quad temporary,
;; then keep the high D register via vget_high.
6512 (define_expand "vec_widen_<US>mult_hi_<mode>"
6513 [(match_operand:<V_double_width> 0 "register_operand")
6514 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6515 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6518 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6519 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6520 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Non-quad widen-mult-low: same vmull into a temporary, keeping the low
;; D register via vget_low.
6527 (define_expand "vec_widen_<US>mult_lo_<mode>"
6528 [(match_operand:<V_double_width> 0 "register_operand")
6529 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6530 (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6533 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6534 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6535 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Non-quad widen-shiftl-high: vshll the whole VDI input into a quad
;; temporary, then keep the high D register.
6542 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6543 [(match_operand:<V_double_width> 0 "register_operand")
6544 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6545 (match_operand:SI 2 "immediate_operand")]
6548 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6549 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6550 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Non-quad widen-shiftl-low: same vshll into a temporary, keeping the low
;; D register.
6556 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6557 [(match_operand:<V_double_width> 0 "register_operand")
6558 (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6559 (match_operand:SI 2 "immediate_operand")]
6562 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6563 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6564 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Narrow-and-pack two quad vectors into one: two vmovn instructions filling
;; the low (%e0) and high (%f0) halves of the destination.  The "=&w"
;; earlyclobber is needed because operand 0's low half is written before
;; operand 2 is read.  Length 8 = two 4-byte instructions.
6570 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6571 ; because the ordering of vector elements in Q registers is different from what
6572 ; the semantics of the instructions require.
6574 (define_insn "vec_pack_trunc_<mode>"
6575 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6576 (vec_concat:<V_narrow_pack>
6577 (truncate:<V_narrow>
6578 (match_operand:VN 1 "register_operand" "w"))
6579 (truncate:<V_narrow>
6580 (match_operand:VN 2 "register_operand" "w"))))]
6581 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6582 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6583 [(set_attr "type" "multiple")
6584 (set_attr "length" "8")]
;; Narrow one quad vector to a single D register with vmovn -- the building
;; block used by the vec_pack_trunc expander below.  Little-endian only.
6587 ;; For the non-quad case.
6588 (define_insn "neon_vec_pack_trunc_<mode>"
6589 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6590 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6591 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6592 "vmovn.i<V_sz_elem>\t%P0, %q1"
6593 [(set_attr "type" "neon_move_narrow_q")]
;; Non-quad pack-trunc: glue the two double-word inputs into the low and
;; high halves of a double-width temporary quad register, then narrow it
;; with neon_vec_pack_trunc above.
6596 (define_expand "vec_pack_trunc_<mode>"
6597 [(match_operand:<V_narrow_pack> 0 "register_operand")
6598 (match_operand:VSHFT 1 "register_operand")
6599 (match_operand:VSHFT 2 "register_operand")]
6600 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6602 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6604 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6605 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6606 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Match abs(a - b) on float vectors as a single vabd.  Only valid under
;; -funsafe-math-optimizations: vabd is not a bit-exact replacement for
;; subtract-then-abs in all IEEE corner cases.
6610 (define_insn "neon_vabd<mode>_2"
6611 [(set (match_operand:VF 0 "s_register_operand" "=w")
6612 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6613 (match_operand:VF 2 "s_register_operand" "w"))))]
6614 "TARGET_NEON && flag_unsafe_math_optimizations"
6615 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6616 [(set_attr "type" "neon_fp_abd_s<q>")]
6619 (define_insn "neon_vabd<mode>_3"
6620 [(set (match_operand:VF 0 "s_register_operand" "=w")
6621 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6622 (match_operand:VF 2 "s_register_operand" "w")]
6624 "TARGET_NEON && flag_unsafe_math_optimizations"
6625 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6626 [(set_attr "type" "neon_fp_abd_s<q>")]