1 ;; ARM NEON coprocessor Machine Description
2 ;; Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 ;; Written by CodeSourcery.
5 ;; This file is part of GCC.
7 ;; GCC is free software; you can redistribute it and/or modify it
8 ;; under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
12 ;; GCC is distributed in the hope that it will be useful, but
13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 ;; General Public License for more details.
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
23 ;; type attribute definitions.
;; The permitted values are vadd, vmin and vmax; the default is vadd.
24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
;; Move pattern for the VDX modes.  The visible part of the condition
;; requires at least one of the two operands to be a register.
;; Alternative 2 (the "Dn" source) is printed as a vmov immediate after the
;; constant has been checked by neon_immediate_valid_for_move, which also
;; supplies the element width used in the mnemonic.  Alternatives 1 and 3
;; are delegated to output_move_neon, alternatives 4 and 5 use the
;; two-core-register forms of vmov, and every remaining alternative is
;; delegated to output_move_double.
26 (define_insn "*neon_mov<mode>"
27 [(set (match_operand:VDX 0 "nonimmediate_operand"
28 "=w,Un,w, w, ?r,?w,?r, ?Us")
29 (match_operand:VDX 1 "general_operand"
30 " w,w, Dn,Uni, w, r, Usi,r"))]
32 && (register_operand (operands[0], <MODE>mode)
33 || register_operand (operands[1], <MODE>mode))"
35 if (which_alternative == 2)
38 static char templ[40];
40 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
41 &operands[1], &width);
43 gcc_assert (is_valid != 0);
46 return "vmov.f32\t%P0, %1 @ <mode>";
48 sprintf (templ, "vmov.i%d\t%%P0, %%x1 @ <mode>", width);
53 switch (which_alternative)
55 case 0: return "vmov\t%P0, %P1 @ <mode>";
56 case 1: case 3: return output_move_neon (operands);
57 case 2: gcc_unreachable ();
58 case 4: return "vmov\t%Q0, %R0, %P1 @ <mode>";
59 case 5: return "vmov\t%P0, %Q1, %R1 @ <mode>";
60 default: return output_move_double (operands, true, NULL);
63 [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
64 neon_load1_1reg, neon_to_gp<q>,neon_from_gp<q>,\
65 neon_load1_2reg, neon_store1_2reg")
66 (set_attr "length" "4,4,4,4,4,4,8,8")
67 (set_attr "arm_pool_range" "*,*,*,1020,*,*,1020,*")
68 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,1018,*")
69 (set_attr "neg_pool_range" "*,*,*,1004,*,*,1004,*")])
;; Move pattern for the VQXMOV modes; it has one more alternative than the
;; VDX pattern (nine constraint columns).  The visible part of the condition
;; requires at least one operand to be a register.  Alternative 2 is printed
;; as a vmov immediate validated by neon_immediate_valid_for_move;
;; alternatives 1 and 3 go through output_move_neon; alternatives 4 and 5
;; each emit two vmov instructions (separated by "\;") moving the value
;; between core registers and the %e/%f parts of the vector operand; the
;; remaining alternatives are delegated to output_move_quad.
71 (define_insn "*neon_mov<mode>"
72 [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
73 "=w,Un,w, w, ?r,?w,?r,?r, ?Us")
74 (match_operand:VQXMOV 1 "general_operand"
75 " w,w, Dn,Uni, w, r, r, Usi, r"))]
77 && (register_operand (operands[0], <MODE>mode)
78 || register_operand (operands[1], <MODE>mode))"
80 if (which_alternative == 2)
83 static char templ[40];
85 is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
86 &operands[1], &width);
88 gcc_assert (is_valid != 0);
91 return "vmov.f32\t%q0, %1 @ <mode>";
93 sprintf (templ, "vmov.i%d\t%%q0, %%1 @ <mode>", width);
98 switch (which_alternative)
100 case 0: return "vmov\t%q0, %q1 @ <mode>";
101 case 1: case 3: return output_move_neon (operands);
102 case 2: gcc_unreachable ();
103 case 4: return "vmov\t%Q0, %R0, %e1 @ <mode>\;vmov\t%J0, %K0, %f1";
104 case 5: return "vmov\t%e0, %Q1, %R1 @ <mode>\;vmov\t%f0, %J1, %K1";
105 default: return output_move_quad (operands);
108 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
109 neon_load2_2reg_q,neon_to_gp_q,neon_from_gp_q,\
110 mov_reg,neon_load1_4reg,neon_store1_4reg")
111 (set_attr "length" "4,8,4,8,8,8,16,8,16")
112 (set_attr "arm_pool_range" "*,*,*,1020,*,*,*,1020,*")
113 (set_attr "thumb2_pool_range" "*,*,*,1018,*,*,*,1018,*")
114 (set_attr "neg_pool_range" "*,*,*,996,*,*,*,996,*")])
116 /* We define these mov expanders to match the standard mov$a optab to prevent
117 the mid-end from trying to do a subreg for these modes which is the most
118 inefficient way to expand the move. Also, big-endian subregs aren't
119 allowed for a subset of modes; see TARGET_CAN_CHANGE_MODE_CLASS.
120 Without these RTL generation patterns the mid-end would attempt to take a
121 sub-reg and may ICE if it can't. */
;; Expander for TImode moves.  While new pseudos may still be created, a
;; source that is headed for a non-register destination is first forced
;; into a TImode register.
123 (define_expand "movti"
124 [(set (match_operand:TI 0 "nonimmediate_operand" "")
125 (match_operand:TI 1 "general_operand" ""))]
128 if (can_create_pseudo_p ())
130 if (!REG_P (operands[0]))
131 operands[1] = force_reg (TImode, operands[1]);
;; Expander for moves in the VSTRUCT modes.  While new pseudos may still be
;; created, a source that is headed for a non-register destination is first
;; forced into a register of the same mode.
135 (define_expand "mov<mode>"
136 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "")
137 (match_operand:VSTRUCT 1 "general_operand" ""))]
140 if (can_create_pseudo_p ())
142 if (!REG_P (operands[0]))
143 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Expander for moves in the VH modes.  Both operands use the
;; s_register_operand predicate; the preparation code still forces the
;; source into a register when the destination is not a REG and pseudos
;; can be created.
147 (define_expand "mov<mode>"
148 [(set (match_operand:VH 0 "s_register_operand")
149 (match_operand:VH 1 "s_register_operand"))]
152 if (can_create_pseudo_p ())
154 if (!REG_P (operands[0]))
155 operands[1] = force_reg (<MODE>mode, operands[1]);
;; Move pattern for the VSTRUCT modes with three alternatives:
;; register-to-register, store to a "Ut" operand and load from a "Ut"
;; operand.  The visible part of the condition requires at least one
;; operand to be a register.  The store and load alternatives (1 and 2) are
;; printed by output_move_neon.  The length attribute is not a constant; it
;; is computed per insn by arm_attr_length_move_neon.
159 (define_insn "*neon_mov<mode>"
160 [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
161 (match_operand:VSTRUCT 1 "general_operand" " w,w, Ut"))]
163 && (register_operand (operands[0], <MODE>mode)
164 || register_operand (operands[1], <MODE>mode))"
166 switch (which_alternative)
169 case 1: case 2: return output_move_neon (operands);
170 default: gcc_unreachable ();
173 [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
174 (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
;; After reload, replace an EI-mode register-to-register copy by two
;; copies: a TImode piece starting at the original register numbers and a
;; DImode piece starting four register numbers later.  The pieces are handed
;; to neon_disambiguate_copy, which fills in the operands used by the
;; replacement template (presumably choosing an order that is safe when
;; source and destination overlap -- confirm against its definition).
177 [(set (match_operand:EI 0 "s_register_operand" "")
178 (match_operand:EI 1 "s_register_operand" ""))]
179 "TARGET_NEON && reload_completed"
180 [(set (match_dup 0) (match_dup 1))
181 (set (match_dup 2) (match_dup 3))]
183 int rdest = REGNO (operands[0]);
184 int rsrc = REGNO (operands[1]);
187 dest[0] = gen_rtx_REG (TImode, rdest);
188 src[0] = gen_rtx_REG (TImode, rsrc);
189 dest[1] = gen_rtx_REG (DImode, rdest + 4);
190 src[1] = gen_rtx_REG (DImode, rsrc + 4);
192 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, replace an OI-mode register-to-register copy by two TImode
;; copies, the second starting four register numbers after the first.  The
;; pieces are handed to neon_disambiguate_copy, which fills in the operands
;; used by the replacement template (presumably ordering them safely for
;; overlapping registers -- confirm against its definition).
196 [(set (match_operand:OI 0 "s_register_operand" "")
197 (match_operand:OI 1 "s_register_operand" ""))]
198 "TARGET_NEON && reload_completed"
199 [(set (match_dup 0) (match_dup 1))
200 (set (match_dup 2) (match_dup 3))]
202 int rdest = REGNO (operands[0]);
203 int rsrc = REGNO (operands[1]);
206 dest[0] = gen_rtx_REG (TImode, rdest);
207 src[0] = gen_rtx_REG (TImode, rsrc);
208 dest[1] = gen_rtx_REG (TImode, rdest + 4);
209 src[1] = gen_rtx_REG (TImode, rsrc + 4);
211 neon_disambiguate_copy (operands, dest, src, 2);
;; After reload, replace a CI-mode register-to-register copy by three
;; TImode copies at register offsets 0, 4 and 8.  The pieces are handed to
;; neon_disambiguate_copy, which fills in the operands used by the
;; replacement template (presumably ordering them safely for overlapping
;; registers -- confirm against its definition).
215 [(set (match_operand:CI 0 "s_register_operand" "")
216 (match_operand:CI 1 "s_register_operand" ""))]
217 "TARGET_NEON && reload_completed"
218 [(set (match_dup 0) (match_dup 1))
219 (set (match_dup 2) (match_dup 3))
220 (set (match_dup 4) (match_dup 5))]
222 int rdest = REGNO (operands[0]);
223 int rsrc = REGNO (operands[1]);
226 dest[0] = gen_rtx_REG (TImode, rdest);
227 src[0] = gen_rtx_REG (TImode, rsrc);
228 dest[1] = gen_rtx_REG (TImode, rdest + 4);
229 src[1] = gen_rtx_REG (TImode, rsrc + 4);
230 dest[2] = gen_rtx_REG (TImode, rdest + 8);
231 src[2] = gen_rtx_REG (TImode, rsrc + 8);
233 neon_disambiguate_copy (operands, dest, src, 3);
;; After reload, replace an XI-mode register-to-register copy by four
;; TImode copies at register offsets 0, 4, 8 and 12.  The pieces are handed
;; to neon_disambiguate_copy, which fills in the operands used by the
;; replacement template (presumably ordering them safely for overlapping
;; registers -- confirm against its definition).
237 [(set (match_operand:XI 0 "s_register_operand" "")
238 (match_operand:XI 1 "s_register_operand" ""))]
239 "TARGET_NEON && reload_completed"
240 [(set (match_dup 0) (match_dup 1))
241 (set (match_dup 2) (match_dup 3))
242 (set (match_dup 4) (match_dup 5))
243 (set (match_dup 6) (match_dup 7))]
245 int rdest = REGNO (operands[0]);
246 int rsrc = REGNO (operands[1]);
249 dest[0] = gen_rtx_REG (TImode, rdest);
250 src[0] = gen_rtx_REG (TImode, rsrc);
251 dest[1] = gen_rtx_REG (TImode, rdest + 4);
252 src[1] = gen_rtx_REG (TImode, rsrc + 4);
253 dest[2] = gen_rtx_REG (TImode, rdest + 8);
254 src[2] = gen_rtx_REG (TImode, rsrc + 8);
255 dest[3] = gen_rtx_REG (TImode, rdest + 12);
256 src[3] = gen_rtx_REG (TImode, rsrc + 12);
258 neon_disambiguate_copy (operands, dest, src, 4);
;; Expander for misaligned moves in the VDQX modes, enabled only for
;; little-endian NEON targets with unaligned_access.  The move is wrapped
;; in UNSPEC_MISALIGNED_ACCESS.  If neither operand is a register, operand 1
;; is forced into one.  Whichever operand is not the register is then taken
;; as the memory reference, and its address is forced into a Pmode register
;; when neon_vector_mem_operand does not accept it.
261 (define_expand "movmisalign<mode>"
262 [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
263 (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
264 UNSPEC_MISALIGNED_ACCESS))]
265 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
268 /* This pattern is not permitted to fail during expansion: if both arguments
269 are non-registers (e.g. memory := constant, which can be created by the
270 auto-vectorizer), force operand 1 into a register. */
271 if (!s_register_operand (operands[0], <MODE>mode)
272 && !s_register_operand (operands[1], <MODE>mode))
273 operands[1] = force_reg (<MODE>mode, operands[1]);
275 if (s_register_operand (operands[0], <MODE>mode))
276 adjust_mem = operands[1];
278 adjust_mem = operands[0];
280 /* Legitimize address. */
281 if (!neon_vector_mem_operand (adjust_mem, 2, true))
282 XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
;; Misaligned store of a VDX-mode register to a "Um" memory operand,
;; emitted as a single vst1 with the element size taken from <V_sz_elem>.
286 (define_insn "*movmisalign<mode>_neon_store"
287 [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
288 (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
289 UNSPEC_MISALIGNED_ACCESS))]
290 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
291 "vst1.<V_sz_elem>\t{%P1}, %A0"
292 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VDX-mode register from memory, emitted as a single
;; vld1 with the element size taken from <V_sz_elem>.
294 (define_insn "*movmisalign<mode>_neon_load"
295 [(set (match_operand:VDX 0 "s_register_operand" "=w")
296 (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
298 UNSPEC_MISALIGNED_ACCESS))]
299 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
300 "vld1.<V_sz_elem>\t{%P0}, %A1"
301 [(set_attr "type" "neon_load1_1reg<q>")])
;; Misaligned store of a VQX-mode register to a "Um" memory operand,
;; emitted as a single vst1 using the %q form of the source register.
303 (define_insn "*movmisalign<mode>_neon_store"
304 [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
305 (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
306 UNSPEC_MISALIGNED_ACCESS))]
307 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
308 "vst1.<V_sz_elem>\t{%q1}, %A0"
309 [(set_attr "type" "neon_store1_1reg<q>")])
;; Misaligned load of a VQX-mode register from memory, emitted as a single
;; vld1 using the %q form of the destination register.
311 (define_insn "*movmisalign<mode>_neon_load"
312 [(set (match_operand:VQX 0 "s_register_operand" "=w")
313 (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
315 UNSPEC_MISALIGNED_ACCESS))]
316 "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
317 "vld1.<V_sz_elem>\t{%q0}, %A1"
318 [(set_attr "type" "neon_load1_1reg<q>")])
;; Insert one element (operand 1, from memory or a core register) into a
;; VD_LANE vector.  Operand 3 is tied to the output and supplies the other
;; lanes.  Operand 2 is a bit mask: the lane number is recovered as
;; ffs (mask) - 1 and is mirrored on big-endian targets before printing.
;; Alternative 0 emits a single-lane vld1; the other emits vmov from the
;; core register.
320 (define_insn "vec_set<mode>_internal"
321 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
323 (vec_duplicate:VD_LANE
324 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
325 (match_operand:VD_LANE 3 "s_register_operand" "0,0")
326 (match_operand:SI 2 "immediate_operand" "i,i")))]
329 int elt = ffs ((int) INTVAL (operands[2])) - 1;
330 if (BYTES_BIG_ENDIAN)
331 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
332 operands[2] = GEN_INT (elt);
334 if (which_alternative == 0)
335 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
337 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
339 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
;; Insert one element into a VQ2 vector.  Operand 2 is a bit mask whose lane
;; number is ffs (mask) - 1.  The lane is split into a half selector
;; (hi = (elem / half_elts) * 2, added to the register number) and a lane
;; within that half (elem % half_elts, mirrored on big-endian targets).
;; Operand 0 is then rewritten as the <V_HALF>mode register holding that
;; half so that the same vld1/vmov templates as the VD_LANE pattern apply.
341 (define_insn "vec_set<mode>_internal"
342 [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
345 (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
346 (match_operand:VQ2 3 "s_register_operand" "0,0")
347 (match_operand:SI 2 "immediate_operand" "i,i")))]
350 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
351 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
352 int elt = elem % half_elts;
353 int hi = (elem / half_elts) * 2;
354 int regno = REGNO (operands[0]);
356 if (BYTES_BIG_ENDIAN)
357 elt = half_elts - 1 - elt;
359 operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
360 operands[2] = GEN_INT (elt);
362 if (which_alternative == 0)
363 return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
365 return "vmov.<V_sz_elem>\t%P0[%c2], %1";
367 [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
;; Insert a DImode element into a V2DI vector.  Operand 2 is a bit mask
;; whose lane number is ffs (mask) - 1; operand 0 is rewritten as the DImode
;; register at REGNO + 2 * lane.  Alternative 0 loads it with vld1.64 from
;; memory; the other uses vmov from a core register pair (%Q1, %R1).
370 (define_insn "vec_setv2di_internal"
371 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
374 (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
375 (match_operand:V2DI 3 "s_register_operand" "0,0")
376 (match_operand:SI 2 "immediate_operand" "i,i")))]
379 HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
380 int regno = REGNO (operands[0]) + 2 * elem;
382 operands[0] = gen_rtx_REG (DImode, regno);
384 if (which_alternative == 0)
385 return "vld1.64\t%P0, %A1";
387 return "vmov\t%P0, %Q1, %R1";
389 [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
;; Expander for vec_set in the VDQ modes.  The lane index in operand 2 is
;; converted to a one-bit mask (1 << index) and passed to the matching
;; vec_set<mode>_internal pattern, with operand 0 supplied both as the
;; destination and as the vector providing the untouched lanes.
392 (define_expand "vec_set<mode>"
393 [(match_operand:VDQ 0 "s_register_operand" "")
394 (match_operand:<V_elem> 1 "s_register_operand" "")
395 (match_operand:SI 2 "immediate_operand" "")]
398 HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
399 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
400 GEN_INT (elem), operands[0]));
;; Extract one element of a VD_LANE vector into memory or a core register.
;; Operand 2 is the lane number, mirrored on big-endian targets before
;; printing.  Alternative 0 emits a single-lane vst1; the other emits vmov
;; to the core register using the <V_uf_sclr> suffix.
404 (define_insn "vec_extract<mode><V_elem_l>"
405 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
407 (match_operand:VD_LANE 1 "s_register_operand" "w,w")
408 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
411 if (BYTES_BIG_ENDIAN)
413 int elt = INTVAL (operands[2]);
414 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
415 operands[2] = GEN_INT (elt);
418 if (which_alternative == 0)
419 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
421 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
423 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract one element of a VQ2 vector into memory or a core register.
;; The lane number in operand 2 is split into a half selector
;; ((lane / half_elts) * 2, added to the source register number) and a lane
;; within that half (lane % half_elts, mirrored on big-endian targets).
;; Operand 1 is rewritten as the <V_HALF>mode register for that half before
;; the vst1/vmov template is printed.
426 (define_insn "vec_extract<mode><V_elem_l>"
427 [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
429 (match_operand:VQ2 1 "s_register_operand" "w,w")
430 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
433 int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
434 int elt = INTVAL (operands[2]) % half_elts;
435 int hi = (INTVAL (operands[2]) / half_elts) * 2;
436 int regno = REGNO (operands[1]);
438 if (BYTES_BIG_ENDIAN)
439 elt = half_elts - 1 - elt;
441 operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
442 operands[2] = GEN_INT (elt);
444 if (which_alternative == 0)
445 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
447 return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
449 [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
;; Extract a DImode element of a V2DI vector.  Operand 1 is rewritten as the
;; DImode register at REGNO + 2 * lane, which is then stored with vst1.64
;; (alternative 0) or moved to a core register pair with vmov.
452 (define_insn "vec_extractv2didi"
453 [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
455 (match_operand:V2DI 1 "s_register_operand" "w,w")
456 (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
459 int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
461 operands[1] = gen_rtx_REG (DImode, regno);
463 if (which_alternative == 0)
464 return "vst1.64\t{%P1}, %A0 @ v2di";
466 return "vmov\t%Q0, %R0, %P1 @ v2di";
468 [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
;; Expander for vec_init in the VDQ modes; all of the work is delegated to
;; neon_expand_vector_init with the destination and the initializer.
471 (define_expand "vec_init<mode><V_elem_l>"
472 [(match_operand:VDQ 0 "s_register_operand" "")
473 (match_operand 1 "" "")]
476 neon_expand_vector_init (operands[0], operands[1]);
480 ;; Doubleword and quadword arithmetic.
482 ;; NOTE: some other instructions also support 64-bit integer
483 ;; element size, which we could potentially use for "long long" operations.
;; Vector addition for the VDQ modes, emitted as vadd.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.  The
;; type attribute is chosen by <Is_float_mode>.
485 (define_insn "*add<mode>3_neon"
486 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
487 (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
488 (match_operand:VDQ 2 "s_register_operand" "w")))]
489 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
490 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
492 (if_then_else (match_test "<Is_float_mode>")
493 (const_string "neon_fp_addsub_s<q>")
494 (const_string "neon_add<q>")))]
497 ;; As with SFmode, full support for HFmode vector arithmetic is only available
498 ;; when flag_unsafe_math_optimizations is enabled.
;; Named add pattern for the VH modes, emitted as vadd.  It requires both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
500 (define_insn "add<mode>3"
502 (match_operand:VH 0 "s_register_operand" "=w")
504 (match_operand:VH 1 "s_register_operand" "w")
505 (match_operand:VH 2 "s_register_operand" "w")))]
506 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
507 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
509 (if_then_else (match_test "<Is_float_mode>")
510 (const_string "neon_fp_addsub_s<q>")
511 (const_string "neon_add<q>")))]
;; Same vadd template as add<mode>3 for the VH modes, but conditioned on
;; TARGET_NEON_FP16INST alone (no flag_unsafe_math_optimizations test).
514 (define_insn "add<mode>3_fp16"
516 (match_operand:VH 0 "s_register_operand" "=w")
518 (match_operand:VH 1 "s_register_operand" "w")
519 (match_operand:VH 2 "s_register_operand" "w")))]
520 "TARGET_NEON_FP16INST"
521 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
523 (if_then_else (match_test "<Is_float_mode>")
524 (const_string "neon_fp_addsub_s<q>")
525 (const_string "neon_add<q>")))]
;; DImode addition with seven alternatives; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 3 use NEON registers and
;; emit vadd.i64; the "arch" attribute gates them on neon_for_64bits and
;; avoid_neon_for_64bits respectively.  The other alternatives use core
;; registers and are marked as type "multiple", length 8, conds "clob".
528 (define_insn "adddi3_neon"
529 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
530 (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
531 (match_operand:DI 2 "arm_adddi_operand" "w,r,0,w,r,Dd,Dd")))
532 (clobber (reg:CC CC_REGNUM))]
535 switch (which_alternative)
537 case 0: /* fall through */
538 case 3: return "vadd.i64\t%P0, %P1, %P2";
544 default: gcc_unreachable ();
547 [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
548 multiple,multiple,multiple")
549 (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
550 (set_attr "length" "*,8,8,*,8,8,8")
551 (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
;; Vector subtraction for the VDQ modes, emitted as vsub.  Floating-point
;; modes are only matched when flag_unsafe_math_optimizations is set.  The
;; type attribute is chosen by <Is_float_mode>.
554 (define_insn "*sub<mode>3_neon"
555 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
556 (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
557 (match_operand:VDQ 2 "s_register_operand" "w")))]
558 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
559 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
561 (if_then_else (match_test "<Is_float_mode>")
562 (const_string "neon_fp_addsub_s<q>")
563 (const_string "neon_sub<q>")))]
;; Named sub pattern for the VH modes, emitted as vsub.  It requires both
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
;; NOTE(review): the type is neon_sub<q>, whereas the VH add patterns pick
;; neon_fp_addsub_s<q> for float modes -- confirm this is intended.
566 (define_insn "sub<mode>3"
568 (match_operand:VH 0 "s_register_operand" "=w")
570 (match_operand:VH 1 "s_register_operand" "w")
571 (match_operand:VH 2 "s_register_operand" "w")))]
572 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
573 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
574 [(set_attr "type" "neon_sub<q>")]
;; Same vsub template as sub<mode>3 for the VH modes, but conditioned on
;; TARGET_NEON_FP16INST alone (no flag_unsafe_math_optimizations test).
577 (define_insn "sub<mode>3_fp16"
579 (match_operand:VH 0 "s_register_operand" "=w")
581 (match_operand:VH 1 "s_register_operand" "w")
582 (match_operand:VH 2 "s_register_operand" "w")))]
583 "TARGET_NEON_FP16INST"
584 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
585 [(set_attr "type" "neon_sub<q>")]
;; DImode subtraction with five alternatives; the pattern clobbers the
;; condition-code register.  Alternatives 0 and 4 use NEON registers and
;; emit vsub.i64 (gated by the "arch" attribute on neon_for_64bits and
;; avoid_neon_for_64bits respectively).  Alternatives 1 to 3 use core
;; registers and emit a subs/sbc pair on the low and high words.
588 (define_insn "subdi3_neon"
589 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
590 (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
591 (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
592 (clobber (reg:CC CC_REGNUM))]
595 switch (which_alternative)
597 case 0: /* fall through */
598 case 4: return "vsub.i64\t%P0, %P1, %P2";
599 case 1: /* fall through */
600 case 2: /* fall through */
601 case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
602 default: gcc_unreachable ();
605 [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
606 (set_attr "conds" "*,clob,clob,clob,*")
607 (set_attr "length" "*,8,8,8,*")
608 (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
;; Vector multiplication for the VDQW modes, emitted as vmul.
;; Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
611 (define_insn "*mul<mode>3_neon"
612 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
613 (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
614 (match_operand:VDQW 2 "s_register_operand" "w")))]
615 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
616 "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
618 (if_then_else (match_test "<Is_float_mode>")
619 (const_string "neon_fp_mul_s<q>")
620 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VDQW modes: operand 2 * operand 3 is added
;; to operand 1, which is tied to the output, and emitted as vmla.
;; Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
623 (define_insn "mul<mode>3add<mode>_neon"
624 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
625 (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
626 (match_operand:VDQW 3 "s_register_operand" "w"))
627 (match_operand:VDQW 1 "s_register_operand" "0")))]
628 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
629 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
631 (if_then_else (match_test "<Is_float_mode>")
632 (const_string "neon_fp_mla_s<q>")
633 (const_string "neon_mla_<V_elem_ch><q>")))]
;; Multiply-accumulate for the VH modes: operand 2 * operand 3 is added to
;; operand 1, which is tied to the output, and emitted as vmla.f16.  The
;; condition uses TARGET_NEON_FP16INST in place of TARGET_NEON.
636 (define_insn "mul<mode>3add<mode>_neon"
637 [(set (match_operand:VH 0 "s_register_operand" "=w")
638 (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
639 (match_operand:VH 3 "s_register_operand" "w"))
640 (match_operand:VH 1 "s_register_operand" "0")))]
641 "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
642 "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
643 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Multiply-subtract for the VDQW modes: operand 2 * operand 3 is
;; subtracted from operand 1, which is tied to the output, and emitted as
;; vmls.  Floating-point modes are only matched when
;; flag_unsafe_math_optimizations is set.
646 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
647 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
648 (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
649 (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
650 (match_operand:VDQW 3 "s_register_operand" "w"))))]
651 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
652 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
654 (if_then_else (match_test "<Is_float_mode>")
655 (const_string "neon_fp_mla_s<q>")
656 (const_string "neon_mla_<V_elem_ch><q>")))]
659 ;; Fused multiply-accumulate
660 ;; We define each insn twice here:
661 ;; 1: with flag_unsafe_math_optimizations for the widening multiply phase
662 ;; to be able to use when converting to FMA.
663 ;; 2: without flag_unsafe_math_optimizations for the intrinsics to use.
;; Standard-named fused multiply-add for the VCVTF modes, emitted as vfma
;; with the addend (operand 3) tied to the output.  Requires TARGET_FMA and
;; flag_unsafe_math_optimizations.
664 (define_insn "fma<VCVTF:mode>4"
665 [(set (match_operand:VCVTF 0 "register_operand" "=w")
666 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
667 (match_operand:VCVTF 2 "register_operand" "w")
668 (match_operand:VCVTF 3 "register_operand" "0")))]
669 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
670 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
671 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same RTL and vfma template as fma<VCVTF:mode>4, but without the
;; flag_unsafe_math_optimizations requirement.
674 (define_insn "fma<VCVTF:mode>4_intrinsic"
675 [(set (match_operand:VCVTF 0 "register_operand" "=w")
676 (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
677 (match_operand:VCVTF 2 "register_operand" "w")
678 (match_operand:VCVTF 3 "register_operand" "0")))]
679 "TARGET_NEON && TARGET_FMA"
680 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
681 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Standard-named fused multiply-add for the VH modes, emitted as vfma with
;; the addend (operand 3) tied to the output.  Requires
;; TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
684 (define_insn "fma<VH:mode>4"
685 [(set (match_operand:VH 0 "register_operand" "=w")
687 (match_operand:VH 1 "register_operand" "w")
688 (match_operand:VH 2 "register_operand" "w")
689 (match_operand:VH 3 "register_operand" "0")))]
690 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
691 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
692 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same vfma template as fma<VH:mode>4, but conditioned on
;; TARGET_NEON_FP16INST alone.
695 (define_insn "fma<VH:mode>4_intrinsic"
696 [(set (match_operand:VH 0 "register_operand" "=w")
698 (match_operand:VH 1 "register_operand" "w")
699 (match_operand:VH 2 "register_operand" "w")
700 (match_operand:VH 3 "register_operand" "0")))]
701 "TARGET_NEON_FP16INST"
702 "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
703 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VCVTF modes: an fma whose first
;; multiplicand is negated, emitted as vfms with operand 3 tied to the
;; output.  Requires TARGET_FMA and flag_unsafe_math_optimizations.
706 (define_insn "*fmsub<VCVTF:mode>4"
707 [(set (match_operand:VCVTF 0 "register_operand" "=w")
708 (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
709 (match_operand:VCVTF 2 "register_operand" "w")
710 (match_operand:VCVTF 3 "register_operand" "0")))]
711 "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
712 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
713 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Same negated-multiplicand fma and vfms template as *fmsub<VCVTF:mode>4,
;; but without the flag_unsafe_math_optimizations requirement.
716 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
717 [(set (match_operand:VCVTF 0 "register_operand" "=w")
719 (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
720 (match_operand:VCVTF 2 "register_operand" "w")
721 (match_operand:VCVTF 3 "register_operand" "0")))]
722 "TARGET_NEON && TARGET_FMA"
723 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
724 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Fused multiply-subtract for the VH modes: operand 1 is negated before
;; the multiply, operand 3 is tied to the output, and vfms is emitted.
;; Conditioned on TARGET_NEON_FP16INST alone.
727 (define_insn "fmsub<VH:mode>4_intrinsic"
728 [(set (match_operand:VH 0 "register_operand" "=w")
730 (neg:VH (match_operand:VH 1 "register_operand" "w"))
731 (match_operand:VH 2 "register_operand" "w")
732 (match_operand:VH 3 "register_operand" "0")))]
733 "TARGET_NEON_FP16INST"
734 "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
735 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Rounding patterns for the VCVTF modes, one per NEON_VRINT variant.  Each
;; is an unspec on operand 1 and is emitted as vrint<variant>.f32.
;; Requires TARGET_NEON and TARGET_VFP5.
738 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
739 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
740 (unspec:VCVTF [(match_operand:VCVTF 1
741 "s_register_operand" "w")]
743 "TARGET_NEON && TARGET_VFP5"
744 "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
745 [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
;; Float-to-integer conversions for the VCVTF modes, one per NEON_VCVT
;; variant and per FIXUORS code.  The result has mode <V_cmp_result> and
;; the insn is emitted as vcvt<variant>.<su>32.f32.  Requires TARGET_NEON
;; and TARGET_VFP5; the pattern is marked as not predicable.
748 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
749 [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
750 (FIXUORS:<V_cmp_result> (unspec:VCVTF
751 [(match_operand:VCVTF 1 "register_operand" "w")]
753 "TARGET_NEON && TARGET_VFP5"
754 "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
755 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
756 (set_attr "predicable" "no")]
;; Bitwise OR for the VDQ modes.  Alternative 0 is the three-register vorr.
;; Alternative 1 takes a "Dl" second operand with operand 1 tied to the
;; output, and is printed by neon_output_logic_immediate ("vorr", fourth
;; argument 0).
759 (define_insn "ior<mode>3"
760 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
761 (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
762 (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
765 switch (which_alternative)
767 case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
768 case 1: return neon_output_logic_immediate ("vorr", &operands[2],
769 <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
770 default: gcc_unreachable ();
773 [(set_attr "type" "neon_logic<q>")]
776 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
777 ;; vorr. We support the pseudo-instruction vand instead, because that
778 ;; corresponds to the canonical form the middle-end expects to use for
779 ;; immediate bitwise-ANDs.
;; Bitwise AND for the VDQ modes.  Alternative 0 is the three-register
;; vand.  Alternative 1 takes a "DL" second operand with operand 1 tied to
;; the output, and is printed by neon_output_logic_immediate ("vand",
;; fourth argument 1).
781 (define_insn "and<mode>3"
782 [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
783 (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
784 (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
787 switch (which_alternative)
789 case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
790 case 1: return neon_output_logic_immediate ("vand", &operands[2],
791 <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
792 default: gcc_unreachable ();
795 [(set_attr "type" "neon_logic<q>")]
;; OR-NOT for the VDQ modes: operand 1 | ~operand 2, emitted as vorn.
798 (define_insn "orn<mode>3_neon"
799 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
800 (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
801 (match_operand:VDQ 1 "s_register_operand" "w")))]
803 "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
804 [(set_attr "type" "neon_logic<q>")]
807 ;; TODO: investigate whether we should disable
808 ;; this and bicdi3_neon for the A8 in line with the other
;; DImode OR-NOT (operand 1 | ~operand 2) with one NEON alternative and
;; three core-register alternatives.  The split applies only when the
;; destination is not a VFP register.  Its preparation code contains two
;; strategies: rewriting the operands into SImode low and high parts for
;; the two-set replacement template, or emitting one_cmpldi2 into operand 0
;; followed by iordi3.  The per-alternative "arch" attribute is
;; any,a,t2,t2 and the lengths are *,16,8,8.
810 (define_insn_and_split "orndi3_neon"
811 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
812 (ior:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,0,0,r"))
813 (match_operand:DI 1 "s_register_operand" "w,r,r,0")))]
821 (TARGET_NEON && !(IS_VFP_REGNUM (REGNO (operands[0]))))"
822 [(set (match_dup 0) (ior:SI (not:SI (match_dup 2)) (match_dup 1)))
823 (set (match_dup 3) (ior:SI (not:SI (match_dup 4)) (match_dup 5)))]
828 operands[3] = gen_highpart (SImode, operands[0]);
829 operands[0] = gen_lowpart (SImode, operands[0]);
830 operands[4] = gen_highpart (SImode, operands[2]);
831 operands[2] = gen_lowpart (SImode, operands[2]);
832 operands[5] = gen_highpart (SImode, operands[1]);
833 operands[1] = gen_lowpart (SImode, operands[1]);
837 emit_insn (gen_one_cmpldi2 (operands[0], operands[2]));
838 emit_insn (gen_iordi3 (operands[0], operands[1], operands[0]));
842 [(set_attr "type" "neon_logic,multiple,multiple,multiple")
843 (set_attr "length" "*,16,8,8")
844 (set_attr "arch" "any,a,t2,t2")]
;; Bit-clear for the VDQ modes: operand 1 & ~operand 2, emitted as vbic.
847 (define_insn "bic<mode>3_neon"
848 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
849 (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
850 (match_operand:VDQ 1 "s_register_operand" "w")))]
852 "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
853 [(set_attr "type" "neon_logic<q>")]
856 ;; Compare to *anddi_notdi_di.
;; DImode bit-clear (operand 1 & ~operand 2) with one NEON alternative and
;; two core-register alternatives; the latter are typed "multiple" with
;; length 8.
857 (define_insn "bicdi3_neon"
858 [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
859 (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
860 (match_operand:DI 1 "s_register_operand" "w,0,r")))]
866 [(set_attr "type" "neon_logic,multiple,multiple")
867 (set_attr "length" "*,8,8")]
;; Bitwise exclusive OR for the VDQ modes, emitted as veor.
870 (define_insn "xor<mode>3"
871 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
872 (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
873 (match_operand:VDQ 2 "s_register_operand" "w")))]
875 "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
876 [(set_attr "type" "neon_logic<q>")]
;; Bitwise complement for the VDQ modes, emitted as vmvn.
879 (define_insn "one_cmpl<mode>2"
880 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
881 (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
883 "vmvn\t%<V_reg>0, %<V_reg>1"
884 [(set_attr "type" "neon_move<q>")]
;; Absolute value for the VDQW modes, emitted as vabs with the <V_s_elem>
;; suffix.  The type attribute is chosen by <Is_float_mode>.
887 (define_insn "abs<mode>2"
888 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
889 (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
891 "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
893 (if_then_else (match_test "<Is_float_mode>")
894 (const_string "neon_fp_abs_s<q>")
895 (const_string "neon_abs<q>")))]
;; Negation for the VDQW modes, emitted as vneg with the <V_s_elem> suffix.
;; The type attribute is chosen by <Is_float_mode>.
898 (define_insn "neg<mode>2"
899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
900 (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
902 "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
904 (if_then_else (match_test "<Is_float_mode>")
905 (const_string "neon_fp_neg_s<q>")
906 (const_string "neon_neg<q>")))]
;; DImode negation with two NEON and two core-register alternatives.  The
;; pattern clobbers a DImode scratch (a real "w" register only in
;; alternative 1, "X" elsewhere) and the condition-code register.  It is
;; typed "multiple" with length 8; the two define_splits for negdi2_neon
;; that follow break it up after reload.
909 (define_insn "negdi2_neon"
910 [(set (match_operand:DI 0 "s_register_operand" "=&w, w,r,&r")
911 (neg:DI (match_operand:DI 1 "s_register_operand" " w, w,0, r")))
912 (clobber (match_scratch:DI 2 "= X,&w,X, X"))
913 (clobber (reg:CC CC_REGNUM))]
916 [(set_attr "length" "8")
917 (set_attr "type" "multiple")]
920 ; Split negdi2_neon for vfp registers
;; When the destination is a VFP register after reload, negation becomes
;; "operand 2 = 0" followed by "operand 0 = operand 2 - operand 1".  If no
;; scratch register was allocated (operand 2 is not a REG), the destination
;; itself is used to hold the zero.
922 [(set (match_operand:DI 0 "s_register_operand" "")
923 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
924 (clobber (match_scratch:DI 2 ""))
925 (clobber (reg:CC CC_REGNUM))]
926 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
927 [(set (match_dup 2) (const_int 0))
928 (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
929 (clobber (reg:CC CC_REGNUM))])]
931 if (!REG_P (operands[2]))
932 operands[2] = operands[0];
936 ; Split negdi2_neon for core registers
;; When the destination is a core register after reload, the pattern is
;; re-emitted as a plain DImode negation that clobbers only the
;; condition-code register; the scratch operand is dropped.
938 [(set (match_operand:DI 0 "s_register_operand" "")
939 (neg:DI (match_operand:DI 1 "s_register_operand" "")))
940 (clobber (match_scratch:DI 2 ""))
941 (clobber (reg:CC CC_REGNUM))]
942 "TARGET_32BIT && reload_completed
943 && arm_general_register_operand (operands[0], DImode)"
944 [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
945 (clobber (reg:CC CC_REGNUM))])]
;; abs and neg for the VH modes, generated from the ABSNEG code iterator
;; and emitted as v<absneg_str>.  Requires TARGET_NEON_FP16INST.  Both
;; variants share the type neon_abs<q>.
949 (define_insn "<absneg_str><mode>2"
950 [(set (match_operand:VH 0 "s_register_operand" "=w")
951 (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
952 "TARGET_NEON_FP16INST"
953 "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
954 [(set_attr "type" "neon_abs<q>")]
;; Expander named neon_v<absneg_str><mode> for the VH modes; it simply
;; emits the corresponding <absneg_str><mode>2 insn on the same operands.
957 (define_expand "neon_v<absneg_str><mode>"
959 (match_operand:VH 0 "s_register_operand")
960 (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
961 "TARGET_NEON_FP16INST"
963 emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
;; Rounding patterns for the VH modes; the mnemonic comes from
;; <fp16_rnd_insn> and the element suffix from <V_s_elem>.  Requires
;; TARGET_NEON_FP16INST.
967 (define_insn "neon_v<fp16_rnd_str><mode>"
968 [(set (match_operand:VH 0 "s_register_operand" "=w")
970 [(match_operand:VH 1 "s_register_operand" "w")]
972 "TARGET_NEON_FP16INST"
973 "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
974 [(set_attr "type" "neon_fp_round_s<q>")]
;; Emits vrsqrte.f16 on a VH-mode register operand.  Requires
;; TARGET_NEON_FP16INST.
977 (define_insn "neon_vrsqrte<mode>"
978 [(set (match_operand:VH 0 "s_register_operand" "=w")
980 [(match_operand:VH 1 "s_register_operand" "w")]
982 "TARGET_NEON_FP16INST"
983 "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
984 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Unsigned minimum for the VDQIW modes, emitted as vmin with the
;; <V_u_elem> suffix.
987 (define_insn "*umin<mode>3_neon"
988 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
989 (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
990 (match_operand:VDQIW 2 "s_register_operand" "w")))]
992 "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
993 [(set_attr "type" "neon_minmax<q>")]
;; Unsigned maximum for the VDQIW modes, emitted as vmax with the
;; <V_u_elem> suffix.
996 (define_insn "*umax<mode>3_neon"
997 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
998 (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
999 (match_operand:VDQIW 2 "s_register_operand" "w")))]
1001 "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1002 [(set_attr "type" "neon_minmax<q>")]
1005 (define_insn "*smin<mode>3_neon"
1006 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1007 (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1008 (match_operand:VDQW 2 "s_register_operand" "w")))]
1010 "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1012 (if_then_else (match_test "<Is_float_mode>")
1013 (const_string "neon_fp_minmax_s<q>")
1014 (const_string "neon_minmax<q>")))]
1017 (define_insn "*smax<mode>3_neon"
1018 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
1019 (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1020 (match_operand:VDQW 2 "s_register_operand" "w")))]
1022 "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1024 (if_then_else (match_test "<Is_float_mode>")
1025 (const_string "neon_fp_minmax_s<q>")
1026 (const_string "neon_minmax<q>")))]
1029 ; TODO: V2DI shifts are currently disabled because there are bugs in the
1030 ; generic vectorizer code. It ends up creating a V2DI constructor with
1033 (define_insn "vashl<mode>3"
1034 [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1035 (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1036 (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dn")))]
1039 switch (which_alternative)
1041 case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1042 case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1044 VALID_NEON_QREG_MODE (<MODE>mode),
1046 default: gcc_unreachable ();
1049 [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1052 (define_insn "vashr<mode>3_imm"
1053 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1054 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1055 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1058 return neon_output_shift_immediate ("vshr", 's', &operands[2],
1059 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1062 [(set_attr "type" "neon_shift_imm<q>")]
1065 (define_insn "vlshr<mode>3_imm"
1066 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1067 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1068 (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dn")))]
1071 return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1072 <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1075 [(set_attr "type" "neon_shift_imm<q>")]
1078 ; Used for implementing arithmetic shift-right, which is a left-shift by a
1079 ; negative amount, with signed operands. This is essentially the same as vashl<mode>3
1080 ; above, but using an unspec in case GCC tries anything tricky with negative
1083 (define_insn "ashl<mode>3_signed"
1084 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1085 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1086 (match_operand:VDQI 2 "s_register_operand" "w")]
1087 UNSPEC_ASHIFT_SIGNED))]
1089 "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1090 [(set_attr "type" "neon_shift_reg<q>")]
1093 ; Used for implementing logical shift-right, which is a left-shift by a negative
1094 ; amount, with unsigned operands.
1096 (define_insn "ashl<mode>3_unsigned"
1097 [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1098 (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1099 (match_operand:VDQI 2 "s_register_operand" "w")]
1100 UNSPEC_ASHIFT_UNSIGNED))]
1102 "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1103 [(set_attr "type" "neon_shift_reg<q>")]
1106 (define_expand "vashr<mode>3"
1107 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1108 (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1109 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1112 if (s_register_operand (operands[2], <MODE>mode))
1114 rtx neg = gen_reg_rtx (<MODE>mode);
1115 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1116 emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1119 emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1123 (define_expand "vlshr<mode>3"
1124 [(set (match_operand:VDQIW 0 "s_register_operand" "")
1125 (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "")
1126 (match_operand:VDQIW 2 "imm_rshift_or_reg_neon" "")))]
1129 if (s_register_operand (operands[2], <MODE>mode))
1131 rtx neg = gen_reg_rtx (<MODE>mode);
1132 emit_insn (gen_neg<mode>2 (neg, operands[2]));
1133 emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1136 emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1142 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1143 ;; leaving the upper half uninitialized. This is OK since the shift
1144 ;; instruction only looks at the low 8 bits anyway. To avoid confusing
1145 ;; data flow analysis however, we pretend the full register is set
1147 (define_insn "neon_load_count"
1148 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1149 (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1150 UNSPEC_LOAD_COUNT))]
1153 vld1.32\t{%P0[0]}, %A1
1154 vmov.32\t%P0[0], %1"
1155 [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1158 (define_insn "ashldi3_neon_noclobber"
1159 [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1160 (ashift:DI (match_operand:DI 1 "s_register_operand" " w,w")
1161 (match_operand:DI 2 "reg_or_int_operand" " i,w")))]
1162 "TARGET_NEON && reload_completed
1163 && (!CONST_INT_P (operands[2])
1164 || (INTVAL (operands[2]) >= 0 && INTVAL (operands[2]) < 64))"
1166 vshl.u64\t%P0, %P1, %2
1167 vshl.u64\t%P0, %P1, %P2"
1168 [(set_attr "type" "neon_shift_imm, neon_shift_reg")]
1171 (define_insn_and_split "ashldi3_neon"
1172 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r, ?w,?w")
1173 (ashift:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r, 0w, w")
1174 (match_operand:SI 2 "general_operand" "rUm, i, r, i, i,rUm, i")))
1175 (clobber (match_scratch:SI 3 "= X, X, &r, X, X, X, X"))
1176 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1177 (clobber (match_scratch:DI 5 "=&w, X, X, X, X, &w, X"))
1178 (clobber (reg:CC_C CC_REGNUM))]
1181 "TARGET_NEON && reload_completed"
1185 if (IS_VFP_REGNUM (REGNO (operands[0])))
1187 if (CONST_INT_P (operands[2]))
1189 if (INTVAL (operands[2]) < 1)
1191 emit_insn (gen_movdi (operands[0], operands[1]));
1194 else if (INTVAL (operands[2]) > 63)
1195 operands[2] = gen_rtx_CONST_INT (VOIDmode, 63);
1199 emit_insn (gen_neon_load_count (operands[5], operands[2]));
1200 operands[2] = operands[5];
1203 /* Ditch the unnecessary clobbers. */
1204 emit_insn (gen_ashldi3_neon_noclobber (operands[0], operands[1],
1209 /* The shift expanders support either full overlap or no overlap. */
1210 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1211 || REGNO (operands[0]) == REGNO (operands[1]));
1213 arm_emit_coreregs_64bit_shift (ASHIFT, operands[0], operands[1],
1214 operands[2], operands[3], operands[4]);
1218 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1219 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1220 (set_attr "type" "multiple")]
1223 ; The shift amount needs to be negated for right-shifts
1224 (define_insn "signed_shift_di3_neon"
1225 [(set (match_operand:DI 0 "s_register_operand" "=w")
1226 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1227 (match_operand:DI 2 "s_register_operand" " w")]
1228 UNSPEC_ASHIFT_SIGNED))]
1229 "TARGET_NEON && reload_completed"
1230 "vshl.s64\t%P0, %P1, %P2"
1231 [(set_attr "type" "neon_shift_reg")]
1234 ; The shift amount needs to be negated for right-shifts
1235 (define_insn "unsigned_shift_di3_neon"
1236 [(set (match_operand:DI 0 "s_register_operand" "=w")
1237 (unspec:DI [(match_operand:DI 1 "s_register_operand" " w")
1238 (match_operand:DI 2 "s_register_operand" " w")]
1239 UNSPEC_ASHIFT_UNSIGNED))]
1240 "TARGET_NEON && reload_completed"
1241 "vshl.u64\t%P0, %P1, %P2"
1242 [(set_attr "type" "neon_shift_reg")]
1245 (define_insn "ashrdi3_neon_imm_noclobber"
1246 [(set (match_operand:DI 0 "s_register_operand" "=w")
1247 (ashiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1248 (match_operand:DI 2 "const_int_operand" " i")))]
1249 "TARGET_NEON && reload_completed
1250 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1251 "vshr.s64\t%P0, %P1, %2"
1252 [(set_attr "type" "neon_shift_imm")]
1255 (define_insn "lshrdi3_neon_imm_noclobber"
1256 [(set (match_operand:DI 0 "s_register_operand" "=w")
1257 (lshiftrt:DI (match_operand:DI 1 "s_register_operand" " w")
1258 (match_operand:DI 2 "const_int_operand" " i")))]
1259 "TARGET_NEON && reload_completed
1260 && INTVAL (operands[2]) > 0 && INTVAL (operands[2]) <= 64"
1261 "vshr.u64\t%P0, %P1, %2"
1262 [(set_attr "type" "neon_shift_imm")]
1267 (define_insn_and_split "<shift>di3_neon"
1268 [(set (match_operand:DI 0 "s_register_operand" "= w, w, &r, r, &r,?w,?w")
1269 (RSHIFTS:DI (match_operand:DI 1 "s_register_operand" " 0w, w, 0r, 0, r,0w, w")
1270 (match_operand:SI 2 "reg_or_int_operand" " r, i, r, i, i, r, i")))
1271 (clobber (match_scratch:SI 3 "=2r, X, &r, X, X,2r, X"))
1272 (clobber (match_scratch:SI 4 "= X, X, &r, X, X, X, X"))
1273 (clobber (match_scratch:DI 5 "=&w, X, X, X, X,&w, X"))
1274 (clobber (reg:CC CC_REGNUM))]
1277 "TARGET_NEON && reload_completed"
1281 if (IS_VFP_REGNUM (REGNO (operands[0])))
1283 if (CONST_INT_P (operands[2]))
1285 if (INTVAL (operands[2]) < 1)
1287 emit_insn (gen_movdi (operands[0], operands[1]));
1290 else if (INTVAL (operands[2]) > 64)
1291 operands[2] = gen_rtx_CONST_INT (VOIDmode, 64);
1293 /* Ditch the unnecessary clobbers. */
1294 emit_insn (gen_<shift>di3_neon_imm_noclobber (operands[0],
1300 /* We must use a negative left-shift. */
1301 emit_insn (gen_negsi2 (operands[3], operands[2]));
1302 emit_insn (gen_neon_load_count (operands[5], operands[3]));
1303 emit_insn (gen_<shifttype>_shift_di3_neon (operands[0], operands[1],
1309 /* The shift expanders support either full overlap or no overlap. */
1310 gcc_assert (!reg_overlap_mentioned_p (operands[0], operands[1])
1311 || REGNO (operands[0]) == REGNO (operands[1]));
1313 /* This clobbers CC (ASHIFTRT by register only). */
1314 arm_emit_coreregs_64bit_shift (<CODE>, operands[0], operands[1],
1315 operands[2], operands[3], operands[4]);
1320 [(set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")
1321 (set_attr "opt" "*,*,speed,speed,speed,*,*")
1322 (set_attr "type" "multiple")]
1325 ;; Widening operations
1327 (define_expand "widen_ssum<mode>3"
1328 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1329 (plus:<V_double_width>
1330 (sign_extend:<V_double_width>
1331 (match_operand:VQI 1 "s_register_operand" ""))
1332 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1335 machine_mode mode = GET_MODE (operands[1]);
1338 p1 = arm_simd_vect_par_cnst_half (mode, false);
1339 p2 = arm_simd_vect_par_cnst_half (mode, true);
1341 if (operands[0] != operands[2])
1342 emit_move_insn (operands[0], operands[2]);
1344 emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1348 emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1356 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1357 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1358 (plus:<V_double_width>
1359 (sign_extend:<V_double_width>
1360 (vec_select:<V_HALF>
1361 (match_operand:VQI 1 "s_register_operand" "%w")
1362 (match_operand:VQI 2 "vect_par_constant_low" "")))
1363 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1366 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1367 "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1369 [(set_attr "type" "neon_add_widen")])
1371 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1372 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1373 (plus:<V_double_width>
1374 (sign_extend:<V_double_width>
1375 (vec_select:<V_HALF>
1376 (match_operand:VQI 1 "s_register_operand" "%w")
1377 (match_operand:VQI 2 "vect_par_constant_high" "")))
1378 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1381 return BYTES_BIG_ENDIAN ? "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1382 "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1384 [(set_attr "type" "neon_add_widen")])
1386 (define_insn "widen_ssum<mode>3"
1387 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1389 (sign_extend:<V_widen>
1390 (match_operand:VW 1 "s_register_operand" "%w"))
1391 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1393 "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1394 [(set_attr "type" "neon_add_widen")]
1397 (define_expand "widen_usum<mode>3"
1398 [(set (match_operand:<V_double_width> 0 "s_register_operand" "")
1399 (plus:<V_double_width>
1400 (zero_extend:<V_double_width>
1401 (match_operand:VQI 1 "s_register_operand" ""))
1402 (match_operand:<V_double_width> 2 "s_register_operand" "")))]
1405 machine_mode mode = GET_MODE (operands[1]);
1408 p1 = arm_simd_vect_par_cnst_half (mode, false);
1409 p2 = arm_simd_vect_par_cnst_half (mode, true);
1411 if (operands[0] != operands[2])
1412 emit_move_insn (operands[0], operands[2]);
1414 emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1418 emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1426 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1427 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1428 (plus:<V_double_width>
1429 (zero_extend:<V_double_width>
1430 (vec_select:<V_HALF>
1431 (match_operand:VQI 1 "s_register_operand" "%w")
1432 (match_operand:VQI 2 "vect_par_constant_low" "")))
1433 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1436 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1437 "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1439 [(set_attr "type" "neon_add_widen")])
1441 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1442 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1443 (plus:<V_double_width>
1444 (zero_extend:<V_double_width>
1445 (vec_select:<V_HALF>
1446 (match_operand:VQI 1 "s_register_operand" "%w")
1447 (match_operand:VQI 2 "vect_par_constant_high" "")))
1448 (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1451 return BYTES_BIG_ENDIAN ? "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1452 "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1454 [(set_attr "type" "neon_add_widen")])
1456 (define_insn "widen_usum<mode>3"
1457 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1458 (plus:<V_widen> (zero_extend:<V_widen>
1459 (match_operand:VW 1 "s_register_operand" "%w"))
1460 (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1462 "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1463 [(set_attr "type" "neon_add_widen")]
1466 ;; Helpers for quad-word reduction operations
1468 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1469 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1470 ; N/2-element vector.
1472 (define_insn "quad_halves_<code>v4si"
1473 [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1475 (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1476 (parallel [(const_int 0) (const_int 1)]))
1477 (vec_select:V2SI (match_dup 1)
1478 (parallel [(const_int 2) (const_int 3)]))))]
1480 "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1481 [(set_attr "vqh_mnem" "<VQH_mnem>")
1482 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1485 (define_insn "quad_halves_<code>v4sf"
1486 [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1488 (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1489 (parallel [(const_int 0) (const_int 1)]))
1490 (vec_select:V2SF (match_dup 1)
1491 (parallel [(const_int 2) (const_int 3)]))))]
1492 "TARGET_NEON && flag_unsafe_math_optimizations"
1493 "<VQH_mnem>.f32\t%P0, %e1, %f1"
1494 [(set_attr "vqh_mnem" "<VQH_mnem>")
1495 (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1498 (define_insn "quad_halves_<code>v8hi"
1499 [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1501 (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1502 (parallel [(const_int 0) (const_int 1)
1503 (const_int 2) (const_int 3)]))
1504 (vec_select:V4HI (match_dup 1)
1505 (parallel [(const_int 4) (const_int 5)
1506 (const_int 6) (const_int 7)]))))]
1508 "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1509 [(set_attr "vqh_mnem" "<VQH_mnem>")
1510 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1513 (define_insn "quad_halves_<code>v16qi"
1514 [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1516 (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1517 (parallel [(const_int 0) (const_int 1)
1518 (const_int 2) (const_int 3)
1519 (const_int 4) (const_int 5)
1520 (const_int 6) (const_int 7)]))
1521 (vec_select:V8QI (match_dup 1)
1522 (parallel [(const_int 8) (const_int 9)
1523 (const_int 10) (const_int 11)
1524 (const_int 12) (const_int 13)
1525 (const_int 14) (const_int 15)]))))]
1527 "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1528 [(set_attr "vqh_mnem" "<VQH_mnem>")
1529 (set_attr "type" "neon_reduc_<VQH_type>_q")]
1532 (define_expand "move_hi_quad_<mode>"
1533 [(match_operand:ANY128 0 "s_register_operand" "")
1534 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1537 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1538 GET_MODE_SIZE (<V_HALF>mode)),
1543 (define_expand "move_lo_quad_<mode>"
1544 [(match_operand:ANY128 0 "s_register_operand" "")
1545 (match_operand:<V_HALF> 1 "s_register_operand" "")]
1548 emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1554 ;; Reduction operations
1556 (define_expand "reduc_plus_scal_<mode>"
1557 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1558 (match_operand:VD 1 "s_register_operand" "")]
1559 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1561 rtx vec = gen_reg_rtx (<MODE>mode);
1562 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1563 &gen_neon_vpadd_internal<mode>);
1564 /* The same result is actually computed into every element. */
1565 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1569 (define_expand "reduc_plus_scal_<mode>"
1570 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1571 (match_operand:VQ 1 "s_register_operand" "")]
1572 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1573 && !BYTES_BIG_ENDIAN"
1575 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1577 emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1578 emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1583 (define_expand "reduc_plus_scal_v2di"
1584 [(match_operand:DI 0 "nonimmediate_operand" "=w")
1585 (match_operand:V2DI 1 "s_register_operand" "")]
1586 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1588 rtx vec = gen_reg_rtx (V2DImode);
1590 emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1591 emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1596 (define_insn "arm_reduc_plus_internal_v2di"
1597 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1598 (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1600 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1601 "vadd.i64\t%e0, %e1, %f1"
1602 [(set_attr "type" "neon_add_q")]
1605 (define_expand "reduc_smin_scal_<mode>"
1606 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1607 (match_operand:VD 1 "s_register_operand" "")]
1608 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1610 rtx vec = gen_reg_rtx (<MODE>mode);
1612 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1613 &gen_neon_vpsmin<mode>);
1614 /* The result is computed into every element of the vector. */
1615 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1619 (define_expand "reduc_smin_scal_<mode>"
1620 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1621 (match_operand:VQ 1 "s_register_operand" "")]
1622 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1623 && !BYTES_BIG_ENDIAN"
1625 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1627 emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1628 emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1633 (define_expand "reduc_smax_scal_<mode>"
1634 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1635 (match_operand:VD 1 "s_register_operand" "")]
1636 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1638 rtx vec = gen_reg_rtx (<MODE>mode);
1639 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1640 &gen_neon_vpsmax<mode>);
1641 /* The result is computed into every element of the vector. */
1642 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1646 (define_expand "reduc_smax_scal_<mode>"
1647 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1648 (match_operand:VQ 1 "s_register_operand" "")]
1649 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1650 && !BYTES_BIG_ENDIAN"
1652 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1654 emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1655 emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1660 (define_expand "reduc_umin_scal_<mode>"
1661 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1662 (match_operand:VDI 1 "s_register_operand" "")]
1665 rtx vec = gen_reg_rtx (<MODE>mode);
1666 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1667 &gen_neon_vpumin<mode>);
1668 /* The result is computed into every element of the vector. */
1669 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1673 (define_expand "reduc_umin_scal_<mode>"
1674 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1675 (match_operand:VQI 1 "s_register_operand" "")]
1676 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1678 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1680 emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1681 emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1686 (define_expand "reduc_umax_scal_<mode>"
1687 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1688 (match_operand:VDI 1 "s_register_operand" "")]
1691 rtx vec = gen_reg_rtx (<MODE>mode);
1692 neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1693 &gen_neon_vpumax<mode>);
1694 /* The result is computed into every element of the vector. */
1695 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1699 (define_expand "reduc_umax_scal_<mode>"
1700 [(match_operand:<V_elem> 0 "nonimmediate_operand" "")
1701 (match_operand:VQI 1 "s_register_operand" "")]
1702 "TARGET_NEON && !BYTES_BIG_ENDIAN"
1704 rtx step1 = gen_reg_rtx (<V_HALF>mode);
1706 emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1707 emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1712 (define_insn "neon_vpadd_internal<mode>"
1713 [(set (match_operand:VD 0 "s_register_operand" "=w")
1714 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1715 (match_operand:VD 2 "s_register_operand" "w")]
1718 "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1719 ;; Assume this schedules like vadd.
1721 (if_then_else (match_test "<Is_float_mode>")
1722 (const_string "neon_fp_reduc_add_s<q>")
1723 (const_string "neon_reduc_add<q>")))]
1726 (define_insn "neon_vpaddv4hf"
1728 (match_operand:V4HF 0 "s_register_operand" "=w")
1729 (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1730 (match_operand:V4HF 2 "s_register_operand" "w")]
1732 "TARGET_NEON_FP16INST"
1733 "vpadd.f16\t%P0, %P1, %P2"
1734 [(set_attr "type" "neon_reduc_add")]
1737 (define_insn "neon_vpsmin<mode>"
1738 [(set (match_operand:VD 0 "s_register_operand" "=w")
1739 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1740 (match_operand:VD 2 "s_register_operand" "w")]
1743 "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1745 (if_then_else (match_test "<Is_float_mode>")
1746 (const_string "neon_fp_reduc_minmax_s<q>")
1747 (const_string "neon_reduc_minmax<q>")))]
1750 (define_insn "neon_vpsmax<mode>"
1751 [(set (match_operand:VD 0 "s_register_operand" "=w")
1752 (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1753 (match_operand:VD 2 "s_register_operand" "w")]
1756 "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1758 (if_then_else (match_test "<Is_float_mode>")
1759 (const_string "neon_fp_reduc_minmax_s<q>")
1760 (const_string "neon_reduc_minmax<q>")))]
1763 (define_insn "neon_vpumin<mode>"
1764 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1765 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1766 (match_operand:VDI 2 "s_register_operand" "w")]
1769 "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1770 [(set_attr "type" "neon_reduc_minmax<q>")]
1773 (define_insn "neon_vpumax<mode>"
1774 [(set (match_operand:VDI 0 "s_register_operand" "=w")
1775 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1776 (match_operand:VDI 2 "s_register_operand" "w")]
1779 "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1780 [(set_attr "type" "neon_reduc_minmax<q>")]
1783 ;; Saturating arithmetic
1785 ; NOTE: Neon supports many more saturating variants of instructions than the
1786 ; following, but these are all GCC currently understands.
1787 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1788 ; yet either, although these patterns may be used by intrinsics when they're
1791 (define_insn "*ss_add<mode>_neon"
1792 [(set (match_operand:VD 0 "s_register_operand" "=w")
1793 (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1794 (match_operand:VD 2 "s_register_operand" "w")))]
1796 "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1797 [(set_attr "type" "neon_qadd<q>")]
1800 (define_insn "*us_add<mode>_neon"
1801 [(set (match_operand:VD 0 "s_register_operand" "=w")
1802 (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1803 (match_operand:VD 2 "s_register_operand" "w")))]
1805 "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1806 [(set_attr "type" "neon_qadd<q>")]
1809 (define_insn "*ss_sub<mode>_neon"
1810 [(set (match_operand:VD 0 "s_register_operand" "=w")
1811 (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1812 (match_operand:VD 2 "s_register_operand" "w")))]
1814 "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1815 [(set_attr "type" "neon_qsub<q>")]
1818 (define_insn "*us_sub<mode>_neon"
1819 [(set (match_operand:VD 0 "s_register_operand" "=w")
1820 (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1821 (match_operand:VD 2 "s_register_operand" "w")))]
1823 "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1824 [(set_attr "type" "neon_qsub<q>")]
1827 ;; Conditional instructions. These are comparisons with conditional moves for
1828 ;; vectors. They perform the assignment:
1830 ;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1832 ;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
1835 (define_expand "vcond<mode><mode>"
1836 [(set (match_operand:VDQW 0 "s_register_operand" "")
1838 (match_operator 3 "comparison_operator"
1839 [(match_operand:VDQW 4 "s_register_operand" "")
1840 (match_operand:VDQW 5 "nonmemory_operand" "")])
1841 (match_operand:VDQW 1 "s_register_operand" "")
1842 (match_operand:VDQW 2 "s_register_operand" "")))]
1843 "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1846 int use_zero_form = 0;
1847 int swap_bsl_operands = 0;
1848 rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1849 rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1851 rtx (*base_comparison) (rtx, rtx, rtx);
1852 rtx (*complimentary_comparison) (rtx, rtx, rtx);
1854 switch (GET_CODE (operands[3]))
1861 if (operands[5] == CONST0_RTX (<MODE>mode))
1868 if (!REG_P (operands[5]))
1869 operands[5] = force_reg (<MODE>mode, operands[5]);
1872 switch (GET_CODE (operands[3]))
1882 base_comparison = gen_neon_vcge<mode>;
1883 complimentary_comparison = gen_neon_vcgt<mode>;
1891 base_comparison = gen_neon_vcgt<mode>;
1892 complimentary_comparison = gen_neon_vcge<mode>;
1897 base_comparison = gen_neon_vceq<mode>;
1898 complimentary_comparison = gen_neon_vceq<mode>;
1904 switch (GET_CODE (operands[3]))
1911 /* The easy case. Here we emit one of vcge, vcgt or vceq.
1912 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are:
1918 Note that there also exist direct comparison against 0 forms,
1919 so catch those as a special case. */
1923 switch (GET_CODE (operands[3]))
1926 base_comparison = gen_neon_vclt<mode>;
1929 base_comparison = gen_neon_vcle<mode>;
1932 /* Do nothing, other zero form cases already have the correct
1939 emit_insn (base_comparison (mask, operands[4], operands[5]));
1941 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1948 /* Vector compare returns false for lanes which are unordered, so if we use
1949 the inverse of the comparison we actually want to emit, then
1950 swap the operands to BSL, we will end up with the correct result.
1951 Note that a NE NaN and NaN NE b are true for all a, b.
1953 Our transformations are:
1958 a NE b -> !(a EQ b) */
1961 emit_insn (base_comparison (mask, operands[4], operands[5]));
1963 emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1965 swap_bsl_operands = 1;
1968 /* We check (a > b || b > a). Combining these comparisons gives us
1969 true iff (a != b && a ORDERED b); swapping the operands to BSL
1970 will then give us (a == b || a UNORDERED b) as intended. */
1972 emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1973 emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1974 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1975 swap_bsl_operands = 1;
1978 /* Operands are ORDERED iff (a > b || b >= a).
1979 Swapping the operands to BSL will give the UNORDERED case. */
1980 swap_bsl_operands = 1;
1983 emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1984 emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1985 emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1991 if (swap_bsl_operands)
1992 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1995 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2000 (define_expand "vcondu<mode><mode>"
2001 [(set (match_operand:VDQIW 0 "s_register_operand" "")
2003 (match_operator 3 "arm_comparison_operator"
2004 [(match_operand:VDQIW 4 "s_register_operand" "")
2005 (match_operand:VDQIW 5 "s_register_operand" "")])
2006 (match_operand:VDQIW 1 "s_register_operand" "")
2007 (match_operand:VDQIW 2 "s_register_operand" "")))]
2011 int inverse = 0, immediate_zero = 0;
2013 mask = gen_reg_rtx (<V_cmp_result>mode);
2015 if (operands[5] == CONST0_RTX (<MODE>mode))
2017 else if (!REG_P (operands[5]))
2018 operands[5] = force_reg (<MODE>mode, operands[5]);
2020 switch (GET_CODE (operands[3]))
2023 emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
2027 emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
2031 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2036 emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
2038 emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
2043 emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
2045 emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
2049 emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
2058 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
2061 emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
2067 ;; Patterns for builtins.
2069 ; good for plain vadd, vaddq.
2071 (define_expand "neon_vadd<mode>"
2072 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2073 (match_operand:VCVTF 1 "s_register_operand" "w")
2074 (match_operand:VCVTF 2 "s_register_operand" "w")]
2077 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2078 emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
2080 emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
2085 (define_expand "neon_vadd<mode>"
2086 [(match_operand:VH 0 "s_register_operand")
2087 (match_operand:VH 1 "s_register_operand")
2088 (match_operand:VH 2 "s_register_operand")]
2089 "TARGET_NEON_FP16INST"
2091 emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
;; Expander for the vsub builtin over the VH modes.  Enabled by
;; TARGET_NEON_FP16INST; forwards its three register operands to
;; sub<mode>3_fp16.
2095 (define_expand "neon_vsub<mode>"
2096 [(match_operand:VH 0 "s_register_operand")
2097 (match_operand:VH 1 "s_register_operand")
2098 (match_operand:VH 2 "s_register_operand")]
2099 "TARGET_NEON_FP16INST"
2101 emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
2105 ; Note that NEON operations don't support the full IEEE 754 standard: in
2106 ; particular, denormal values are flushed to zero. This means that GCC cannot
2107 ; use those instructions for autovectorization, etc. unless
2108 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
2109 ; behavior is permissible). Intrinsic operations (provided by the arm_neon.h
2110 ; header) must work in either case: if -funsafe-math-optimizations is given,
2111 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
2112 ; expand to unspecs (which may potentially limit the extent to which they might
2113 ; be optimized by generic code).
2115 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vadd over the VCVTF modes: operand 0 is set from an
;; unspec of operands 1 and 2, and the output is a three-register
;; vadd.<V_if_elem>.  The type attribute is neon_fp_addsub_s<q> for float
;; modes and neon_add<q> otherwise.
2117 (define_insn "neon_vadd<mode>_unspec"
2118 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2119 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2120 (match_operand:VCVTF 2 "s_register_operand" "w")]
2123 "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2125 (if_then_else (match_test "<Is_float_mode>")
2126 (const_string "neon_fp_addsub_s<q>")
2127 (const_string "neon_add<q>")))]
;; vaddl: both inputs are VDI-mode registers and the result is in the
;; corresponding <V_widen> mode.  <sup> supplies the signedness letter of
;; the mnemonic suffix.  Type: neon_add_long.
2130 (define_insn "neon_vaddl<sup><mode>"
2131 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2132 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2133 (match_operand:VDI 2 "s_register_operand" "w")]
2136 "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2137 [(set_attr "type" "neon_add_long")]
;; vaddw: operand 1 is already in the <V_widen> mode, operand 2 is a
;; VDI-mode register, and the result is <V_widen>.  Type: neon_add_widen.
2140 (define_insn "neon_vaddw<sup><mode>"
2141 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2142 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2143 (match_operand:VDI 2 "s_register_operand" "w")]
2146 "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2147 [(set_attr "type" "neon_add_widen")]
;; vhadd / vrhadd (the <r> part selects the mnemonic) over the VDQIW
;; modes: three registers of the same mode, with <sup> supplying the
;; signedness letter.  The type attribute uses <q> so that it follows the
;; mode being instantiated, in the same way as neon_vhsub<sup><mode>
;; ("neon_sub_halve<q>"), instead of naming the _q type unconditionally.
2152 (define_insn "neon_v<r>hadd<sup><mode>"
2153 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2154 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2155 (match_operand:VDQIW 2 "s_register_operand" "w")]
2158 "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2159 [(set_attr "type" "neon_add_halve<q>")]
;; vqadd over the VDQIX modes: three registers of the same mode, with
;; <sup> supplying the signedness letter.  Type: neon_qadd<q>.
2162 (define_insn "neon_vqadd<sup><mode>"
2163 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2164 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2165 (match_operand:VDQIX 2 "s_register_operand" "w")]
2168 "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2169 [(set_attr "type" "neon_qadd<q>")]
;; vaddhn / vraddhn (the <r> part selects the mnemonic): both inputs are
;; VN-mode registers and the result is in the corresponding <V_narrow>
;; mode.  Type: neon_add_halve_narrow_q.
2172 (define_insn "neon_v<r>addhn<mode>"
2173 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2174 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2175 (match_operand:VN 2 "s_register_operand" "w")]
2178 "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
2179 [(set_attr "type" "neon_add_halve_narrow_q")]
2182 ;; Polynomial and Float multiplication.
;; Unspec multiply over the VPF modes; <pf> supplies the type letter of
;; the vmul suffix.  The type attribute is neon_fp_mul_s<q> for float modes
;; and neon_mul_<V_elem_ch><q> otherwise.
2183 (define_insn "neon_vmul<pf><mode>"
2184 [(set (match_operand:VPF 0 "s_register_operand" "=w")
2185 (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
2186 (match_operand:VPF 2 "s_register_operand" "w")]
2189 "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2191 (if_then_else (match_test "<Is_float_mode>")
2192 (const_string "neon_fp_mul_s<q>")
2193 (const_string "neon_mul_<V_elem_ch><q>")))]
;; Standard-named mul<mode>3 over the VH modes, output as vmul.f16.  It is
;; only enabled when both TARGET_NEON_FP16INST and
;; flag_unsafe_math_optimizations hold.
2196 (define_insn "mul<mode>3"
2198 (match_operand:VH 0 "s_register_operand" "=w")
2200 (match_operand:VH 1 "s_register_operand" "w")
2201 (match_operand:VH 2 "s_register_operand" "w")))]
2202 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2203 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2204 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; vmul.f16 over the VH modes.  Same output template and type attribute as
;; mul<mode>3 for VH, but conditioned on TARGET_NEON_FP16INST alone (no
;; dependence on flag_unsafe_math_optimizations).
2207 (define_insn "neon_vmulf<mode>"
2209 (match_operand:VH 0 "s_register_operand" "=w")
2211 (match_operand:VH 1 "s_register_operand" "w")
2212 (match_operand:VH 2 "s_register_operand" "w")))]
2213 "TARGET_NEON_FP16INST"
2214 "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2215 [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
;; Expander for the vmla builtin over the VDQW modes.  When the mode is
;; not a float mode, or -funsafe-math-optimizations is on, it emits
;; mul<mode>3add<mode>_neon; the other emission visible below is
;; neon_vmla<mode>_unspec.  Both receive operands 0..3 in order.
2218 (define_expand "neon_vmla<mode>"
2219 [(match_operand:VDQW 0 "s_register_operand" "=w")
2220 (match_operand:VDQW 1 "s_register_operand" "0")
2221 (match_operand:VDQW 2 "s_register_operand" "w")
2222 (match_operand:VDQW 3 "s_register_operand" "w")]
2225 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2226 emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2227 operands[2], operands[3]));
2229 emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2230 operands[2], operands[3]));
;; Expander for the vfma builtin over the VCVTF modes, enabled by
;; TARGET_NEON && TARGET_FMA.  It emits fma<mode>4_intrinsic; note that
;; operands[2] and operands[3] are passed directly after operands[0],
;; i.e. the builtin's operand order is permuted for the callee.
2234 (define_expand "neon_vfma<VCVTF:mode>"
2235 [(match_operand:VCVTF 0 "s_register_operand")
2236 (match_operand:VCVTF 1 "s_register_operand")
2237 (match_operand:VCVTF 2 "s_register_operand")
2238 (match_operand:VCVTF 3 "s_register_operand")]
2239 "TARGET_NEON && TARGET_FMA"
2241 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Expander for the vfma builtin over the VH modes, enabled by
;; TARGET_NEON_FP16INST.  It emits fma<mode>4_intrinsic with operands[2]
;; and operands[3] passed directly after operands[0].
2246 (define_expand "neon_vfma<VH:mode>"
2247 [(match_operand:VH 0 "s_register_operand")
2248 (match_operand:VH 1 "s_register_operand")
2249 (match_operand:VH 2 "s_register_operand")
2250 (match_operand:VH 3 "s_register_operand")]
2251 "TARGET_NEON_FP16INST"
2253 emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Expander for the vfms builtin over the VCVTF modes, enabled by
;; TARGET_NEON && TARGET_FMA.  It emits fmsub<mode>4_intrinsic with
;; operands[2] and operands[3] passed directly after operands[0].
2258 (define_expand "neon_vfms<VCVTF:mode>"
2259 [(match_operand:VCVTF 0 "s_register_operand")
2260 (match_operand:VCVTF 1 "s_register_operand")
2261 (match_operand:VCVTF 2 "s_register_operand")
2262 (match_operand:VCVTF 3 "s_register_operand")]
2263 "TARGET_NEON && TARGET_FMA"
2265 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
;; Expander for the vfms builtin over the VH modes, enabled by
;; TARGET_NEON_FP16INST.  It emits fmsub<mode>4_intrinsic with operands[2]
;; and operands[3] passed directly after operands[0].
2270 (define_expand "neon_vfms<VH:mode>"
2271 [(match_operand:VH 0 "s_register_operand")
2272 (match_operand:VH 1 "s_register_operand")
2273 (match_operand:VH 2 "s_register_operand")
2274 (match_operand:VH 3 "s_register_operand")]
2275 "TARGET_NEON_FP16INST"
2277 emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2282 ;; The expand RTL structure here is not important.
2283 ;; We use the gen_* functions anyway.
2284 ;; We just need something to wrap the iterators around.
;; Expander iterated over VFMLHALVES and the VCVTF modes.  The preparation
;; code builds a half-selecting constant with arm_simd_vect_par_cnst_half
;; for <VFML>mode and <vfml_half_selector>, then emits the matching
;; vfm<vfml_op>l_<vfml_half><mode>_intrinsic pattern.
2286 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2287 [(set (match_operand:VCVTF 0 "s_register_operand")
2289 [(match_operand:VCVTF 1 "s_register_operand")
2291 (match_operand:<VFML> 2 "s_register_operand")
2292 (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2295 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2296 emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
;; vfmal.f16 using the low halves: operands 2 and 3 are <VFML> registers,
;; each narrowed to <VFMLSEL> by a vec_select whose selector (operands 4
;; and 5) must satisfy vect_par_constant_low.  Operand 1 is tied to the
;; destination ("0" constraint).  Sources are printed with %<V_lo>.
2304 (define_insn "vfmal_low<mode>_intrinsic"
2305 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2308 (vec_select:<VFMLSEL>
2309 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2310 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2312 (vec_select:<VFMLSEL>
2313 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2314 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2315 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2317 "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2318 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 using the high halves: operands 2 and 3 are <VFML> registers,
;; each narrowed to <VFMLSEL> by a vec_select whose selector (operands 4
;; and 5) must satisfy vect_par_constant_high.  Operand 1 is tied to the
;; destination ("0" constraint).  Sources are printed with %<V_hi>.
2321 (define_insn "vfmsl_high<mode>_intrinsic"
2322 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2326 (vec_select:<VFMLSEL>
2327 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2328 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2330 (vec_select:<VFMLSEL>
2331 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2332 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2333 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2335 "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2336 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmal.f16 using the high halves: operands 2 and 3 are <VFML> registers,
;; each narrowed to <VFMLSEL> by a vec_select whose selector (operands 4
;; and 5) must satisfy vect_par_constant_high.  Operand 1 is tied to the
;; destination ("0" constraint).  Sources are printed with %<V_hi>.
2339 (define_insn "vfmal_high<mode>_intrinsic"
2340 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2343 (vec_select:<VFMLSEL>
2344 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2345 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2347 (vec_select:<VFMLSEL>
2348 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2349 (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2350 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2352 "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2353 [(set_attr "type" "neon_fp_mla_s<q>")]
;; vfmsl.f16 using the low halves: operands 2 and 3 are <VFML> registers,
;; each narrowed to <VFMLSEL> by a vec_select whose selector (operands 4
;; and 5) must satisfy vect_par_constant_low.  Operand 1 is tied to the
;; destination ("0" constraint).  Sources are printed with %<V_lo>.
2356 (define_insn "vfmsl_low<mode>_intrinsic"
2357 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2361 (vec_select:<VFMLSEL>
2362 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2363 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2365 (vec_select:<VFMLSEL>
2366 (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2367 (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2368 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2370 "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2371 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane-indexed expander, iterated over VFMLHALVES and the VCVTF modes.
;; Operand 4 is the constant lane number.  The preparation code converts
;; it with NEON_ENDIAN_LANE_N for <VFML>mode, builds the half-selecting
;; constant with arm_simd_vect_par_cnst_half, and emits the matching
;; vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic pattern.
2374 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2375 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2377 [(match_operand:VCVTF 1 "s_register_operand")
2379 (match_operand:<VFML> 2 "s_register_operand")
2380 (match_operand:<VFML> 3 "s_register_operand"))
2381 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2384 rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2385 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2386 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2387 (operands[0], operands[1],
2388 operands[2], operands[3],
;; Lane form of vfmal.f16 on the low half of operand 2.  Operand 3 is a
;; <VFML> register (constraint "x") from which the lane given by operand 5
;; is duplicated.  The output code recomputes the lane with
;; NEON_ENDIAN_LANE_N; if it is beyond the last lane of <VFMLSEL>mode the
;; index is rebased by that mode's lane count and operand 3 is printed
;; with %<V_hi>, otherwise the lane is used as is with %<V_lo>.
2393 (define_insn "vfmal_lane_low<mode>_intrinsic"
2394 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2397 (vec_select:<VFMLSEL>
2398 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2399 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2401 (vec_duplicate:<VFMLSEL>
2403 (match_operand:<VFML> 3 "s_register_operand" "x")
2404 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2405 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2408 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2409 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2411 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2412 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2416 operands[5] = GEN_INT (lane);
2417 return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2420 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane-indexed expander for the case where operand 3 has the
;; <VFMLSEL2> mode rather than <VFML>.  The lane number (operand 4) is
;; converted with NEON_ENDIAN_LANE_N for <VFMLSEL2>mode, while the
;; half-selecting constant is still built for <VFML>mode; the matching
;; ..._<vfmlsel2><mode>_intrinsic pattern is then emitted.
2423 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2424 [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2426 [(match_operand:VCVTF 1 "s_register_operand")
2428 (match_operand:<VFML> 2 "s_register_operand")
2429 (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2430 (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2434 = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2435 rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2436 emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2437 (operands[0], operands[1], operands[2], operands[3],
2442 ;; Used to implement the intrinsics:
2443 ;; float32x4_t vfmlalq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2444 ;; float32x2_t vfmlal_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2445 ;; Needs a bit of care to get the modes of the different sub-expressions right
2446 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2447 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into an in-register lane
;; (lane % elts_per_reg) and a register offset (lane / elts_per_reg), where
;; elts_per_reg is the lane count of <VFMLSEL>mode.  Operand 3 is rebuilt
;; as a <VFMLSEL>mode REG at REGNO + regdiff and operand 2 as a
;; <VFMLSEL>mode REG at its own REGNO, then both are printed with
;; %<V_lane_reg>.
2449 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2450 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2453 (vec_select:<VFMLSEL>
2454 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2455 (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2457 (vec_duplicate:<VFMLSEL>
2459 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2460 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2461 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2464 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2465 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2466 int new_lane = lane % elts_per_reg;
2467 int regdiff = lane / elts_per_reg;
2468 operands[5] = GEN_INT (new_lane);
2469 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2470 because we want the print_operand code to print the appropriate
2471 S or D register prefix. */
2472 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2473 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2474 return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2476 [(set_attr "type" "neon_fp_mla_s<q>")]
2479 ;; Used to implement the intrinsics:
2480 ;; float32x4_t vfmlalq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2481 ;; float32x2_t vfmlal_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2482 ;; Needs a bit of care to get the modes of the different sub-expressions right
2483 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2484 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into an in-register lane
;; (lane % elts_per_reg) and a register offset (lane / elts_per_reg), where
;; elts_per_reg is the lane count of <VFMLSEL>mode.  Only operand 3 is
;; rebuilt (as a <VFMLSEL>mode REG at REGNO + regdiff); operand 2 is
;; printed with %<V_hi> and operand 3 with %<V_lane_reg>.
2486 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2487 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2490 (vec_select:<VFMLSEL>
2491 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2492 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2494 (vec_duplicate:<VFMLSEL>
2496 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2497 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2498 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2501 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2502 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2503 int new_lane = lane % elts_per_reg;
2504 int regdiff = lane / elts_per_reg;
2505 operands[5] = GEN_INT (new_lane);
2506 /* We re-create operands[3] in the halved VFMLSEL mode
2507 because we've calculated the correct half-width subreg to extract
2508 the lane from and we want to print *that* subreg instead. */
2509 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2510 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2512 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane form of vfmal.f16 on the high half of operand 2.  Operand 3 is a
;; <VFML> register (constraint "x") from which the lane given by operand 5
;; is duplicated.  The output code recomputes the lane with
;; NEON_ENDIAN_LANE_N; if it is beyond the last lane of <VFMLSEL>mode the
;; index is rebased by that mode's lane count and operand 3 is printed
;; with %<V_hi>, otherwise the lane is used as is with %<V_lo>.
2515 (define_insn "vfmal_lane_high<mode>_intrinsic"
2516 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2519 (vec_select:<VFMLSEL>
2520 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2521 (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2523 (vec_duplicate:<VFMLSEL>
2525 (match_operand:<VFML> 3 "s_register_operand" "x")
2526 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2527 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2530 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2531 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2533 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2534 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2538 operands[5] = GEN_INT (lane);
2539 return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2542 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane form of vfmsl.f16 on the low half of operand 2.  Operand 3 is a
;; <VFML> register (constraint "x") from which the lane given by operand 5
;; is duplicated.  The output code recomputes the lane with
;; NEON_ENDIAN_LANE_N; if it is beyond the last lane of <VFMLSEL>mode the
;; index is rebased by that mode's lane count and operand 3 is printed
;; with %<V_hi>, otherwise the lane is used as is with %<V_lo>.
2545 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2546 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2550 (vec_select:<VFMLSEL>
2551 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2552 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2554 (vec_duplicate:<VFMLSEL>
2556 (match_operand:<VFML> 3 "s_register_operand" "x")
2557 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2558 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2561 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2562 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2564 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2565 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2569 operands[5] = GEN_INT (lane);
2570 return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2573 [(set_attr "type" "neon_fp_mla_s<q>")]
2576 ;; Used to implement the intrinsics:
2577 ;; float32x4_t vfmlslq_lane_low_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2578 ;; float32x2_t vfmlsl_laneq_low_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2579 ;; Needs a bit of care to get the modes of the different sub-expressions right
2580 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2581 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into an in-register lane
;; (lane % elts_per_reg) and a register offset (lane / elts_per_reg), where
;; elts_per_reg is the lane count of <VFMLSEL>mode.  Operand 3 is rebuilt
;; as a <VFMLSEL>mode REG at REGNO + regdiff and operand 2 as a
;; <VFMLSEL>mode REG at its own REGNO, then both are printed with
;; %<V_lane_reg>.
2583 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2584 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2588 (vec_select:<VFMLSEL>
2589 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2590 (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2592 (vec_duplicate:<VFMLSEL>
2594 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2595 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2596 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2599 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2600 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2601 int new_lane = lane % elts_per_reg;
2602 int regdiff = lane / elts_per_reg;
2603 operands[5] = GEN_INT (new_lane);
2604 /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2605 because we want the print_operand code to print the appropriate
2606 S or D register prefix. */
2607 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2608 operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2609 return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2611 [(set_attr "type" "neon_fp_mla_s<q>")]
2614 ;; Used to implement the intrinsics:
2615 ;; float32x4_t vfmlslq_lane_high_u32 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2616 ;; float32x2_t vfmlsl_laneq_high_u32 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2617 ;; Needs a bit of care to get the modes of the different sub-expressions right
2618 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2619 ;; S or D subregister to select the appropriate lane from.
;; Output code: the endian-adjusted lane is split into an in-register lane
;; (lane % elts_per_reg) and a register offset (lane / elts_per_reg), where
;; elts_per_reg is the lane count of <VFMLSEL>mode.  Only operand 3 is
;; rebuilt (as a <VFMLSEL>mode REG at REGNO + regdiff); operand 2 is
;; printed with %<V_hi> and operand 3 with %<V_lane_reg>.
2621 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2622 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2626 (vec_select:<VFMLSEL>
2627 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2628 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2630 (vec_duplicate:<VFMLSEL>
2632 (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2633 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2634 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2637 int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2638 int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2639 int new_lane = lane % elts_per_reg;
2640 int regdiff = lane / elts_per_reg;
2641 operands[5] = GEN_INT (new_lane);
2642 /* We re-create operands[3] in the halved VFMLSEL mode
2643 because we've calculated the correct half-width subreg to extract
2644 the lane from and we want to print *that* subreg instead. */
2645 operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2646 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2648 [(set_attr "type" "neon_fp_mla_s<q>")]
;; Lane form of vfmsl.f16 on the high half of operand 2.  Operand 3 is a
;; <VFML> register (constraint "x") from which the lane given by operand 5
;; is duplicated.  The output code recomputes the lane with
;; NEON_ENDIAN_LANE_N; if it is beyond the last lane of <VFMLSEL>mode the
;; index is rebased by that mode's lane count and operand 3 is printed
;; with %<V_hi>, otherwise the lane is used as is with %<V_lo>.
2651 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2652 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2656 (vec_select:<VFMLSEL>
2657 (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2658 (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2660 (vec_duplicate:<VFMLSEL>
2662 (match_operand:<VFML> 3 "s_register_operand" "x")
2663 (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2664 (match_operand:VCVTF 1 "s_register_operand" "0")))]
2667 int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2668 if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2670 operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2671 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2675 operands[5] = GEN_INT (lane);
2676 return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2679 [(set_attr "type" "neon_fp_mla_s<q>")]
2682 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmla over the VDQW modes.  Operand 1 is tied to the
;; destination ("0" constraint), so only operands 2 and 3 appear as
;; sources in the output template.  The type attribute is
;; neon_fp_mla_s<q> for float modes and neon_mla_<V_elem_ch><q> otherwise.
2684 (define_insn "neon_vmla<mode>_unspec"
2685 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2686 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2687 (match_operand:VDQW 2 "s_register_operand" "w")
2688 (match_operand:VDQW 3 "s_register_operand" "w")]
2691 "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2693 (if_then_else (match_test "<Is_float_mode>")
2694 (const_string "neon_fp_mla_s<q>")
2695 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlal: operand 1 is a <V_widen> register tied to the destination;
;; operands 2 and 3 are VW-mode registers.  <sup> supplies the signedness
;; letter.  Type: neon_mla_<V_elem_ch>_long.
2698 (define_insn "neon_vmlal<sup><mode>"
2699 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2700 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2701 (match_operand:VW 2 "s_register_operand" "w")
2702 (match_operand:VW 3 "s_register_operand" "w")]
2705 "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2706 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
;; Expander for the vmls builtin over the VDQW modes.  When the mode is
;; not a float mode, or -funsafe-math-optimizations is on, it emits
;; mul<mode>3neg<mode>add<mode>_neon; the other emission visible below is
;; neon_vmls<mode>_unspec.  Both receive operands 0..3 in order.
2709 (define_expand "neon_vmls<mode>"
2710 [(match_operand:VDQW 0 "s_register_operand" "=w")
2711 (match_operand:VDQW 1 "s_register_operand" "0")
2712 (match_operand:VDQW 2 "s_register_operand" "w")
2713 (match_operand:VDQW 3 "s_register_operand" "w")]
2716 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2717 emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2718 operands[1], operands[2], operands[3]));
2720 emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2721 operands[2], operands[3]));
2725 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vmls over the VDQW modes.  Operand 1 is tied to the
;; destination ("0" constraint), so only operands 2 and 3 appear as
;; sources in the output template.  The type attribute is
;; neon_fp_mla_s<q> for float modes and neon_mla_<V_elem_ch><q> otherwise.
2727 (define_insn "neon_vmls<mode>_unspec"
2728 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2729 (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2730 (match_operand:VDQW 2 "s_register_operand" "w")
2731 (match_operand:VDQW 3 "s_register_operand" "w")]
2734 "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2736 (if_then_else (match_test "<Is_float_mode>")
2737 (const_string "neon_fp_mla_s<q>")
2738 (const_string "neon_mla_<V_elem_ch><q>")))]
;; vmlsl: operand 1 is a <V_widen> register tied to the destination;
;; operands 2 and 3 are VW-mode registers.  <sup> supplies the signedness
;; letter.  Type: neon_mla_<V_elem_ch>_long.
2741 (define_insn "neon_vmlsl<sup><mode>"
2742 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2743 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2744 (match_operand:VW 2 "s_register_operand" "w")
2745 (match_operand:VW 3 "s_register_operand" "w")]
2748 "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2749 [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2752 ;; vqdmulh, vqrdmulh
;; One pattern for both mnemonics (the <r> part selects between them),
;; over the VMDQI modes with three registers of the same mode.
;; Type: neon_sat_mul_<V_elem_ch><q>.
2753 (define_insn "neon_vq<r>dmulh<mode>"
2754 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2755 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2756 (match_operand:VMDQI 2 "s_register_operand" "w")]
2759 "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2760 [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2763 ;; vqrdmlah, vqrdmlsh
;; One pattern for both mnemonics, selected by the
;; <VQRDMLH_AS:neon_rdma_as> part of the name, over the VMDQI modes.
;; Operand 1 is tied to the destination ("0" constraint), so only
;; operands 2 and 3 appear as sources in the output template.
2764 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2765 [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2766 (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2767 (match_operand:VMDQI 2 "s_register_operand" "w")
2768 (match_operand:VMDQI 3 "s_register_operand" "w")]
2771 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2772 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlal: operand 1 is a <V_widen> register tied to the destination;
;; operands 2 and 3 are VMDI-mode registers.
;; Type: neon_sat_mla_<V_elem_ch>_long.
2775 (define_insn "neon_vqdmlal<mode>"
2776 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2777 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2778 (match_operand:VMDI 2 "s_register_operand" "w")
2779 (match_operand:VMDI 3 "s_register_operand" "w")]
2782 "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2783 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vqdmlsl: operand 1 is a <V_widen> register tied to the destination;
;; operands 2 and 3 are VMDI-mode registers.
;; Type: neon_sat_mla_<V_elem_ch>_long.
2786 (define_insn "neon_vqdmlsl<mode>"
2787 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2788 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2789 (match_operand:VMDI 2 "s_register_operand" "w")
2790 (match_operand:VMDI 3 "s_register_operand" "w")]
2793 "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2794 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
;; vmull: both inputs are VW-mode registers and the result is in the
;; corresponding <V_widen> mode.  <sup> supplies the signedness letter.
;; Type: neon_mul_<V_elem_ch>_long.
2797 (define_insn "neon_vmull<sup><mode>"
2798 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2799 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2800 (match_operand:VW 2 "s_register_operand" "w")]
2803 "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2804 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; vqdmull: both inputs are VMDI-mode registers and the result is in the
;; corresponding <V_widen> mode.  Type: neon_sat_mul_<V_elem_ch>_long.
2807 (define_insn "neon_vqdmull<mode>"
2808 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2809 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2810 (match_operand:VMDI 2 "s_register_operand" "w")]
2813 "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2814 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
;; Expander for the vsub builtin over the VCVTF modes.  When the mode is
;; not a float mode, or -funsafe-math-optimizations is on, it emits the
;; canonical sub<mode>3 pattern; the other emission visible below is the
;; neon_vsub<mode>_unspec pattern.
2817 (define_expand "neon_vsub<mode>"
2818 [(match_operand:VCVTF 0 "s_register_operand" "=w")
2819 (match_operand:VCVTF 1 "s_register_operand" "w")
2820 (match_operand:VCVTF 2 "s_register_operand" "w")]
2823 if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2824 emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2826 emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2831 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
;; Unspec form of vsub over the VCVTF modes: operand 0 is set from an
;; unspec of operands 1 and 2, and the output is a three-register
;; vsub.<V_if_elem>.  The type attribute is neon_fp_addsub_s<q> for float
;; modes and neon_sub<q> otherwise.
2833 (define_insn "neon_vsub<mode>_unspec"
2834 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2835 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2836 (match_operand:VCVTF 2 "s_register_operand" "w")]
2839 "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2841 (if_then_else (match_test "<Is_float_mode>")
2842 (const_string "neon_fp_addsub_s<q>")
2843 (const_string "neon_sub<q>")))]
;; vsubl: both inputs are VDI-mode registers and the result is in the
;; corresponding <V_widen> mode.  <sup> supplies the signedness letter.
;; Type: neon_sub_long.
2846 (define_insn "neon_vsubl<sup><mode>"
2847 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2848 (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2849 (match_operand:VDI 2 "s_register_operand" "w")]
2852 "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2853 [(set_attr "type" "neon_sub_long")]
;; vsubw: operand 1 is already in the <V_widen> mode, operand 2 is a
;; VDI-mode register, and the result is <V_widen>.  Type: neon_sub_widen.
2856 (define_insn "neon_vsubw<sup><mode>"
2857 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2858 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2859 (match_operand:VDI 2 "s_register_operand" "w")]
2862 "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2863 [(set_attr "type" "neon_sub_widen")]
;; vqsub over the VDQIX modes: three registers of the same mode, with
;; <sup> supplying the signedness letter.  Type: neon_qsub<q>.
2866 (define_insn "neon_vqsub<sup><mode>"
2867 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2868 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2869 (match_operand:VDQIX 2 "s_register_operand" "w")]
2872 "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2873 [(set_attr "type" "neon_qsub<q>")]
;; vhsub over the VDQIW modes: three registers of the same mode, with
;; <sup> supplying the signedness letter.  Type: neon_sub_halve<q>.
2876 (define_insn "neon_vhsub<sup><mode>"
2877 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2878 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2879 (match_operand:VDQIW 2 "s_register_operand" "w")]
2882 "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2883 [(set_attr "type" "neon_sub_halve<q>")]
;; vsubhn / vrsubhn (the <r> part selects the mnemonic): both inputs are
;; VN-mode registers and the result is in the corresponding <V_narrow>
;; mode.  Type: neon_sub_halve_narrow_q.
2886 (define_insn "neon_v<r>subhn<mode>"
2887 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2888 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2889 (match_operand:VN 2 "s_register_operand" "w")]
2892 "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2893 [(set_attr "type" "neon_sub_halve_narrow_q")]
2896 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2897 ;; without unsafe math optimizations.
;; Comparison expander over COMPARISONS and the VDQW modes; operand 2 may
;; be a register or zero.  For vector-float modes without
;; -funsafe-math-optimizations it dispatches to the v2sf or v4sf
;; ..._insn_unspec generator explicitly (the in-body comment explains why
;; <mode> cannot be used there); otherwise it emits
;; neon_vc<cmp_op><mode>_insn.
2898 (define_expand "neon_vc<cmp_op><mode>"
2899 [(match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2901 (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand" "w,w")
2902 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz")))]
2905 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2907 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2908 && !flag_unsafe_math_optimizations)
2910 /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2911 we define gen_neon_vceq<mode>_insn_unspec only for float modes
2912 whereas this expander iterates over the integer modes as well,
2913 but we will never expand to UNSPECs for the integer comparisons. */
2917 emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2922 emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2931 emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
;; Canonical-RTL comparison insn over the VDQW modes with two
;; alternatives: register/register and register/zero ("Dz").  It is
;; disabled for vector-float modes unless -funsafe-math-optimizations is
;; on.  The output code builds the template with sprintf: the type letter
;; is "f" for vector-float modes and <cmp_type> otherwise, and the last
;; operand is the register for alternative 0 or the literal #0.  The type
;; attribute distinguishes the zero-operand case.
2938 (define_insn "neon_vc<cmp_op><mode>_insn"
2939 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2941 (COMPARISONS:<V_cmp_result>
2942 (match_operand:VDQW 1 "s_register_operand" "w,w")
2943 (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2944 "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2945 && !flag_unsafe_math_optimizations)"
2948 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2950 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2951 ? "f" : "<cmp_type>",
2952 which_alternative == 0
2953 ? "%<V_reg>2" : "#0");
2954 output_asm_insn (pattern, operands);
2958 (if_then_else (match_operand 2 "zero_operand")
2959 (const_string "neon_compare_zero<q>")
2960 (const_string "neon_compare<q>")))]
;; Unspec comparison insn over the VCVTF modes with two alternatives:
;; register/register and register/zero ("Dz").  The output code builds a
;; ".f"-typed template with sprintf, using the register operand for
;; alternative 0 and the literal #0 otherwise.
;; Type: neon_fp_compare_s<q>.
2963 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2964 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2965 (unspec:<V_cmp_result>
2966 [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2967 (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2972 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2974 which_alternative == 0
2975 ? "%<V_reg>2" : "#0");
2976 output_asm_insn (pattern, operands);
2979 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Comparison expander over the VH modes, enabled by
;; TARGET_NEON_FP16INST.  For vector-float modes without
;; -funsafe-math-optimizations it uses the ..._fp16insn_unspec generator;
;; the ..._fp16insn generator is the other one visible below.  Both
;; receive operands 0..2 in order.
2982 (define_expand "neon_vc<cmp_op><mode>"
2983 [(match_operand:<V_cmp_result> 0 "s_register_operand")
2986 (match_operand:VH 1 "s_register_operand")
2987 (match_operand:VH 2 "reg_or_zero_operand")))]
2988 "TARGET_NEON_FP16INST"
2990 /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2992 if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2993 && !flag_unsafe_math_optimizations)
2995 (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2996 (operands[0], operands[1], operands[2]));
2999 (gen_neon_vc<cmp_op><mode>_fp16insn
3000 (operands[0], operands[1], operands[2]));
;; Canonical-RTL comparison insn over the VH modes with two alternatives:
;; register/register and register/zero ("Dz").  It requires
;; TARGET_NEON_FP16INST and is disabled for vector-float modes unless
;; -funsafe-math-optimizations is on.  The output code builds the template
;; with sprintf: the type letter is "f" for vector-float modes and
;; <cmp_type> otherwise, and the last operand is the register for
;; alternative 0 or the literal #0.
3004 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
3005 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3007 (COMPARISONS:<V_cmp_result>
3008 (match_operand:VH 1 "s_register_operand" "w,w")
3009 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
3010 "TARGET_NEON_FP16INST
3011 && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3012 && !flag_unsafe_math_optimizations)"
3015 sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
3017 GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
3018 ? "f" : "<cmp_type>",
3019 which_alternative == 0
3020 ? "%<V_reg>2" : "#0");
3021 output_asm_insn (pattern, operands);
3025 (if_then_else (match_operand 2 "zero_operand")
3026 (const_string "neon_compare_zero<q>")
3027 (const_string "neon_compare<q>")))])
;; Unspec comparison insn over the VH modes, enabled by
;; TARGET_NEON_FP16INST, with two alternatives: register/register and
;; register/zero ("Dz").  The output code builds a ".f"-typed template
;; with sprintf, using the register operand for alternative 0 and the
;; literal #0 otherwise.  Type: neon_fp_compare_s<q>.
3029 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
3031 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
3032 (unspec:<V_cmp_result>
3033 [(match_operand:VH 1 "s_register_operand" "w,w")
3034 (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
3036 "TARGET_NEON_FP16INST"
3039 sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
3041 which_alternative == 0
3042 ? "%<V_reg>2" : "#0");
3043 output_asm_insn (pattern, operands);
3046 [(set_attr "type" "neon_fp_compare_s<q>")])
;; Unsigned comparison insn, iterated over GTUGEU and the VDQIW modes.
;; Both inputs must be registers (there is no zero alternative) and the
;; mnemonic always carries the ".u" type letter.  Type: neon_compare<q>.
3048 (define_insn "neon_vc<cmp_op>u<mode>"
3049 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3051 (GTUGEU:<V_cmp_result>
3052 (match_operand:VDQIW 1 "s_register_operand" "w")
3053 (match_operand:VDQIW 2 "s_register_operand" "w"))))]
3055 "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3056 [(set_attr "type" "neon_compare<q>")]
;; Expander for comparisons of absolute values (GTGE applied to the abs
;; of each VCVTF operand).  With -funsafe-math-optimizations it emits
;; neon_vca<cmp_op><mode>_insn; the other emission visible below is
;; neon_vca<cmp_op><mode>_insn_unspec.
3059 (define_expand "neon_vca<cmp_op><mode>"
3060 [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
3062 (GTGE:<V_cmp_result>
3063 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
3064 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
3067 if (flag_unsafe_math_optimizations)
3068 emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
3071 emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
;; RTL form of the absolute compare (vac<cmp_op>) for VCVTF modes.  Only
;; enabled when flag_unsafe_math_optimizations is set (see the condition).
3078 (define_insn "neon_vca<cmp_op><mode>_insn"
3079 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3081 (GTGE:<V_cmp_result>
3082 (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
3083 (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
3084 "TARGET_NEON && flag_unsafe_math_optimizations"
3085 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3086 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the absolute compare (vac<cmp_op_unsp>) for VCVTF modes.
3089 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
3090 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3091 (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
3092 (match_operand:VCVTF 2 "s_register_operand" "w")]
3095 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3096 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Absolute-value comparison for VH modes (GLTE codes), available with
;; TARGET_NEON_FP16INST.  With flag_unsafe_math_optimizations the plain-RTL
;; _fp16insn pattern is emitted; the _fp16insn_unspec pattern is the other
;; path.
3099 (define_expand "neon_vca<cmp_op><mode>"
3101 (match_operand:<V_cmp_result> 0 "s_register_operand")
3103 (GLTE:<V_cmp_result>
3104 (abs:VH (match_operand:VH 1 "s_register_operand"))
3105 (abs:VH (match_operand:VH 2 "s_register_operand")))))]
3106 "TARGET_NEON_FP16INST"
3108 if (flag_unsafe_math_optimizations)
3109 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
3110 (operands[0], operands[1], operands[2]));
3112 emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
3113 (operands[0], operands[1], operands[2]));
;; RTL form of the absolute compare (vac<cmp_op>) for VH modes.  Requires
;; both TARGET_NEON_FP16INST and flag_unsafe_math_optimizations.
3117 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
3119 (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3121 (GLTE:<V_cmp_result>
3122 (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
3123 (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
3124 "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
3125 "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3126 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Unspec form of the absolute compare (vac<cmp_op_unsp>) for VH modes.
3129 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
3130 [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
3131 (unspec:<V_cmp_result>
3132 [(match_operand:VH 1 "s_register_operand" "w")
3133 (match_operand:VH 2 "s_register_operand" "w")]
3136 "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137 [(set_attr "type" "neon_fp_compare_s<q>")]
;; Compare-against-zero for VH modes: forwards to neon_vc<cmp_op><mode>,
;; passing CONST0_RTX (<MODE>mode) as the second comparison operand.
3140 (define_expand "neon_vc<cmp_op>z<mode>"
3142 (match_operand:<V_cmp_result> 0 "s_register_operand")
3143 (COMPARISONS:<V_cmp_result>
3144 (match_operand:VH 1 "s_register_operand")
3146 "TARGET_NEON_FP16INST"
3148 emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
3149 CONST0_RTX (<MODE>mode)));
;; vtst on VDQIW modes, modelled as an unspec of the two register inputs.
3153 (define_insn "neon_vtst<mode>"
3154 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3155 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3156 (match_operand:VDQIW 2 "s_register_operand" "w")]
3159 "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3160 [(set_attr "type" "neon_tst<q>")]
;; Absolute difference (vabd) on VDQIW modes.  <sup> supplies the type
;; prefix printed in front of the element size.
3163 (define_insn "neon_vabd<sup><mode>"
3164 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3165 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3166 (match_operand:VDQIW 2 "s_register_operand" "w")]
3169 "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3170 [(set_attr "type" "neon_abd<q>")]
;; Absolute difference (vabd) on VH modes; requires TARGET_NEON_FP16INST.
;; NOTE(review): the "type" here is neon_abd<q>, while the VCVTF sibling
;; neon_vabdf<mode> uses neon_fp_abd_s<q> -- confirm this is intended.
3173 (define_insn "neon_vabd<mode>"
3174 [(set (match_operand:VH 0 "s_register_operand" "=w")
3175 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3176 (match_operand:VH 2 "s_register_operand" "w")]
3178 "TARGET_NEON_FP16INST"
3179 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3180 [(set_attr "type" "neon_abd<q>")]
;; Absolute difference (vabd) on VCVTF modes, typed as an FP operation.
3183 (define_insn "neon_vabdf<mode>"
3184 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3185 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3186 (match_operand:VCVTF 2 "s_register_operand" "w")]
3189 "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3190 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Long absolute difference (vabdl): VW-mode inputs produce a <V_widen>
;; result.
3193 (define_insn "neon_vabdl<sup><mode>"
3194 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3195 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3196 (match_operand:VW 2 "s_register_operand" "w")]
3199 "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3200 [(set_attr "type" "neon_abd_long")]
;; Absolute difference and accumulate (vaba).  Operand 1 is the accumulator
;; and is tied to the destination (constraint "0"); the unspec of operands
;; 2 and 3 is added to it.
3203 (define_insn "neon_vaba<sup><mode>"
3204 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3205 (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3206 (match_operand:VDQIW 3 "s_register_operand" "w")]
3208 (match_operand:VDQIW 1 "s_register_operand" "0")))]
3210 "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3211 [(set_attr "type" "neon_arith_acc<q>")]
;; Long absolute difference and accumulate (vabal).  Operand 1 is the
;; <V_widen> accumulator tied to the destination; operands 2 and 3 are the
;; VW-mode inputs.
3214 (define_insn "neon_vabal<sup><mode>"
3215 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3216 (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3217 (match_operand:VW 3 "s_register_operand" "w")]
3219 (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3221 "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3222 [(set_attr "type" "neon_arith_acc<q>")]
;; Element-wise v<maxmin> on VDQIW modes, as an unspec of two registers.
3225 (define_insn "neon_v<maxmin><sup><mode>"
3226 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3227 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3228 (match_operand:VDQIW 2 "s_register_operand" "w")]
3231 "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3232 [(set_attr "type" "neon_minmax<q>")]
;; Element-wise v<maxmin> on VCVTF modes, typed as an FP min/max.
3235 (define_insn "neon_v<maxmin>f<mode>"
3236 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3237 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3238 (match_operand:VCVTF 2 "s_register_operand" "w")]
3241 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3242 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Element-wise v<maxmin> on VH modes; requires TARGET_NEON_FP16INST.
3245 (define_insn "neon_v<maxmin>f<mode>"
3246 [(set (match_operand:VH 0 "s_register_operand" "=w")
3248 [(match_operand:VH 1 "s_register_operand" "w")
3249 (match_operand:VH 2 "s_register_operand" "w")]
3251 "TARGET_NEON_FP16INST"
3252 "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3253 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; vp<maxmin>.f16 on V4HF operands; requires TARGET_NEON_FP16INST.
3256 (define_insn "neon_vp<maxmin>fv4hf"
3257 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3259 [(match_operand:V4HF 1 "s_register_operand" "w")
3260 (match_operand:V4HF 2 "s_register_operand" "w")]
3262 "TARGET_NEON_FP16INST"
3263 "vp<maxmin>.f16\t%P0, %P1, %P2"
3264 [(set_attr "type" "neon_reduc_minmax")]
;; <fmaxmin_op> instruction on VH modes; requires TARGET_NEON_FP16INST.
3267 (define_insn "neon_<fmaxmin_op><mode>"
3269 (match_operand:VH 0 "s_register_operand" "=w")
3271 [(match_operand:VH 1 "s_register_operand" "w")
3272 (match_operand:VH 2 "s_register_operand" "w")]
3274 "TARGET_NEON_FP16INST"
3275 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3276 [(set_attr "type" "neon_fp_minmax_s<q>")]
3279 ;; v<maxmin>nm intrinsics.
;; VCVTF-mode variant of the <fmaxmin_op> instruction; requires both
;; TARGET_NEON and TARGET_VFP5.
3280 (define_insn "neon_<fmaxmin_op><mode>"
3281 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3282 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3283 (match_operand:VCVTF 2 "s_register_operand" "w")]
3285 "TARGET_NEON && TARGET_VFP5"
3286 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3287 [(set_attr "type" "neon_fp_minmax_s<q>")]
3290 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
;; Pattern named <fmaxmin><mode>3; prints the same <fmaxmin_op> instruction
;; as neon_<fmaxmin_op><mode> and has the same TARGET_NEON && TARGET_VFP5
;; condition.
3291 (define_insn "<fmaxmin><mode>3"
3292 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3293 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3294 (match_operand:VCVTF 2 "s_register_operand" "w")]
3296 "TARGET_NEON && TARGET_VFP5"
3297 "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3298 [(set_attr "type" "neon_fp_minmax_s<q>")]
;; Entry point for vpadd on VD modes; forwards its operands to
;; neon_vpadd_internal<mode>.
3301 (define_expand "neon_vpadd<mode>"
3302 [(match_operand:VD 0 "s_register_operand" "=w")
3303 (match_operand:VD 1 "s_register_operand" "w")
3304 (match_operand:VD 2 "s_register_operand" "w")]
3307 emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
;; vpaddl: single VDQIW input, result in <V_double_width> mode.
3312 (define_insn "neon_vpaddl<sup><mode>"
3313 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3314 (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3317 "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3318 [(set_attr "type" "neon_reduc_add_long")]
;; vpadal: operand 1 is the <V_double_width> accumulator, tied to the
;; destination (constraint "0"); operand 2 is the VDQIW input.
3321 (define_insn "neon_vpadal<sup><mode>"
3322 [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3323 (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3324 (match_operand:VDQIW 2 "s_register_operand" "w")]
3327 "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3328 [(set_attr "type" "neon_reduc_add_acc")]
;; vp<maxmin> on VDI modes, as an unspec of two register inputs.
3331 (define_insn "neon_vp<maxmin><sup><mode>"
3332 [(set (match_operand:VDI 0 "s_register_operand" "=w")
3333 (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3334 (match_operand:VDI 2 "s_register_operand" "w")]
3337 "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3338 [(set_attr "type" "neon_reduc_minmax<q>")]
;; vp<maxmin> on VCVTF modes, typed as an FP reduction min/max.
3341 (define_insn "neon_vp<maxmin>f<mode>"
3342 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3343 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3344 (match_operand:VCVTF 2 "s_register_operand" "w")]
3347 "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3348 [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
;; vrecps on VCVTF modes, as an unspec of two register inputs.
3351 (define_insn "neon_vrecps<mode>"
3352 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3353 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3354 (match_operand:VCVTF 2 "s_register_operand" "w")]
3357 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3358 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrecps on VH modes; requires TARGET_NEON_FP16INST.
3361 (define_insn "neon_vrecps<mode>"
3363 (match_operand:VH 0 "s_register_operand" "=w")
3364 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3365 (match_operand:VH 2 "s_register_operand" "w")]
3367 "TARGET_NEON_FP16INST"
3368 "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3369 [(set_attr "type" "neon_fp_recps_s<q>")]
;; vrsqrts on VCVTF modes, as an unspec of two register inputs.
3372 (define_insn "neon_vrsqrts<mode>"
3373 [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3374 (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3375 (match_operand:VCVTF 2 "s_register_operand" "w")]
3378 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3379 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; vrsqrts on VH modes; requires TARGET_NEON_FP16INST.
3382 (define_insn "neon_vrsqrts<mode>"
3384 (match_operand:VH 0 "s_register_operand" "=w")
3385 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3386 (match_operand:VH 2 "s_register_operand" "w")]
3388 "TARGET_NEON_FP16INST"
3389 "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3390 [(set_attr "type" "neon_fp_rsqrts_s<q>")]
;; Entry point for vabs on VDQW modes; forwards to the abs<mode>2 pattern.
3393 (define_expand "neon_vabs<mode>"
3394 [(match_operand:VDQW 0 "s_register_operand" "")
3395 (match_operand:VDQW 1 "s_register_operand" "")]
3398 emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
;; vqabs on VDQIW modes, as an unspec of a single register input.
3402 (define_insn "neon_vqabs<mode>"
3403 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3404 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3407 "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3408 [(set_attr "type" "neon_qabs<q>")]
;; Byte swap (RTL bswap) of VDQHSD vectors, printed as vrev<size>.8.
3411 (define_insn "neon_bswap<mode>"
3412 [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3413 (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3415 "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3416 [(set_attr "type" "neon_rev<q>")]
;; Entry point for vneg on VDQW modes; forwards to the neg<mode>2 pattern.
3419 (define_expand "neon_vneg<mode>"
3420 [(match_operand:VDQW 0 "s_register_operand" "")
3421 (match_operand:VDQW 1 "s_register_operand" "")]
3424 emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3428 ;; These instructions map to the __builtins for the Dot Product operations.
;; Operand 1 is the VCVTI accumulator, tied to the destination (constraint
;; "0"); the unspec over the <VSI2QI> operands 2 and 3 is added to it.
3429 (define_insn "neon_<sup>dot<vsi2qi>"
3430 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3431 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3432 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3433 "register_operand" "w")
3434 (match_operand:<VSI2QI> 3
3435 "register_operand" "w")]
3438 "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3439 [(set_attr "type" "neon_dot")]
3442 ;; These instructions map to the __builtins for the Dot Product
3443 ;; indexed operations.
;; Operand 3 is a V8QI register and operand 4 the immediate lane index,
;; printed as %P3[%c4].  The index is passed through NEON_ENDIAN_LANE_N
;; (for V8QImode) before the instruction is printed.
3444 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3445 [(set (match_operand:VCVTI 0 "register_operand" "=w")
3446 (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3447 (unspec:VCVTI [(match_operand:<VSI2QI> 2
3448 "register_operand" "w")
3449 (match_operand:V8QI 3 "register_operand" "t")
3450 (match_operand:SI 4 "immediate_operand" "i")]
3455 = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3456 return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3458 [(set_attr "type" "neon_dot")]
3461 ;; These expands map to the Dot Product optab the vectorizer checks for.
3462 ;; The auto-vectorizer expects a dot product builtin that also does an
3463 ;; accumulation into the provided register.
3464 ;; Given the following pattern
3466 ;; for (i=0; i<len; i++) {
;;     r += a[i] * b[i];
;; }
3472 ;; This can be auto-vectorized to
3473 ;; r = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3475 ;; given enough iterations. However the vectorizer can keep unrolling the loop
3476 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3477 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3480 ;; and so the vectorizer provides r, in which the result has to be accumulated.
;; The expander accumulates into operand 3 with neon_<sup>dot<vsi2qi>
;; (operand 3 is both its destination and its accumulator input) and then
;; copies operand 3 into operand 0.
3481 (define_expand "<sup>dot_prod<vsi2qi>"
3482 [(set (match_operand:VCVTI 0 "register_operand")
3483 (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3485 (match_operand:<VSI2QI> 2
3486 "register_operand")]
3488 (match_operand:VCVTI 3 "register_operand")))]
3492 gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3494 emit_insn (gen_rtx_SET (operands[0], operands[3]));
;; copysign expander for VCVTF modes.  Builds a <V_cmp_result> vector whose
;; every lane is 0x80000000, copies operand 2 into operand 0, views the mask
;; in <MODE>mode through a subreg and hands it to neon_vbsl<mode> as the
;; second argument.
;; The lane constant is created with gen_int_mode rather than GEN_INT: a
;; CONST_INT must be sign-extended for its mode, and a bare
;; GEN_INT (0x80000000) is a positive value that is not a canonical SImode
;; constant when HOST_WIDE_INT is wider than 32 bits.
3498 (define_expand "neon_copysignf<mode>"
3499 [(match_operand:VCVTF 0 "register_operand")
3500 (match_operand:VCVTF 1 "register_operand")
3501 (match_operand:VCVTF 2 "register_operand")]
3505 rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3506 rtx c = gen_int_mode (0x80000000, SImode);
3508 emit_move_insn (v_bitmask,
3509 gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3510 emit_move_insn (operands[0], operands[2]);
3511 v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3512 <VCVTF:V_cmp_result>mode, 0);
3513 emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
;; vqneg on VDQIW modes, as an unspec of a single register input.
3520 (define_insn "neon_vqneg<mode>"
3521 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3522 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3525 "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3526 [(set_attr "type" "neon_qneg<q>")]
;; vcls on VDQIW modes, as an unspec of a single register input.
3529 (define_insn "neon_vcls<mode>"
3530 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3531 (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3534 "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3535 [(set_attr "type" "neon_cls<q>")]
;; clz<mode>2 pattern for VDQIW modes: RTL clz printed as vclz.
3538 (define_insn "clz<mode>2"
3539 [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3540 (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3542 "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3543 [(set_attr "type" "neon_cnt<q>")]
;; Entry point for vclz on VDQIW modes; forwards to clz<mode>2.
3546 (define_expand "neon_vclz<mode>"
3547 [(match_operand:VDQIW 0 "s_register_operand" "")
3548 (match_operand:VDQIW 1 "s_register_operand" "")]
3551 emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
;; popcount<mode>2 pattern for VE modes: RTL popcount printed as vcnt.
3555 (define_insn "popcount<mode>2"
3556 [(set (match_operand:VE 0 "s_register_operand" "=w")
3557 (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3559 "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3560 [(set_attr "type" "neon_cnt<q>")]
;; Entry point for vcnt on VE modes; forwards to popcount<mode>2.
3563 (define_expand "neon_vcnt<mode>"
3564 [(match_operand:VE 0 "s_register_operand" "=w")
3565 (match_operand:VE 1 "s_register_operand" "w")]
3568 emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
;; vrecpe.f16 on VH modes; requires TARGET_NEON_FP16INST.
3572 (define_insn "neon_vrecpe<mode>"
3573 [(set (match_operand:VH 0 "s_register_operand" "=w")
3574 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3576 "TARGET_NEON_FP16INST"
3577 "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3578 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrecpe on V32 modes; the element suffix comes from <V_u_elem>.
3581 (define_insn "neon_vrecpe<mode>"
3582 [(set (match_operand:V32 0 "s_register_operand" "=w")
3583 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3586 "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3587 [(set_attr "type" "neon_fp_recpe_s<q>")]
;; vrsqrte on V32 modes; the element suffix comes from <V_u_elem>.
3590 (define_insn "neon_vrsqrte<mode>"
3591 [(set (match_operand:V32 0 "s_register_operand" "=w")
3592 (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3595 "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3596 [(set_attr "type" "neon_fp_rsqrte_s<q>")]
;; Entry point for vmvn on VDQIW modes; forwards to one_cmpl<mode>2.
3599 (define_expand "neon_vmvn<mode>"
3600 [(match_operand:VDQIW 0 "s_register_operand" "")
3601 (match_operand:VDQIW 1 "s_register_operand" "")]
3604 emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
;; Extract lane operands[2] of a VD vector into an SI core register using
;; vmov.s<size>.  On big-endian targets the lane number is mirrored
;; (nunits - 1 - elt) before the instruction is printed.
3608 (define_insn "neon_vget_lane<mode>_sext_internal"
3609 [(set (match_operand:SI 0 "s_register_operand" "=r")
3611 (vec_select:<V_elem>
3612 (match_operand:VD 1 "s_register_operand" "w")
3613 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3616 if (BYTES_BIG_ENDIAN)
3618 int elt = INTVAL (operands[2]);
3619 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3620 operands[2] = GEN_INT (elt);
3622 return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3624 [(set_attr "type" "neon_to_gp")]
;; Extract lane operands[2] of a VD vector into an SI core register using
;; vmov.u<size>.  On big-endian targets the lane number is mirrored
;; (nunits - 1 - elt) before the instruction is printed.
3627 (define_insn "neon_vget_lane<mode>_zext_internal"
3628 [(set (match_operand:SI 0 "s_register_operand" "=r")
3630 (vec_select:<V_elem>
3631 (match_operand:VD 1 "s_register_operand" "w")
3632 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3635 if (BYTES_BIG_ENDIAN)
3637 int elt = INTVAL (operands[2]);
3638 elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3639 operands[2] = GEN_INT (elt);
3641 return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3643 [(set_attr "type" "neon_to_gp")]
;; VQ2-mode variant of the vmov.s<size> lane extract.  The source is split
;; into halves of halfelts lanes: the <V_HALF> register used is
;; regno + 2 * (elt / halfelts) and the lane within it is elt % halfelts,
;; mirrored within the half on big-endian targets.
3646 (define_insn "neon_vget_lane<mode>_sext_internal"
3647 [(set (match_operand:SI 0 "s_register_operand" "=r")
3649 (vec_select:<V_elem>
3650 (match_operand:VQ2 1 "s_register_operand" "w")
3651 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3655 int regno = REGNO (operands[1]);
3656 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3657 unsigned int elt = INTVAL (operands[2]);
3658 unsigned int elt_adj = elt % halfelts;
3660 if (BYTES_BIG_ENDIAN)
3661 elt_adj = halfelts - 1 - elt_adj;
3663 ops[0] = operands[0];
3664 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3665 ops[2] = GEN_INT (elt_adj);
3666 output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3670 [(set_attr "type" "neon_to_gp_q")]
;; VQ2-mode variant of the vmov.u<size> lane extract.  The source is split
;; into halves of halfelts lanes: the <V_HALF> register used is
;; regno + 2 * (elt / halfelts) and the lane within it is elt % halfelts,
;; mirrored within the half on big-endian targets.
3673 (define_insn "neon_vget_lane<mode>_zext_internal"
3674 [(set (match_operand:SI 0 "s_register_operand" "=r")
3676 (vec_select:<V_elem>
3677 (match_operand:VQ2 1 "s_register_operand" "w")
3678 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3682 int regno = REGNO (operands[1]);
3683 unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3684 unsigned int elt = INTVAL (operands[2]);
3685 unsigned int elt_adj = elt % halfelts;
3687 if (BYTES_BIG_ENDIAN)
3688 elt_adj = halfelts - 1 - elt_adj;
3690 ops[0] = operands[0];
3691 ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3692 ops[2] = GEN_INT (elt_adj);
3693 output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3697 [(set_attr "type" "neon_to_gp_q")]
;; vget_lane entry point for VDQW modes.  On big-endian targets the lane
;; index is XORed with (lanes per 64-bit register - 1).  Modes with 32-bit
;; elements go through vec_extract<mode><V_elem_l>; the
;; neon_vget_lane<mode>_sext_internal pattern is the other path.
3700 (define_expand "neon_vget_lane<mode>"
3701 [(match_operand:<V_ext> 0 "s_register_operand" "")
3702 (match_operand:VDQW 1 "s_register_operand" "")
3703 (match_operand:SI 2 "immediate_operand" "")]
3706 if (BYTES_BIG_ENDIAN)
3708 /* The intrinsics are defined in terms of a model where the
3709 element ordering in memory is vldm order, whereas the generic
3710 RTL is defined in terms of a model where the element ordering
3711 in memory is array order. Convert the lane number to conform
3713 unsigned int elt = INTVAL (operands[2]);
3714 unsigned int reg_nelts
3715 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3716 elt ^= reg_nelts - 1;
3717 operands[2] = GEN_INT (elt);
3720 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3721 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3724 emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
;; vget_laneu entry point for VDQIW modes.  Same lane remapping as
;; neon_vget_lane<mode>; modes with 32-bit elements go through
;; vec_extract<mode><V_elem_l>, and the
;; neon_vget_lane<mode>_zext_internal pattern is the other path.
3730 (define_expand "neon_vget_laneu<mode>"
3731 [(match_operand:<V_ext> 0 "s_register_operand" "")
3732 (match_operand:VDQIW 1 "s_register_operand" "")
3733 (match_operand:SI 2 "immediate_operand" "")]
3736 if (BYTES_BIG_ENDIAN)
3738 /* The intrinsics are defined in terms of a model where the
3739 element ordering in memory is vldm order, whereas the generic
3740 RTL is defined in terms of a model where the element ordering
3741 in memory is array order. Convert the lane number to conform
3743 unsigned int elt = INTVAL (operands[2]);
3744 unsigned int reg_nelts
3745 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3746 elt ^= reg_nelts - 1;
3747 operands[2] = GEN_INT (elt);
3750 if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3751 emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3754 emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
;; DImode vget_lane: the lane operand (2) is not used; the expansion is a
;; plain move of operand 1 into operand 0.
3760 (define_expand "neon_vget_lanedi"
3761 [(match_operand:DI 0 "s_register_operand" "=r")
3762 (match_operand:DI 1 "s_register_operand" "w")
3763 (match_operand:SI 2 "immediate_operand" "")]
3766 emit_move_insn (operands[0], operands[1]);
;; V2DI vget_lane.  After the big-endian remapping (lane ^= 1) the lane
;; must be 0 or 1: lane 0 moves the DImode low part of operand 1, lane 1
;; moves its high part.
3770 (define_expand "neon_vget_lanev2di"
3771 [(match_operand:DI 0 "s_register_operand" "")
3772 (match_operand:V2DI 1 "s_register_operand" "")
3773 (match_operand:SI 2 "immediate_operand" "")]
3778 if (BYTES_BIG_ENDIAN)
3780 /* The intrinsics are defined in terms of a model where the
3781 element ordering in memory is vldm order, whereas the generic
3782 RTL is defined in terms of a model where the element ordering
3783 in memory is array order. Convert the lane number to conform
3785 unsigned int elt = INTVAL (operands[2]);
3786 unsigned int reg_nelts = 2;
3787 elt ^= reg_nelts - 1;
3788 operands[2] = GEN_INT (elt);
3791 lane = INTVAL (operands[2]);
3792 gcc_assert ((lane ==0) || (lane == 1));
3793 emit_move_insn (operands[0], lane == 0
3794 ? gen_lowpart (DImode, operands[1])
3795 : gen_highpart (DImode, operands[1]));
;; vset_lane entry point for VDQ modes.  Operand 1 is the scalar, operand 2
;; the input vector and operand 3 the lane index.  The lane is remapped on
;; big-endian targets and then passed to vec_set<mode>_internal as a
;; one-hot mask (1 << elt).
3799 (define_expand "neon_vset_lane<mode>"
3800 [(match_operand:VDQ 0 "s_register_operand" "=w")
3801 (match_operand:<V_elem> 1 "s_register_operand" "r")
3802 (match_operand:VDQ 2 "s_register_operand" "0")
3803 (match_operand:SI 3 "immediate_operand" "i")]
3806 unsigned int elt = INTVAL (operands[3]);
3808 if (BYTES_BIG_ENDIAN)
3810 unsigned int reg_nelts
3811 = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3812 elt ^= reg_nelts - 1;
3815 emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3816 GEN_INT (1 << elt), operands[2]));
3820 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
;; The expansion is a plain move of operand 1 into operand 0.
3822 (define_expand "neon_vset_lanedi"
3823 [(match_operand:DI 0 "s_register_operand" "=w")
3824 (match_operand:DI 1 "s_register_operand" "r")
3825 (match_operand:DI 2 "s_register_operand" "0")
3826 (match_operand:SI 3 "immediate_operand" "i")]
3829 emit_move_insn (operands[0], operands[1]);
;; vcreate: takes the <MODE>mode lowpart of the DI operand 1 and moves it
;; into the VD_RE destination.
3833 (define_expand "neon_vcreate<mode>"
3834 [(match_operand:VD_RE 0 "s_register_operand" "")
3835 (match_operand:DI 1 "general_operand" "")]
3838 rtx src = gen_lowpart (<MODE>mode, operands[1]);
3839 emit_move_insn (operands[0], src);
;; vdup from a core register ("r") scalar into every lane of a VX vector.
3843 (define_insn "neon_vdup_n<mode>"
3844 [(set (match_operand:VX 0 "s_register_operand" "=w")
3845 (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3847 "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3848 [(set_attr "type" "neon_from_gp<q>")]
;; vec_duplicate of an HF scalar held in a core register into V4HF.
3851 (define_insn "neon_vdup_nv4hf"
3852 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3853 (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3856 [(set_attr "type" "neon_from_gp")]
;; vec_duplicate of an HF scalar held in a core register into V8HF.
3859 (define_insn "neon_vdup_nv8hf"
3860 [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3861 (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3864 [(set_attr "type" "neon_from_gp_q")]
;; vdup for V32 modes with two alternatives: the scalar in a core register
;; ("r", printed as %1) or in a "t" register (printed as %y1).
3867 (define_insn "neon_vdup_n<mode>"
3868 [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3869 (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3872 vdup.<V_sz_elem>\t%<V_reg>0, %1
3873 vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3874 [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
;; DImode vdup_n: expands to a plain move of operand 1 into operand 0.
3877 (define_expand "neon_vdup_ndi"
3878 [(match_operand:DI 0 "s_register_operand" "=w")
3879 (match_operand:DI 1 "s_register_operand" "r")]
3882 emit_move_insn (operands[0], operands[1]);
;; V2DI duplicate of a DI scalar.  Emits two vmov instructions (one to %e0,
;; one to %f0), hence the "length" of 8.  Alternative 0 takes the scalar
;; from core registers, alternative 1 from a "w" register.
3887 (define_insn "neon_vdup_nv2di"
3888 [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3889 (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3892 vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3893 vmov\t%e0, %P1\;vmov\t%f0, %P1"
3894 [(set_attr "length" "8")
3895 (set_attr "type" "multiple")]
;; Duplicate lane operands[2] of the <V_double_vector_mode> operand 1 into
;; a VDQW destination.  On big-endian targets the lane number is mirrored
;; (nunits - 1 - elt).  The vdup is printed with either a %P0 or a %q0
;; destination.
3898 (define_insn "neon_vdup_lane<mode>_internal"
3899 [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3901 (vec_select:<V_elem>
3902 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3903 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3906 if (BYTES_BIG_ENDIAN)
3908 int elt = INTVAL (operands[2]);
3909 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3910 operands[2] = GEN_INT (elt);
3913 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3915 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3917 [(set_attr "type" "neon_dup<q>")]
;; VH-mode variant of the lane duplicate; requires TARGET_NEON and
;; TARGET_FP16.  On big-endian targets the lane number is mirrored
;; (nunits - 1 - elt).  The vdup is printed with either a %P0 or a %q0
;; destination.
3920 (define_insn "neon_vdup_lane<mode>_internal"
3921 [(set (match_operand:VH 0 "s_register_operand" "=w")
3923 (vec_select:<V_elem>
3924 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3925 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3926 "TARGET_NEON && TARGET_FP16"
3928 if (BYTES_BIG_ENDIAN)
3930 int elt = INTVAL (operands[2]);
3931 elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3932 operands[2] = GEN_INT (elt);
3935 return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3937 return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3939 [(set_attr "type" "neon_dup<q>")]
;; vdup_lane entry point for VDQW modes.  On big-endian targets the lane
;; index is XORed with (lanes per 64-bit register - 1) before forwarding to
;; neon_vdup_lane<mode>_internal.
3942 (define_expand "neon_vdup_lane<mode>"
3943 [(match_operand:VDQW 0 "s_register_operand" "=w")
3944 (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3945 (match_operand:SI 2 "immediate_operand" "i")]
3948 if (BYTES_BIG_ENDIAN)
3950 unsigned int elt = INTVAL (operands[2]);
3951 unsigned int reg_nelts
3952 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3953 elt ^= reg_nelts - 1;
3954 operands[2] = GEN_INT (elt);
3956 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
;; vdup_lane entry point for VH modes; requires TARGET_NEON and
;; TARGET_FP16.  On big-endian targets the lane index is XORed with
;; (lanes per 64-bit register - 1) before forwarding to
;; neon_vdup_lane<mode>_internal.
3961 (define_expand "neon_vdup_lane<mode>"
3962 [(match_operand:VH 0 "s_register_operand")
3963 (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3964 (match_operand:SI 2 "immediate_operand")]
3965 "TARGET_NEON && TARGET_FP16"
3967 if (BYTES_BIG_ENDIAN)
3969 unsigned int elt = INTVAL (operands[2]);
3970 unsigned int reg_nelts
3971 = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3972 elt ^= reg_nelts - 1;
3973 operands[2] = GEN_INT (elt);
3975 emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3980 ; Scalar index is ignored, since only zero is valid here.
;; The expansion is a plain move of operand 1 into operand 0.
3981 (define_expand "neon_vdup_lanedi"
3982 [(match_operand:DI 0 "s_register_operand" "=w")
3983 (match_operand:DI 1 "s_register_operand" "w")
3984 (match_operand:SI 2 "immediate_operand" "i")]
3987 emit_move_insn (operands[0], operands[1]);
3991 ; Likewise for v2di, as the DImode second operand has only a single element.
;; Forwards operands 0 and 1 to neon_vdup_nv2di; operand 2 is not used.
3992 (define_expand "neon_vdup_lanev2di"
3993 [(match_operand:V2DI 0 "s_register_operand" "=w")
3994 (match_operand:DI 1 "s_register_operand" "w")
3995 (match_operand:SI 2 "immediate_operand" "i")]
3998 emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
4002 ; Disabled before reload because we don't want combine doing something silly,
4003 ; but used by the post-reload expansion of neon_vcombine.
;; Both operands are read-write ("+w"): the pattern is two sets that
;; exchange operand 0 and operand 1, printed as a single vswp.
4004 (define_insn "*neon_vswp<mode>"
4005 [(set (match_operand:VDQX 0 "s_register_operand" "+w")
4006 (match_operand:VDQX 1 "s_register_operand" "+w"))
4007 (set (match_dup 1) (match_dup 0))]
4008 "TARGET_NEON && reload_completed"
4009 "vswp\t%<V_reg>0, %<V_reg>1"
4010 [(set_attr "type" "neon_permute<q>")]
4013 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; destination vector (the pattern is a vec_concat of operands 1 and 2).
4015 ;; FIXME: A different implementation of this builtin could make it much
4016 ;; more likely that we wouldn't actually need to output anything (we could make
4017 ;; it so that the reg allocator puts things in the right places magically
4018 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
;; The insn is split after reload by calling neon_split_vcombine.
4020 (define_insn_and_split "neon_vcombine<mode>"
4021 [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
4022 (vec_concat:<V_DOUBLE>
4023 (match_operand:VDX 1 "s_register_operand" "w")
4024 (match_operand:VDX 2 "s_register_operand" "w")))]
4027 "&& reload_completed"
4030 neon_split_vcombine (operands);
4033 [(set_attr "type" "multiple")]
;; vget_high for VQX modes: moves the <V_HALF> subreg of operand 1 located
;; at byte offset GET_MODE_SIZE (<V_HALF>mode) into operand 0.
4036 (define_expand "neon_vget_high<mode>"
4037 [(match_operand:<V_HALF> 0 "s_register_operand")
4038 (match_operand:VQX 1 "s_register_operand")]
4041 emit_move_insn (operands[0],
4042 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
4043 GET_MODE_SIZE (<V_HALF>mode)));
;; vget_low for VQX modes: moves a <V_HALF> subreg of operand 1 into
;; operand 0.
4047 (define_expand "neon_vget_low<mode>"
4048 [(match_operand:<V_HALF> 0 "s_register_operand")
4049 (match_operand:VQX 1 "s_register_operand")]
4052 emit_move_insn (operands[0],
4053 simplify_gen_subreg (<V_HALF>mode, operands[1],
;; Signed integer to float conversion (RTL float) for VCVTI inputs, printed
;; as vcvt.f32.s32.  Disabled when flag_rounding_math is set.
4058 (define_insn "float<mode><V_cvtto>2"
4059 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4060 (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4061 "TARGET_NEON && !flag_rounding_math"
4062 "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
4063 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Unsigned integer to float conversion (RTL unsigned_float) for VCVTI
;; inputs, printed as vcvt.f32.u32.  Disabled when flag_rounding_math is
;; set.
4066 (define_insn "floatuns<mode><V_cvtto>2"
4067 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4068 (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
4069 "TARGET_NEON && !flag_rounding_math"
4070 "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
4071 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Float to signed integer conversion (RTL fix) for VCVTF inputs, printed
;; as vcvt.s32.f32.
4074 (define_insn "fix_trunc<mode><V_cvtto>2"
4075 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4076 (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4078 "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
4079 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Float to unsigned integer conversion (RTL unsigned_fix) for VCVTF
;; inputs, printed as vcvt.u32.f32.
4082 (define_insn "fixuns_trunc<mode><V_cvtto>2"
4083 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4084 (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
4086 "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4087 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from VCVTF (f32) to a 32-bit integer vector; <sup> supplies
;; the type prefix of the destination element.
4090 (define_insn "neon_vcvt<sup><mode>"
4091 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4092 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4095 "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4096 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Unspec vcvt from a VCVTI integer vector to f32; <sup> supplies the type
;; prefix of the source element.
4099 (define_insn "neon_vcvt<sup><mode>"
4100 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4101 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4104 "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4105 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; Widening conversion V4HF -> V4SF (vcvt.f32.f16); requires TARGET_NEON
;; and TARGET_FP16.
4108 (define_insn "neon_vcvtv4sfv4hf"
4109 [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4110 (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4112 "TARGET_NEON && TARGET_FP16"
4113 "vcvt.f32.f16\t%q0, %P1"
4114 [(set_attr "type" "neon_fp_cvt_widen_h")]
;; Narrowing conversion V4SF -> V4HF (vcvt.f16.f32); requires TARGET_NEON
;; and TARGET_FP16.
4117 (define_insn "neon_vcvtv4hfv4sf"
4118 [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4119 (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4121 "TARGET_NEON && TARGET_FP16"
4122 "vcvt.f16.f32\t%P0, %q1"
4123 [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
;; vcvt from a VCVTHI integer vector to f16; requires TARGET_NEON_FP16INST.
4126 (define_insn "neon_vcvt<sup><mode>"
4128 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4130 [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4132 "TARGET_NEON_FP16INST"
4133 "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4134 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision float -> 16-bit integer conversion (vcvt.<sup>16.f16) for
;; the VH modes.  Only available with TARGET_NEON_FP16INST.
4137 (define_insn "neon_vcvt<sup><mode>"
4139 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4141 [(match_operand:VH 1 "s_register_operand" "w")]
4143 "TARGET_NEON_FP16INST"
4144 "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4145 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; 32-bit float -> fixed-point conversion.  Operand 2 is the immediate that
;; is printed as the third assembler operand; it is range-checked with
;; arm_const_bounds (operands[2], 1, 33) before the template is returned.
4148 (define_insn "neon_vcvt<sup>_n<mode>"
4149 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4150 (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4151 (match_operand:SI 2 "immediate_operand" "i")]
4155 arm_const_bounds (operands[2], 1, 33);
4156 return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4158 [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
;; Half-precision float -> 16-bit fixed-point conversion
;; (TARGET_NEON_FP16INST only).  Operand 2 is the immediate printed as the
;; third assembler operand.
;; NOTE(review): the bounds check here starts at 0, whereas the 32-bit
;; variants of this pattern use a lower bound of 1 -- confirm whether an
;; immediate of 0 is really meant to be accepted for the f16 forms.
4161 (define_insn "neon_vcvt<sup>_n<mode>"
4162 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4164 [(match_operand:VH 1 "s_register_operand" "w")
4165 (match_operand:SI 2 "immediate_operand" "i")]
4167 "TARGET_NEON_FP16INST"
4169 arm_const_bounds (operands[2], 0, 17);
4170 return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4172 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; 32-bit fixed-point -> float conversion.  Operand 2 is the immediate that
;; is printed as the third assembler operand; it is range-checked with
;; arm_const_bounds (operands[2], 1, 33) before the template is returned.
4175 (define_insn "neon_vcvt<sup>_n<mode>"
4176 [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4177 (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4178 (match_operand:SI 2 "immediate_operand" "i")]
4182 arm_const_bounds (operands[2], 1, 33);
4183 return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4185 [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
;; 16-bit fixed-point -> half-precision float conversion
;; (TARGET_NEON_FP16INST only).  Operand 2 is the immediate printed as the
;; third assembler operand.
;; NOTE(review): the bounds check here starts at 0, whereas the 32-bit
;; variants of this pattern use a lower bound of 1 -- confirm whether an
;; immediate of 0 is really meant to be accepted for the f16 forms.
4188 (define_insn "neon_vcvt<sup>_n<mode>"
4189 [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4191 [(match_operand:VCVTHI 1 "s_register_operand" "w")
4192 (match_operand:SI 2 "immediate_operand" "i")]
4194 "TARGET_NEON_FP16INST"
4196 arm_const_bounds (operands[2], 0, 17);
4197 return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4199 [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
;; Half-precision float -> 16-bit integer conversion where <vcvth_op> is
;; appended to the mnemonic (vcvt<vcvth_op>.<sup>16.f16), selecting the
;; conversion variant.  Only available with TARGET_NEON_FP16INST.
4202 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4204 (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4206 [(match_operand:VH 1 "s_register_operand" "w")]
4208 "TARGET_NEON_FP16INST"
4209 "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4210 [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
;; Narrowing move: a VN-mode input produces a <V_narrow>-mode result via
;; vmovn.
4213 (define_insn "neon_vmovn<mode>"
4214 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4215 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4218 "vmovn.<V_if_elem>\t%P0, %q1"
4219 [(set_attr "type" "neon_shift_imm_narrow_q")]
;; Saturating narrowing move (vqmovn); <sup> supplies the signedness letter
;; of the element type in the mnemonic suffix.
4222 (define_insn "neon_vqmovn<sup><mode>"
4223 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4224 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4227 "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4228 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Narrowing move emitted as vqmovun with the <V_s_elem> type suffix; a
;; VN-mode input produces a <V_narrow>-mode result.
4231 (define_insn "neon_vqmovun<mode>"
4232 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4233 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4236 "vqmovun.<V_s_elem>\t%P0, %q1"
4237 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; Widening move: a VW-mode input produces a <V_widen>-mode result via
;; vmovl; <sup> supplies the signedness letter of the element type.
4240 (define_insn "neon_vmovl<sup><mode>"
4241 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4242 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4245 "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4246 [(set_attr "type" "neon_shift_imm_long")]
;; Multiply by scalar, VMD modes: operand 1 is multiplied by the element of
;; operand 2 selected by the immediate lane index in operand 3
;; (%P2[%c3]).  The scheduling type depends on whether the mode is a
;; floating-point one.
4249 (define_insn "neon_vmul_lane<mode>"
4250 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4251 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4252 (match_operand:VMD 2 "s_register_operand"
4253 "<scalar_mul_constraint>")
4254 (match_operand:SI 3 "immediate_operand" "i")]
4258 return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4261 (if_then_else (match_test "<Is_float_mode>")
4262 (const_string "neon_fp_mul_s_scalar<q>")
4263 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Multiply by scalar, VMQ modes: operand 1 is multiplied by the element of
;; operand 2 selected by the immediate lane index in operand 3.  The scalar
;; source (operand 2) has the <V_HALF> mode of the vector operands.
4266 (define_insn "neon_vmul_lane<mode>"
4267 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4268 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4269 (match_operand:<V_HALF> 2 "s_register_operand"
4270 "<scalar_mul_constraint>")
4271 (match_operand:SI 3 "immediate_operand" "i")]
4275 return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4278 (if_then_else (match_test "<Is_float_mode>")
4279 (const_string "neon_fp_mul_s_scalar<q>")
4280 (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
;; Half-precision multiply by scalar (vmul.f16), VH modes.  The scalar
;; source (operand 2) is always a V4HF register, indexed by the immediate in
;; operand 3.  Only available with TARGET_NEON_FP16INST.
4283 (define_insn "neon_vmul_lane<mode>"
4284 [(set (match_operand:VH 0 "s_register_operand" "=w")
4285 (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4286 (match_operand:V4HF 2 "s_register_operand"
4287 "<scalar_mul_constraint>")
4288 (match_operand:SI 3 "immediate_operand" "i")]
4290 "TARGET_NEON_FP16INST"
4291 "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4292 [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
;; Widening multiply by scalar (vmull): VMDI-mode operand 1 times the lane
;; of operand 2 selected by operand 3, giving a <V_widen>-mode result.
;; <sup> supplies the signedness letter.
4295 (define_insn "neon_vmull<sup>_lane<mode>"
4296 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4297 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4298 (match_operand:VMDI 2 "s_register_operand"
4299 "<scalar_mul_constraint>")
4300 (match_operand:SI 3 "immediate_operand" "i")]
4304 return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4306 [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
;; vqdmull by scalar (UNSPEC_VQDMULL_LANE): VMDI-mode operand 1 and the lane
;; of operand 2 selected by operand 3 give a <V_widen>-mode result.
4309 (define_insn "neon_vqdmull_lane<mode>"
4310 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4311 (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4312 (match_operand:VMDI 2 "s_register_operand"
4313 "<scalar_mul_constraint>")
4314 (match_operand:SI 3 "immediate_operand" "i")]
4315 UNSPEC_VQDMULL_LANE))]
4318 return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4320 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
;; vqdmulh / vqrdmulh by scalar, VMQI modes (<r> selects the mnemonic).
;; The scalar comes from the <V_HALF>-mode operand 2, lane operand 3.
4323 (define_insn "neon_vq<r>dmulh_lane<mode>"
4324 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4325 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4326 (match_operand:<V_HALF> 2 "s_register_operand"
4327 "<scalar_mul_constraint>")
4328 (match_operand:SI 3 "immediate_operand" "i")]
4332 return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4334 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
;; vqdmulh / vqrdmulh by scalar, VMDI modes (<r> selects the mnemonic).
;; NOTE(review): the "type" attribute carries the same "_q" suffix as the
;; VMQI variant of this pattern -- confirm that is intended here.
4337 (define_insn "neon_vq<r>dmulh_lane<mode>"
4338 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4339 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4340 (match_operand:VMDI 2 "s_register_operand"
4341 "<scalar_mul_constraint>")
4342 (match_operand:SI 3 "immediate_operand" "i")]
4346 return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4348 [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4351 ;; vqrdmlah_lane, vqrdmlsh_lane
;; VMQI variant.  Operand 1 is tied to the output ("0" constraint), operand 2
;; is the vector multiplicand and the scalar is lane operand 4 of the
;; <V_HALF>-mode operand 3.
4352 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4353 [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4354 (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4355 (match_operand:VMQI 2 "s_register_operand" "w")
4356 (match_operand:<V_HALF> 3 "s_register_operand"
4357 "<scalar_mul_constraint>")
4358 (match_operand:SI 4 "immediate_operand" "i")]
4363 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4365 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
;; vqrdmlah / vqrdmlsh by scalar, VMDI variant.  Operand 1 is tied to the
;; output ("0" constraint), operand 2 is the vector multiplicand and the
;; scalar is lane operand 4 of operand 3.
4368 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4369 [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4370 (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4371 (match_operand:VMDI 2 "s_register_operand" "w")
4372 (match_operand:VMDI 3 "s_register_operand"
4373 "<scalar_mul_constraint>")
4374 (match_operand:SI 4 "immediate_operand" "i")]
4379 "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4381 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
;; Multiply-accumulate by scalar (vmla), VMD modes.  Operand 1 is the
;; accumulator and is tied to the output ("0"); operand 2 is multiplied by
;; lane operand 4 of operand 3.
4384 (define_insn "neon_vmla_lane<mode>"
4385 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4386 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4387 (match_operand:VMD 2 "s_register_operand" "w")
4388 (match_operand:VMD 3 "s_register_operand"
4389 "<scalar_mul_constraint>")
4390 (match_operand:SI 4 "immediate_operand" "i")]
4394 return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4397 (if_then_else (match_test "<Is_float_mode>")
4398 (const_string "neon_fp_mla_s_scalar<q>")
4399 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-accumulate by scalar (vmla), VMQ modes.  Operand 1 is the
;; accumulator tied to the output; the scalar is lane operand 4 of the
;; <V_HALF>-mode operand 3.
4402 (define_insn "neon_vmla_lane<mode>"
4403 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4404 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4405 (match_operand:VMQ 2 "s_register_operand" "w")
4406 (match_operand:<V_HALF> 3 "s_register_operand"
4407 "<scalar_mul_constraint>")
4408 (match_operand:SI 4 "immediate_operand" "i")]
4412 return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4415 (if_then_else (match_test "<Is_float_mode>")
4416 (const_string "neon_fp_mla_s_scalar<q>")
4417 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-accumulate by scalar (vmlal).  The <V_widen>-mode
;; accumulator (operand 1) is tied to the output; VMDI operand 2 is
;; multiplied by lane operand 4 of operand 3.
4420 (define_insn "neon_vmlal<sup>_lane<mode>"
4421 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4422 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4423 (match_operand:VMDI 2 "s_register_operand" "w")
4424 (match_operand:VMDI 3 "s_register_operand"
4425 "<scalar_mul_constraint>")
4426 (match_operand:SI 4 "immediate_operand" "i")]
4430 return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4432 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlal by scalar (UNSPEC_VQDMLAL_LANE).  The <V_widen>-mode accumulator
;; (operand 1) is tied to the output; VMDI operand 2 is combined with lane
;; operand 4 of operand 3.
4435 (define_insn "neon_vqdmlal_lane<mode>"
4436 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4437 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4438 (match_operand:VMDI 2 "s_register_operand" "w")
4439 (match_operand:VMDI 3 "s_register_operand"
4440 "<scalar_mul_constraint>")
4441 (match_operand:SI 4 "immediate_operand" "i")]
4442 UNSPEC_VQDMLAL_LANE))]
4445 return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4447 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
;; Multiply-subtract by scalar (vmls), VMD modes.  Operand 1 is tied to the
;; output ("0"); operand 2 is multiplied by lane operand 4 of operand 3.
4450 (define_insn "neon_vmls_lane<mode>"
4451 [(set (match_operand:VMD 0 "s_register_operand" "=w")
4452 (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4453 (match_operand:VMD 2 "s_register_operand" "w")
4454 (match_operand:VMD 3 "s_register_operand"
4455 "<scalar_mul_constraint>")
4456 (match_operand:SI 4 "immediate_operand" "i")]
4460 return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4463 (if_then_else (match_test "<Is_float_mode>")
4464 (const_string "neon_fp_mla_s_scalar<q>")
4465 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Multiply-subtract by scalar (vmls), VMQ modes.  Operand 1 is tied to the
;; output; the scalar is lane operand 4 of the <V_HALF>-mode operand 3.
4468 (define_insn "neon_vmls_lane<mode>"
4469 [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4470 (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4471 (match_operand:VMQ 2 "s_register_operand" "w")
4472 (match_operand:<V_HALF> 3 "s_register_operand"
4473 "<scalar_mul_constraint>")
4474 (match_operand:SI 4 "immediate_operand" "i")]
4478 return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4481 (if_then_else (match_test "<Is_float_mode>")
4482 (const_string "neon_fp_mla_s_scalar<q>")
4483 (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
;; Widening multiply-subtract by scalar (vmlsl).  The <V_widen>-mode
;; operand 1 is tied to the output; VMDI operand 2 is multiplied by lane
;; operand 4 of operand 3.
4486 (define_insn "neon_vmlsl<sup>_lane<mode>"
4487 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4488 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4489 (match_operand:VMDI 2 "s_register_operand" "w")
4490 (match_operand:VMDI 3 "s_register_operand"
4491 "<scalar_mul_constraint>")
4492 (match_operand:SI 4 "immediate_operand" "i")]
4496 return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4498 [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
;; vqdmlsl by scalar (UNSPEC_VQDMLSL_LANE).  The <V_widen>-mode operand 1 is
;; tied to the output; VMDI operand 2 is combined with lane operand 4 of
;; operand 3.
4501 (define_insn "neon_vqdmlsl_lane<mode>"
4502 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4503 (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4504 (match_operand:VMDI 2 "s_register_operand" "w")
4505 (match_operand:VMDI 3 "s_register_operand"
4506 "<scalar_mul_constraint>")
4507 (match_operand:SI 4 "immediate_operand" "i")]
4508 UNSPEC_VQDMLSL_LANE))]
4511 return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4513 [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4516 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4517 ; core register into a temp register, then use a scalar taken from that. This
4518 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4519 ; or extracted from another vector. In the latter case it's currently better to
4520 ; use the "_lane" variant, and the former case can probably be implemented
4521 ; using vld1_lane, but that hasn't been done yet.
;; vmul_n for VMD modes: allocates a fresh <MODE> pseudo, inserts the scalar
;; operand 2 into lane 0 with vset_lane (the pseudo is passed as both the
;; result and the input vector), then emits neon_vmul_lane on it.  The
;; trailing arguments of the vmul_lane call are not visible in this view.
4523 (define_expand "neon_vmul_n<mode>"
4524 [(match_operand:VMD 0 "s_register_operand" "")
4525 (match_operand:VMD 1 "s_register_operand" "")
4526 (match_operand:<V_elem> 2 "s_register_operand" "")]
4529 rtx tmp = gen_reg_rtx (<MODE>mode);
4530 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4531 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul_n for VMQ modes: the scalar operand 2 is placed in lane 0 of a fresh
;; <V_HALF>-mode pseudo via vset_lane, which is then used as the scalar
;; source of neon_vmul_lane.
4536 (define_expand "neon_vmul_n<mode>"
4537 [(match_operand:VMQ 0 "s_register_operand" "")
4538 (match_operand:VMQ 1 "s_register_operand" "")
4539 (match_operand:<V_elem> 2 "s_register_operand" "")]
4542 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4543 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4544 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmul_n for the half-precision VH modes (TARGET_NEON_FP16INST): the scalar
;; operand 2 is placed in lane 0 of a fresh V4HF pseudo via
;; neon_vset_lanev4hf, which then feeds neon_vmul_lane.
4549 (define_expand "neon_vmul_n<mode>"
4550 [(match_operand:VH 0 "s_register_operand")
4551 (match_operand:VH 1 "s_register_operand")
4552 (match_operand:<V_elem> 2 "s_register_operand")]
4553 "TARGET_NEON_FP16INST"
4555 rtx tmp = gen_reg_rtx (V4HFmode);
4556 emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4557 emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
;; vmull_n, "s" variant: the scalar operand 2 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmulls_lane is emitted with that
;; pseudo as the scalar source.
4562 (define_expand "neon_vmulls_n<mode>"
4563 [(match_operand:<V_widen> 0 "s_register_operand" "")
4564 (match_operand:VMDI 1 "s_register_operand" "")
4565 (match_operand:<V_elem> 2 "s_register_operand" "")]
4568 rtx tmp = gen_reg_rtx (<MODE>mode);
4569 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4570 emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
;; vmull_n, "u" variant: the scalar operand 2 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmullu_lane is emitted with that
;; pseudo as the scalar source.
4575 (define_expand "neon_vmullu_n<mode>"
4576 [(match_operand:<V_widen> 0 "s_register_operand" "")
4577 (match_operand:VMDI 1 "s_register_operand" "")
4578 (match_operand:<V_elem> 2 "s_register_operand" "")]
4581 rtx tmp = gen_reg_rtx (<MODE>mode);
4582 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4583 emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
;; vqdmull_n: the scalar operand 2 goes into lane 0 of a fresh <MODE> pseudo
;; via vset_lane, then neon_vqdmull_lane is emitted with that pseudo as the
;; scalar source.
4588 (define_expand "neon_vqdmull_n<mode>"
4589 [(match_operand:<V_widen> 0 "s_register_operand" "")
4590 (match_operand:VMDI 1 "s_register_operand" "")
4591 (match_operand:<V_elem> 2 "s_register_operand" "")]
4594 rtx tmp = gen_reg_rtx (<MODE>mode);
4595 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4596 emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh_n for VMDI modes: the scalar operand 2 goes into lane 0 of a
;; fresh <MODE> pseudo via vset_lane, then neon_vqdmulh_lane is emitted.
4601 (define_expand "neon_vqdmulh_n<mode>"
4602 [(match_operand:VMDI 0 "s_register_operand" "")
4603 (match_operand:VMDI 1 "s_register_operand" "")
4604 (match_operand:<V_elem> 2 "s_register_operand" "")]
4607 rtx tmp = gen_reg_rtx (<MODE>mode);
4608 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4609 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh_n for VMDI modes: the scalar operand 2 goes into lane 0 of a
;; fresh <MODE> pseudo via vset_lane, then neon_vqrdmulh_lane is emitted.
4614 (define_expand "neon_vqrdmulh_n<mode>"
4615 [(match_operand:VMDI 0 "s_register_operand" "")
4616 (match_operand:VMDI 1 "s_register_operand" "")
4617 (match_operand:<V_elem> 2 "s_register_operand" "")]
4620 rtx tmp = gen_reg_rtx (<MODE>mode);
4621 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4622 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqdmulh_n for VMQI modes: the scalar operand 2 goes into lane 0 of a
;; fresh <V_HALF>-mode pseudo via vset_lane, then neon_vqdmulh_lane is
;; emitted with that pseudo as the scalar source.
4627 (define_expand "neon_vqdmulh_n<mode>"
4628 [(match_operand:VMQI 0 "s_register_operand" "")
4629 (match_operand:VMQI 1 "s_register_operand" "")
4630 (match_operand:<V_elem> 2 "s_register_operand" "")]
4633 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4634 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4635 emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vqrdmulh_n for VMQI modes: the scalar operand 2 goes into lane 0 of a
;; fresh <V_HALF>-mode pseudo via vset_lane, then neon_vqrdmulh_lane is
;; emitted with that pseudo as the scalar source.
4640 (define_expand "neon_vqrdmulh_n<mode>"
4641 [(match_operand:VMQI 0 "s_register_operand" "")
4642 (match_operand:VMQI 1 "s_register_operand" "")
4643 (match_operand:<V_elem> 2 "s_register_operand" "")]
4646 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4647 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4648 emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
;; vmla_n for VMD modes: the scalar operand 3 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmla_lane is emitted with
;; operands 0-2 passed through.
4653 (define_expand "neon_vmla_n<mode>"
4654 [(match_operand:VMD 0 "s_register_operand" "")
4655 (match_operand:VMD 1 "s_register_operand" "")
4656 (match_operand:VMD 2 "s_register_operand" "")
4657 (match_operand:<V_elem> 3 "s_register_operand" "")]
4660 rtx tmp = gen_reg_rtx (<MODE>mode);
4661 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4662 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmla_n for VMQ modes: the scalar operand 3 goes into lane 0 of a fresh
;; <V_HALF>-mode pseudo via vset_lane, then neon_vmla_lane is emitted with
;; operands 0-2 passed through.
4667 (define_expand "neon_vmla_n<mode>"
4668 [(match_operand:VMQ 0 "s_register_operand" "")
4669 (match_operand:VMQ 1 "s_register_operand" "")
4670 (match_operand:VMQ 2 "s_register_operand" "")
4671 (match_operand:<V_elem> 3 "s_register_operand" "")]
4674 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4675 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4676 emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
;; vmlal_n, "s" variant: the scalar operand 3 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmlals_lane is emitted with
;; operands 0-2 passed through.
4681 (define_expand "neon_vmlals_n<mode>"
4682 [(match_operand:<V_widen> 0 "s_register_operand" "")
4683 (match_operand:<V_widen> 1 "s_register_operand" "")
4684 (match_operand:VMDI 2 "s_register_operand" "")
4685 (match_operand:<V_elem> 3 "s_register_operand" "")]
4688 rtx tmp = gen_reg_rtx (<MODE>mode);
4689 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4690 emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
;; vmlal_n, "u" variant: the scalar operand 3 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmlalu_lane is emitted with
;; operands 0-2 passed through.
4695 (define_expand "neon_vmlalu_n<mode>"
4696 [(match_operand:<V_widen> 0 "s_register_operand" "")
4697 (match_operand:<V_widen> 1 "s_register_operand" "")
4698 (match_operand:VMDI 2 "s_register_operand" "")
4699 (match_operand:<V_elem> 3 "s_register_operand" "")]
4702 rtx tmp = gen_reg_rtx (<MODE>mode);
4703 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4704 emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
;; vqdmlal_n: the scalar operand 3 goes into lane 0 of a fresh <MODE> pseudo
;; via vset_lane, then neon_vqdmlal_lane is emitted with operands 0-2
;; passed through.
4709 (define_expand "neon_vqdmlal_n<mode>"
4710 [(match_operand:<V_widen> 0 "s_register_operand" "")
4711 (match_operand:<V_widen> 1 "s_register_operand" "")
4712 (match_operand:VMDI 2 "s_register_operand" "")
4713 (match_operand:<V_elem> 3 "s_register_operand" "")]
4716 rtx tmp = gen_reg_rtx (<MODE>mode);
4717 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4718 emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n for VMD modes: the scalar operand 3 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmls_lane is emitted with
;; operands 0-2 passed through.
4723 (define_expand "neon_vmls_n<mode>"
4724 [(match_operand:VMD 0 "s_register_operand" "")
4725 (match_operand:VMD 1 "s_register_operand" "")
4726 (match_operand:VMD 2 "s_register_operand" "")
4727 (match_operand:<V_elem> 3 "s_register_operand" "")]
4730 rtx tmp = gen_reg_rtx (<MODE>mode);
4731 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4732 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmls_n for VMQ modes: the scalar operand 3 goes into lane 0 of a fresh
;; <V_HALF>-mode pseudo via vset_lane, then neon_vmls_lane is emitted with
;; operands 0-2 passed through.
4737 (define_expand "neon_vmls_n<mode>"
4738 [(match_operand:VMQ 0 "s_register_operand" "")
4739 (match_operand:VMQ 1 "s_register_operand" "")
4740 (match_operand:VMQ 2 "s_register_operand" "")
4741 (match_operand:<V_elem> 3 "s_register_operand" "")]
4744 rtx tmp = gen_reg_rtx (<V_HALF>mode);
4745 emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4746 emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
;; vmlsl_n, "s" variant: the scalar operand 3 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmlsls_lane is emitted with
;; operands 0-2 passed through.
4751 (define_expand "neon_vmlsls_n<mode>"
4752 [(match_operand:<V_widen> 0 "s_register_operand" "")
4753 (match_operand:<V_widen> 1 "s_register_operand" "")
4754 (match_operand:VMDI 2 "s_register_operand" "")
4755 (match_operand:<V_elem> 3 "s_register_operand" "")]
4758 rtx tmp = gen_reg_rtx (<MODE>mode);
4759 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4760 emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
;; vmlsl_n, "u" variant: the scalar operand 3 goes into lane 0 of a fresh
;; <MODE> pseudo via vset_lane, then neon_vmlslu_lane is emitted with
;; operands 0-2 passed through.
4765 (define_expand "neon_vmlslu_n<mode>"
4766 [(match_operand:<V_widen> 0 "s_register_operand" "")
4767 (match_operand:<V_widen> 1 "s_register_operand" "")
4768 (match_operand:VMDI 2 "s_register_operand" "")
4769 (match_operand:<V_elem> 3 "s_register_operand" "")]
4772 rtx tmp = gen_reg_rtx (<MODE>mode);
4773 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4774 emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
;; vqdmlsl_n: the scalar operand 3 goes into lane 0 of a fresh <MODE> pseudo
;; via vset_lane, then neon_vqdmlsl_lane is emitted with operands 0-2
;; passed through.
4779 (define_expand "neon_vqdmlsl_n<mode>"
4780 [(match_operand:<V_widen> 0 "s_register_operand" "")
4781 (match_operand:<V_widen> 1 "s_register_operand" "")
4782 (match_operand:VMDI 2 "s_register_operand" "")
4783 (match_operand:<V_elem> 3 "s_register_operand" "")]
4786 rtx tmp = gen_reg_rtx (<MODE>mode);
4787 emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4788 emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
;; vext on two VDQX-mode vectors.  The immediate operand 3 is checked against
;; the bounds 0 and GET_MODE_NUNITS (<MODE>mode) before being printed as the
;; final assembler operand.
4793 (define_insn "neon_vext<mode>"
4794 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4795 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4796 (match_operand:VDQX 2 "s_register_operand" "w")
4797 (match_operand:SI 3 "immediate_operand" "i")]
4801 arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4802 return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4804 [(set_attr "type" "neon_ext<q>")]
;; vrev64 for the VDQ modes, with the element size taken from <V_sz_elem>.
4807 (define_insn "neon_vrev64<mode>"
4808 [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4809 (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4812 "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4813 [(set_attr "type" "neon_rev<q>")]
;; vrev32 for the VX modes, with the element size taken from <V_sz_elem>.
4816 (define_insn "neon_vrev32<mode>"
4817 [(set (match_operand:VX 0 "s_register_operand" "=w")
4818 (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4821 "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4822 [(set_attr "type" "neon_rev<q>")]
;; vrev16 for the VE modes, with the element size taken from <V_sz_elem>.
4825 (define_insn "neon_vrev16<mode>"
4826 [(set (match_operand:VE 0 "s_register_operand" "=w")
4827 (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4830 "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4831 [(set_attr "type" "neon_rev<q>")]
4834 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4835 ; allocation. For an intrinsic of form:
4836 ; rD = vbsl_* (rS, rN, rM)
4837 ; We can use any of:
4838 ; vbsl rS, rN, rM (if D = S)
4839 ; vbit rD, rN, rS (if D = M, so 1-bits in rS choose bits from rN, else rM)
4840 ; vbif rD, rM, rS (if D = N, so 0-bits in rS choose bits from rM, else rN)
;; Bitwise select with three alternatives, one per input that may be tied to
;; the output register:
;;   alt 0: operand 1 (the selector) tied -> vbsl
;;   alt 1: operand 3 tied                -> vbit
;;   alt 2: operand 2 tied                -> vbif
4842 (define_insn "neon_vbsl<mode>_internal"
4843 [(set (match_operand:VDQX 0 "s_register_operand" "=w,w,w")
4844 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4845 (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4846 (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4850 vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4851 vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4852 vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4853 [(set_attr "type" "neon_bsl<q>")]
;; Expander for the vbsl intrinsics.  The selector (operand 1) arrives in
;; <V_cmp_result> mode and is re-viewed in <MODE> with gen_lowpart so that
;; all three inputs share the mode of the result.
4856 (define_expand "neon_vbsl<mode>"
4857 [(set (match_operand:VDQX 0 "s_register_operand" "")
4858 (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand" "")
4859 (match_operand:VDQX 2 "s_register_operand" "")
4860 (match_operand:VDQX 3 "s_register_operand" "")]
4864 /* We can't alias operands together if they have different modes. */
4865 operands[1] = gen_lowpart (<MODE>mode, operands[1]);
;; Shift of VDQIX operand 1 by the per-element amounts in register operand 2;
;; <shift_op> and <sup> together form the mnemonic and type suffix.
;; Scheduled as neon_shift_imm.
4869 (define_insn "neon_v<shift_op><sup><mode>"
4870 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4871 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4872 (match_operand:VDQIX 2 "s_register_operand" "w")]
4875 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4876 [(set_attr "type" "neon_shift_imm<q>")]
;; Same shape as the register-shift pattern above it in the file, but
;; scheduled as neon_sat_shift_imm (presumably the saturating <shift_op>
;; variants; the unspec iterator that distinguishes them is not visible in
;; this view).
4880 (define_insn "neon_v<shift_op><sup><mode>"
4881 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4882 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4883 (match_operand:VDQIX 2 "s_register_operand" "w")]
4886 "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4887 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Shift of VDQIX operand 1 by the immediate operand 2.  The immediate is
;; checked against the bounds 1 and element-bits + 1 before being printed.
4891 (define_insn "neon_v<shift_op><sup>_n<mode>"
4892 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4893 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4894 (match_operand:SI 2 "immediate_operand" "i")]
4898 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4899 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4901 [(set_attr "type" "neon_shift_imm<q>")]
4904 ;; vshrn_n, vrshrn_n
;; Narrowing shift by immediate: VN-mode input, <V_narrow>-mode result.  The
;; immediate is checked against the bounds 1 and (element-bits / 2) + 1,
;; where element-bits is that of the (wide) input mode.
4905 (define_insn "neon_v<shift_op>_n<mode>"
4906 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4907 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4908 (match_operand:SI 2 "immediate_operand" "i")]
4912 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4913 return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4915 [(set_attr "type" "neon_shift_imm_narrow_q")]
4918 ;; vqshrn_n, vqrshrn_n
;; Saturating narrowing shift by immediate; <sup> supplies the signedness
;; letter.  The immediate is checked against the bounds 1 and
;; (element-bits / 2) + 1 of the input mode.
4919 (define_insn "neon_v<shift_op><sup>_n<mode>"
4920 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4921 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4922 (match_operand:SI 2 "immediate_operand" "i")]
4926 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4927 return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4929 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4932 ;; vqshrun_n, vqrshrun_n
;; Narrowing shift by immediate printed with the <V_s_elem> type suffix.
;; The immediate is checked against the bounds 1 and (element-bits / 2) + 1
;; of the input mode.
4933 (define_insn "neon_v<shift_op>_n<mode>"
4934 [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4935 (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4936 (match_operand:SI 2 "immediate_operand" "i")]
4940 arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4941 return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4943 [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
;; vshl by immediate.  Operand 2 is checked against the bounds 0 and
;; element-bits before being printed.
4946 (define_insn "neon_vshl_n<mode>"
4947 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4948 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4949 (match_operand:SI 2 "immediate_operand" "i")]
4953 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4954 return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4956 [(set_attr "type" "neon_shift_imm<q>")]
;; vqshl by immediate; <sup> supplies the signedness letter.  Operand 2 is
;; checked against the bounds 0 and element-bits before being printed.
4959 (define_insn "neon_vqshl_<sup>_n<mode>"
4960 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4961 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4962 (match_operand:SI 2 "immediate_operand" "i")]
4966 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4967 return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4969 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; vqshlu by immediate, printed with the <V_s_elem> type suffix.  Operand 2
;; is checked against the bounds 0 and element-bits before being printed.
4972 (define_insn "neon_vqshlu_n<mode>"
4973 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4974 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4975 (match_operand:SI 2 "immediate_operand" "i")]
4979 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4980 return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4982 [(set_attr "type" "neon_sat_shift_imm<q>")]
;; Widening shift left by immediate (vshll): VW-mode input, <V_widen>-mode
;; result; <sup> supplies the signedness letter.
4985 (define_insn "neon_vshll<sup>_n<mode>"
4986 [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4987 (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4988 (match_operand:SI 2 "immediate_operand" "i")]
4992 /* The bounds passed below are 0 and size + 1, i.e. 0 <= imm <= size assuming the upper bound is exclusive as at the other call sites. */
4993 arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4994 return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4996 [(set_attr "type" "neon_shift_imm_long")]
;; Shift-by-immediate and accumulate.  Operand 1 is the accumulator tied to
;; the output ("0"); operand 2 is shifted by the immediate operand 3, which
;; is checked against the bounds 1 and element-bits + 1.
5000 (define_insn "neon_v<shift_op><sup>_n<mode>"
5001 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5002 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5003 (match_operand:VDQIX 2 "s_register_operand" "w")
5004 (match_operand:SI 3 "immediate_operand" "i")]
5008 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5009 return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5011 [(set_attr "type" "neon_shift_acc<q>")]
;; vsri by immediate.  Operand 1 is tied to the output ("0"), so the
;; instruction updates it in place from operand 2.  The immediate operand 3
;; is checked against the bounds 1 and element-bits + 1.
5014 (define_insn "neon_vsri_n<mode>"
5015 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5016 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5017 (match_operand:VDQIX 2 "s_register_operand" "w")
5018 (match_operand:SI 3 "immediate_operand" "i")]
5022 arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
5023 return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5025 [(set_attr "type" "neon_shift_reg<q>")]
;; vsli by immediate.  Operand 1 is tied to the output ("0"), so the
;; instruction updates it in place from operand 2.  The immediate operand 3
;; is checked against the bounds 0 and element-bits.
5028 (define_insn "neon_vsli_n<mode>"
5029 [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
5030 (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
5031 (match_operand:VDQIX 2 "s_register_operand" "w")
5032 (match_operand:SI 3 "immediate_operand" "i")]
5036 arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
5037 return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
5039 [(set_attr "type" "neon_shift_reg<q>")]
;; vtbl with a one-register table: operand 1 is the V8QI table (printed in
;; braces) and operand 2 the V8QI index vector.
5042 (define_insn "neon_vtbl1v8qi"
5043 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5044 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
5045 (match_operand:V8QI 2 "s_register_operand" "w")]
5048 "vtbl.8\t%P0, {%P1}, %P2"
5049 [(set_attr "type" "neon_tbl1")]
;; vtbl with a two-register table held in a TImode operand.  The output code
;; builds V8QI register rtxes at REGNO (operands[1]) and REGNO + 2 for the
;; table list and prints the instruction itself with output_asm_insn.
5052 (define_insn "neon_vtbl2v8qi"
5053 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5054 (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
5055 (match_operand:V8QI 2 "s_register_operand" "w")]
5060 int tabbase = REGNO (operands[1]);
5062 ops[0] = operands[0];
5063 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5064 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5065 ops[3] = operands[2];
5066 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
5070 [(set_attr "type" "neon_tbl2")]
;; vtbl with a three-register table held in an EImode operand.  The output
;; code builds V8QI register rtxes at REGNO (operands[1]), + 2 and + 4 for
;; the table list and prints the instruction with output_asm_insn.
5073 (define_insn "neon_vtbl3v8qi"
5074 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5075 (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
5076 (match_operand:V8QI 2 "s_register_operand" "w")]
5081 int tabbase = REGNO (operands[1]);
5083 ops[0] = operands[0];
5084 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5085 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5086 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5087 ops[4] = operands[2];
5088 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5092 [(set_attr "type" "neon_tbl3")]
;; vtbl with a four-register table held in an OImode operand.  The output
;; code builds V8QI register rtxes at REGNO (operands[1]), + 2, + 4 and + 6
;; for the table list and prints the instruction with output_asm_insn.
5095 (define_insn "neon_vtbl4v8qi"
5096 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5097 (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5098 (match_operand:V8QI 2 "s_register_operand" "w")]
5103 int tabbase = REGNO (operands[1]);
5105 ops[0] = operands[0];
5106 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5107 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5108 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5109 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5110 ops[5] = operands[2];
5111 output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5115 [(set_attr "type" "neon_tbl4")]
5118 ;; These three are used by the vec_perm infrastructure for V16QImode.
;; V16QI table lookup with a V16QI table.  After reload it is split into two
;; neon_vtbl2v8qi insns: operand 1 is viewed as a TImode table, and the low
;; and high V8QI halves of the result and of the selector are processed in
;; turn.  The output is earlyclobber ("=&w"), presumably because the first
;; half-result is written while the inputs are still needed by the second.
5119 (define_insn_and_split "neon_vtbl1v16qi"
5120 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5121 (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5122 (match_operand:V16QI 2 "s_register_operand" "w")]
5126 "&& reload_completed"
5129 rtx op0, op1, op2, part0, part2;
5133 op1 = gen_lowpart (TImode, operands[1]);
5136 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5137 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5138 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5139 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5141 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5142 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5143 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5144 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5147 [(set_attr "type" "multiple")]
;; V16QI table lookup with an OImode table operand.  After reload it is split
;; into two lookups, one for the low and one for the high V8QI half of the
;; result and selector.
;; NOTE(review): the visible split code emits gen_neon_vtbl2v8qi (whose
;; table operand is TImode) while operand 1 here is OImode; the lines that
;; assign op1 are not visible in this view -- confirm against the full
;; source.
5150 (define_insn_and_split "neon_vtbl2v16qi"
5151 [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5152 (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5153 (match_operand:V16QI 2 "s_register_operand" "w")]
5157 "&& reload_completed"
5160 rtx op0, op1, op2, part0, part2;
5167 ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5168 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5169 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5170 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5172 ofs = subreg_highpart_offset (V8QImode, V16QImode);
5173 part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5174 part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5175 emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5178 [(set_attr "type" "multiple")]
5181 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5182 ;; handle quad-word input modes, producing octa-word output modes. But
5183 ;; that requires us to add support for octa-word vector modes in moves.
5184 ;; That seems overkill for this one use in vec_perm.
;; Combines two V16QI registers (operands 1 and 2) into one OImode value.
;; The insn is split after reload by calling neon_split_vcombine.
5185 (define_insn_and_split "neon_vcombinev16qi"
5186 [(set (match_operand:OI 0 "s_register_operand" "=w")
5187 (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5188 (match_operand:V16QI 2 "s_register_operand" "w")]
5192 "&& reload_completed"
5195 neon_split_vcombine (operands);
5198 [(set_attr "type" "multiple")]
;; VTBX with a single-register table.  Operand 1 is tied to the output
;; (constraint "0"), operand 2 is the table and operand 3 the index vector.
5201 (define_insn "neon_vtbx1v8qi"
5202 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5203 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5204 (match_operand:V8QI 2 "s_register_operand" "w")
5205 (match_operand:V8QI 3 "s_register_operand" "w")]
5208 "vtbx.8\t%P0, {%P2}, %P3"
5209 [(set_attr "type" "neon_tbl1")]
;; VTBX with a two-register table.  Operand 1 is tied to the output, operand
;; 2 (TImode) is the table and operand 3 the index vector.  The table is
;; printed as the V8QI registers at REGNO (operands[2]) + 0 and + 2.
5212 (define_insn "neon_vtbx2v8qi"
5213 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5214 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5215 (match_operand:TI 2 "s_register_operand" "w")
5216 (match_operand:V8QI 3 "s_register_operand" "w")]
5221 int tabbase = REGNO (operands[2]);
5223 ops[0] = operands[0];
5224 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5225 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5226 ops[3] = operands[3];
5227 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5231 [(set_attr "type" "neon_tbl2")]
;; VTBX with a three-register table.  Operand 1 is tied to the output,
;; operand 2 (EImode) is the table and operand 3 the index vector.  The table
;; is printed as the V8QI registers at REGNO (operands[2]) + 0, 2 and 4.
5234 (define_insn "neon_vtbx3v8qi"
5235 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5236 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5237 (match_operand:EI 2 "s_register_operand" "w")
5238 (match_operand:V8QI 3 "s_register_operand" "w")]
5243 int tabbase = REGNO (operands[2]);
5245 ops[0] = operands[0];
5246 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5247 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5248 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5249 ops[4] = operands[3];
5250 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5254 [(set_attr "type" "neon_tbl3")]
;; VTBX with a four-register table.  Operand 1 is tied to the output,
;; operand 2 (OImode) is the table and operand 3 the index vector.  The table
;; is printed as the V8QI registers at REGNO (operands[2]) + 0, 2, 4 and 6.
5257 (define_insn "neon_vtbx4v8qi"
5258 [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5259 (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5260 (match_operand:OI 2 "s_register_operand" "w")
5261 (match_operand:V8QI 3 "s_register_operand" "w")]
5266 int tabbase = REGNO (operands[2]);
5268 ops[0] = operands[0];
5269 ops[1] = gen_rtx_REG (V8QImode, tabbase);
5270 ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5271 ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5272 ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5273 ops[5] = operands[3];
5274 output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5278 [(set_attr "type" "neon_tbl4")]
;; Expander for VTRN over the VDQWH modes.  It sets two results, operands 0
;; and 3, both computed from inputs 1 and 2; the second set uses
;; UNSPEC_VTRN2.
5281 (define_expand "neon_vtrn<mode>_internal"
5283 [(set (match_operand:VDQWH 0 "s_register_operand")
5284 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5285 (match_operand:VDQWH 2 "s_register_operand")]
5287 (set (match_operand:VDQWH 3 "s_register_operand")
5288 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5293 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber); input 1 is tied
;; to output 0 and input 3 to output 2, and the vtrn is printed on the two
;; output registers.
5294 (define_insn "*neon_vtrn<mode>_insn"
5295 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5296 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5297 (match_operand:VDQWH 3 "s_register_operand" "2")]
5299 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5300 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5303 "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5304 [(set_attr "type" "neon_permute<q>")]
;; Expander for VZIP over the VDQWH modes.  It sets two results, operands 0
;; and 3, both computed from inputs 1 and 2; the second set uses
;; UNSPEC_VZIP2.
5307 (define_expand "neon_vzip<mode>_internal"
5309 [(set (match_operand:VDQWH 0 "s_register_operand")
5310 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5311 (match_operand:VDQWH 2 "s_register_operand")]
5313 (set (match_operand:VDQWH 3 "s_register_operand")
5314 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5319 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber); input 1 is tied
;; to output 0 and input 3 to output 2, and the vzip is printed on the two
;; output registers.
5320 (define_insn "*neon_vzip<mode>_insn"
5321 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5322 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5323 (match_operand:VDQWH 3 "s_register_operand" "2")]
5325 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5326 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5329 "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5330 [(set_attr "type" "neon_zip<q>")]
;; Expander for VUZP over the VDQWH modes.  It sets two results, operands 0
;; and 3, both computed from inputs 1 and 2; the second set uses
;; UNSPEC_VUZP2.
5333 (define_expand "neon_vuzp<mode>_internal"
5335 [(set (match_operand:VDQWH 0 "s_register_operand")
5336 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5337 (match_operand:VDQWH 2 "s_register_operand")]
5339 (set (match_operand:VDQWH 3 "s_register_operand" "")
5340 (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5345 ;; Note: Different operand numbering to handle tied registers correctly.
;; Here the outputs are operands 0 and 2 (both earlyclobber); input 1 is tied
;; to output 0 and input 3 to output 2, and the vuzp is printed on the two
;; output registers.
5346 (define_insn "*neon_vuzp<mode>_insn"
5347 [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5348 (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5349 (match_operand:VDQWH 3 "s_register_operand" "2")]
5351 (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5352 (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5355 "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5356 [(set_attr "type" "neon_zip<q>")]
;; Expander that sets a VDQX register (operand 0) from an unspec of a VDQX
;; memory operand (operand 1, a neon_struct_operand).
5359 (define_expand "vec_load_lanes<mode><mode>"
5360 [(set (match_operand:VDQX 0 "s_register_operand")
5361 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
;; Whole-vector load: a single vld1 from the memory operand 1 into the VDQX
;; register operand 0.
5365 (define_insn "neon_vld1<mode>"
5366 [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5367 (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5370 "vld1.<V_sz_elem>\t%h0, %A1"
5371 [(set_attr "type" "neon_load1_1reg<q>")]
5374 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5375 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
;; lane order by NEON_ENDIAN_LANE_N before the instruction is printed.
;;
;; D-register (VDX) form.  Operand 1 is the element in memory, operand 2 is
;; the incoming vector (tied to the output) and operand 3 the lane number,
;; which is rewritten to the converted lane before the template is returned.
5377 (define_insn "neon_vld1_lane<mode>"
5378 [(set (match_operand:VDX 0 "s_register_operand" "=w")
5379 (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5380 (match_operand:VDX 2 "s_register_operand" "0")
5381 (match_operand:SI 3 "immediate_operand" "i")]
5385 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5386 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5387 operands[3] = GEN_INT (lane);
5389 return "vld1.<V_sz_elem>\t%P0, %A1";
5391 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5393 [(set_attr "type" "neon_load1_one_lane<q>")]
5396 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5397 ;; here on big endian targets.
;; Q-register (VQX) form.  The converted lane is compared against half the
;; number of units, and operand 0 is replaced by a <V_HALF>mode register
;; before the vld1 template is returned.
5398 (define_insn "neon_vld1_lane<mode>"
5399 [(set (match_operand:VQX 0 "s_register_operand" "=w")
5400 (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5401 (match_operand:VQX 2 "s_register_operand" "0")
5402 (match_operand:SI 3 "immediate_operand" "i")]
5406 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5407 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5408 operands[3] = GEN_INT (lane);
5409 int regno = REGNO (operands[0]);
5410 if (lane >= max / 2)
5414 operands[3] = GEN_INT (lane);
5416 operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5418 return "vld1.<V_sz_elem>\t%P0, %A1";
5420 return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5422 [(set_attr "type" "neon_load1_one_lane<q>")]
;; Load one element from memory and duplicate it across a D-register vector
;; (VD_LANE modes), using the all-lanes form of vld1.
5425 (define_insn "neon_vld1_dup<mode>"
5426 [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5427 (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5429 "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5430 [(set_attr "type" "neon_load1_all_lanes<q>")]
5433 ;; Special case for DImode. Treat it exactly like a simple load.
;; Operand 0 is the DI destination register and operand 1 the DI memory
;; source (a neon_struct_operand).
5434 (define_expand "neon_vld1_dupdi"
5435 [(set (match_operand:DI 0 "s_register_operand" "")
5436 (unspec:DI [(match_operand:DI 1 "neon_struct_operand" "")]
;; Load one element from memory and duplicate it across a Q-register vector
;; (VQ2 modes).  A single all-lanes vld1 names both halves of the
;; destination (%e0 and %f0).
5442 (define_insn "neon_vld1_dup<mode>"
5443 [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5444 (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5447 return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5449 [(set_attr "type" "neon_load1_all_lanes<q>")]
;; Duplicate a DI value from memory into both halves of a V2DI register.
;; After reload this is split into a neon_vld1_dupdi load of the low DImode
;; part followed by a move of that part into the high DImode part, which is
;; why the length attribute is 8.
5452 (define_insn_and_split "neon_vld1_dupv2di"
5453 [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5454 (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5457 "&& reload_completed"
5460 rtx tmprtx = gen_lowpart (DImode, operands[0]);
5461 emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5462 emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5465 [(set_attr "length" "8")
5466 (set_attr "type" "neon_load1_all_lanes_q")]
;; Expander that sets a VDQX memory operand (operand 0, a
;; neon_struct_operand) from an unspec of a VDQX register (operand 1).
5469 (define_expand "vec_store_lanes<mode><mode>"
5470 [(set (match_operand:VDQX 0 "neon_struct_operand")
5471 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
;; Whole-vector store: a single vst1 of the VDQX register operand 1 to the
;; memory operand 0.
5475 (define_insn "neon_vst1<mode>"
5476 [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5477 (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5480 "vst1.<V_sz_elem>\t%h1, %A0"
5481 [(set_attr "type" "neon_store1_1reg<q>")])
5483 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5484 ;; here on big endian targets.
;; D-register (VDX) form.  Operand 0 is the element in memory, operand 1 the
;; source vector and operand 2 the lane number, which is rewritten to the
;; lane returned by NEON_ENDIAN_LANE_N before the template is returned.
5485 (define_insn "neon_vst1_lane<mode>"
5486 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5488 [(match_operand:VDX 1 "s_register_operand" "w")
5489 (match_operand:SI 2 "immediate_operand" "i")]
5493 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5494 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5495 operands[2] = GEN_INT (lane);
5497 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5499 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5501 [(set_attr "type" "neon_store1_one_lane<q>")]
5504 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5505 ;; here on big endian targets.
;; Q-register (VQX) form.  The converted lane is compared against half the
;; number of units, and operand 1 is replaced by a <V_HALF>mode register
;; before the vst1 template is returned.
5506 (define_insn "neon_vst1_lane<mode>"
5507 [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5509 [(match_operand:VQX 1 "s_register_operand" "w")
5510 (match_operand:SI 2 "immediate_operand" "i")]
5514 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5515 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5516 int regno = REGNO (operands[1]);
5517 if (lane >= max / 2)
5522 operands[2] = GEN_INT (lane);
5523 operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5525 return "vst1.<V_sz_elem>\t{%P1}, %A0";
5527 return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5529 [(set_attr "type" "neon_store1_one_lane<q>")]
;; Expander for a TImode structure load from memory.  The dummy VDX unspec
;; (UNSPEC_VSTRUCTDUMMY) carries the vector mode of the pattern.
5532 (define_expand "vec_load_lanesti<mode>"
5533 [(set (match_operand:TI 0 "s_register_operand")
5534 (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5535 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure load into a TImode register group for the VDX modes.  64-bit
;; elements are loaded with vld1.64, every other element size with vld2; the
;; type attribute is selected on the same element-size test.
5539 (define_insn "neon_vld2<mode>"
5540 [(set (match_operand:TI 0 "s_register_operand" "=w")
5541 (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5542 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5546 if (<V_sz_elem> == 64)
5547 return "vld1.64\t%h0, %A1";
5549 return "vld2.<V_sz_elem>\t%h0, %A1";
5552 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5553 (const_string "neon_load1_2reg<q>")
5554 (const_string "neon_load2_2reg<q>")))]
;; Expander for an OImode structure load from memory for the VQ2 modes.  The
;; dummy VQ2 unspec (UNSPEC_VSTRUCTDUMMY) carries the vector mode.
5557 (define_expand "vec_load_lanesoi<mode>"
5558 [(set (match_operand:OI 0 "s_register_operand")
5559 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5560 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure load into an OImode register group for the VQ2 modes, emitted
;; as a single vld2.
5564 (define_insn "neon_vld2<mode>"
5565 [(set (match_operand:OI 0 "s_register_operand" "=w")
5566 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5567 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5570 "vld2.<V_sz_elem>\t%h0, %A1"
5571 [(set_attr "type" "neon_load2_2reg_q")])
5573 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5574 ;; here on big endian targets.
;; D-register form (VD_LANE modes).  Operand 2 is the incoming TImode group,
;; tied to the output.  The lane from NEON_ENDIAN_LANE_N is applied to the
;; DImode registers at REGNO (operands[0]) + 0 and + 2.
5575 (define_insn "neon_vld2_lane<mode>"
5576 [(set (match_operand:TI 0 "s_register_operand" "=w")
5577 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5578 (match_operand:TI 2 "s_register_operand" "0")
5579 (match_operand:SI 3 "immediate_operand" "i")
5580 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5584 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5585 int regno = REGNO (operands[0]);
5587 ops[0] = gen_rtx_REG (DImode, regno);
5588 ops[1] = gen_rtx_REG (DImode, regno + 2);
5589 ops[2] = operands[1];
5590 ops[3] = GEN_INT (lane);
5591 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5594 [(set_attr "type" "neon_load2_one_lane<q>")]
5597 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5598 ;; here on big endian targets.
;; Q-register form (VQ_HS modes).  Operand 2 is the incoming OImode group,
;; tied to the output.  The converted lane is compared against half the
;; number of units, and the vld2 names two DImode registers four register
;; numbers apart (regno and regno + 4).
5599 (define_insn "neon_vld2_lane<mode>"
5600 [(set (match_operand:OI 0 "s_register_operand" "=w")
5601 (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5602 (match_operand:OI 2 "s_register_operand" "0")
5603 (match_operand:SI 3 "immediate_operand" "i")
5604 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5608 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5609 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5610 int regno = REGNO (operands[0]);
5612 if (lane >= max / 2)
5617 ops[0] = gen_rtx_REG (DImode, regno);
5618 ops[1] = gen_rtx_REG (DImode, regno + 4);
5619 ops[2] = operands[1];
5620 ops[3] = GEN_INT (lane);
5621 output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5624 [(set_attr "type" "neon_load2_one_lane<q>")]
;; Load a two-element structure and duplicate it across a TImode register
;; group.  Modes with more than one unit use the all-lanes form of vld2;
;; single-unit modes use a plain vld1.  The type attribute follows the same
;; split on the number of units.
5627 (define_insn "neon_vld2_dup<mode>"
5628 [(set (match_operand:TI 0 "s_register_operand" "=w")
5629 (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5630 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5634 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5635 return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5637 return "vld1.<V_sz_elem>\t%h0, %A1";
5640 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5641 (const_string "neon_load2_all_lanes<q>")
5642 (const_string "neon_load1_1reg<q>")))]
;; Expander for a TImode structure store to memory.  The dummy VDX unspec
;; (UNSPEC_VSTRUCTDUMMY) carries the vector mode of the pattern.
5645 (define_expand "vec_store_lanesti<mode>"
5646 [(set (match_operand:TI 0 "neon_struct_operand")
5647 (unspec:TI [(match_operand:TI 1 "s_register_operand")
5648 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure store of a TImode register group for the VDX modes.  64-bit
;; elements are stored with vst1.64, every other element size with vst2.
;; NOTE(review): for the vst2 case the type attribute is
;; neon_store2_one_lane<q> even though whole registers are stored; the VQ2
;; form of this pattern uses neon_store2_4reg<q>.  Confirm this is intended.
5652 (define_insn "neon_vst2<mode>"
5653 [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5654 (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5655 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5659 if (<V_sz_elem> == 64)
5660 return "vst1.64\t%h1, %A0";
5662 return "vst2.<V_sz_elem>\t%h1, %A0";
5665 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5666 (const_string "neon_store1_2reg<q>")
5667 (const_string "neon_store2_one_lane<q>")))]
;; Expander for an OImode structure store to memory for the VQ2 modes.  The
;; dummy VQ2 unspec (UNSPEC_VSTRUCTDUMMY) carries the vector mode.
5670 (define_expand "vec_store_lanesoi<mode>"
5671 [(set (match_operand:OI 0 "neon_struct_operand")
5672 (unspec:OI [(match_operand:OI 1 "s_register_operand")
5673 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure store of an OImode register group for the VQ2 modes, emitted as
;; a single vst2.
5677 (define_insn "neon_vst2<mode>"
5678 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5679 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5680 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5683 "vst2.<V_sz_elem>\t%h1, %A0"
5684 [(set_attr "type" "neon_store2_4reg<q>")]
5687 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5688 ;; here on big endian targets.
;; D-register form (VD_LANE modes).  Stores lane operand 2, after conversion
;; by NEON_ENDIAN_LANE_N, from the DImode registers at REGNO (operands[1])
;; + 0 and + 2 to the memory operand 0.
5689 (define_insn "neon_vst2_lane<mode>"
5690 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5691 (unspec:<V_two_elem>
5692 [(match_operand:TI 1 "s_register_operand" "w")
5693 (match_operand:SI 2 "immediate_operand" "i")
5694 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5698 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5699 int regno = REGNO (operands[1]);
5701 ops[0] = operands[0];
5702 ops[1] = gen_rtx_REG (DImode, regno);
5703 ops[2] = gen_rtx_REG (DImode, regno + 2);
5704 ops[3] = GEN_INT (lane);
5705 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5708 [(set_attr "type" "neon_store2_one_lane<q>")]
5711 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5712 ;; here on big endian targets.
;; Q-register form (VQ_HS modes).  The converted lane is compared against
;; half the number of units, and the vst2 names two DImode registers four
;; register numbers apart (regno and regno + 4).
5713 (define_insn "neon_vst2_lane<mode>"
5714 [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5715 (unspec:<V_two_elem>
5716 [(match_operand:OI 1 "s_register_operand" "w")
5717 (match_operand:SI 2 "immediate_operand" "i")
5718 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5722 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5723 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5724 int regno = REGNO (operands[1]);
5726 if (lane >= max / 2)
5731 ops[0] = operands[0];
5732 ops[1] = gen_rtx_REG (DImode, regno);
5733 ops[2] = gen_rtx_REG (DImode, regno + 4);
5734 ops[3] = GEN_INT (lane);
5735 output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5738 [(set_attr "type" "neon_store2_one_lane<q>")]
;; Expander for an EImode structure load from memory.  The dummy VDX unspec
;; (UNSPEC_VSTRUCTDUMMY) carries the vector mode of the pattern.
5741 (define_expand "vec_load_lanesei<mode>"
5742 [(set (match_operand:EI 0 "s_register_operand")
5743 (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5744 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure load into an EImode register group for the VDX modes.  64-bit
;; elements are loaded with vld1.64, every other element size with vld3; the
;; type attribute is selected on the same element-size test.
5748 (define_insn "neon_vld3<mode>"
5749 [(set (match_operand:EI 0 "s_register_operand" "=w")
5750 (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5751 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5755 if (<V_sz_elem> == 64)
5756 return "vld1.64\t%h0, %A1";
5758 return "vld3.<V_sz_elem>\t%h0, %A1";
5761 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5762 (const_string "neon_load1_3reg<q>")
5763 (const_string "neon_load3_3reg<q>")))]
;; Expander for a CImode structure load for the VQ2 modes; it simply emits
;; neon_vld3<mode> with the same two operands.
5766 (define_expand "vec_load_lanesci<mode>"
5767 [(match_operand:CI 0 "s_register_operand")
5768 (match_operand:CI 1 "neon_struct_operand")
5769 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5772 emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
;; Q-register vld3 (VQ2 modes) is expanded into two insns.  neon_vld3qa
;; reads the first EImode-sized chunk of the memory operand and neon_vld3qb
;; reads the chunk GET_MODE_SIZE (EImode) bytes further on; the second insn
;; also takes operands[0] as an input.
5776 (define_expand "neon_vld3<mode>"
5777 [(match_operand:CI 0 "s_register_operand")
5778 (match_operand:CI 1 "neon_struct_operand")
5779 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5784 mem = adjust_address (operands[1], EImode, 0);
5785 emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5786 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5787 emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
;; First half of a Q-register vld3: loads an EImode chunk of memory into the
;; DImode registers at REGNO (operands[0]) + 0, 4 and 8.
5791 (define_insn "neon_vld3qa<mode>"
5792 [(set (match_operand:CI 0 "s_register_operand" "=w")
5793 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5794 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5798 int regno = REGNO (operands[0]);
5800 ops[0] = gen_rtx_REG (DImode, regno);
5801 ops[1] = gen_rtx_REG (DImode, regno + 4);
5802 ops[2] = gen_rtx_REG (DImode, regno + 8);
5803 ops[3] = operands[1];
5804 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5807 [(set_attr "type" "neon_load3_3reg<q>")]
;; Second half of a Q-register vld3: loads an EImode chunk of memory into
;; the DImode registers at REGNO (operands[0]) + 2, 6 and 10.  Operand 2 is
;; the incoming CImode group, tied to the output.
5810 (define_insn "neon_vld3qb<mode>"
5811 [(set (match_operand:CI 0 "s_register_operand" "=w")
5812 (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5813 (match_operand:CI 2 "s_register_operand" "0")
5814 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5818 int regno = REGNO (operands[0]);
5820 ops[0] = gen_rtx_REG (DImode, regno + 2);
5821 ops[1] = gen_rtx_REG (DImode, regno + 6);
5822 ops[2] = gen_rtx_REG (DImode, regno + 10);
5823 ops[3] = operands[1];
5824 output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5827 [(set_attr "type" "neon_load3_3reg<q>")]
5830 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5831 ;; here on big endian targets.
;; D-register form (VD_LANE modes).  Operand 2 is the incoming EImode group,
;; tied to the output.  The lane from NEON_ENDIAN_LANE_N is applied to the
;; DImode registers at REGNO (operands[0]) + 0, 2 and 4.
;; NOTE(review): the memory operand is printed with %3 here, whereas the
;; vld2 and vld4 lane patterns print theirs with %A; confirm the difference
;; is intentional.
5832 (define_insn "neon_vld3_lane<mode>"
5833 [(set (match_operand:EI 0 "s_register_operand" "=w")
5834 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5835 (match_operand:EI 2 "s_register_operand" "0")
5836 (match_operand:SI 3 "immediate_operand" "i")
5837 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5841 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5842 int regno = REGNO (operands[0]);
5844 ops[0] = gen_rtx_REG (DImode, regno);
5845 ops[1] = gen_rtx_REG (DImode, regno + 2);
5846 ops[2] = gen_rtx_REG (DImode, regno + 4);
5847 ops[3] = operands[1];
5848 ops[4] = GEN_INT (lane);
5849 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5853 [(set_attr "type" "neon_load3_one_lane<q>")]
5856 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5857 ;; here on big endian targets.
;; Q-register form (VQ_HS modes).  Operand 2 is the incoming CImode group,
;; tied to the output.  The converted lane is compared against half the
;; number of units, and the vld3 names three DImode registers four register
;; numbers apart (regno, regno + 4 and regno + 8).
;; NOTE(review): the memory operand is printed with %3 here, whereas the
;; vld2 and vld4 lane patterns print theirs with %A; confirm the difference
;; is intentional.
5858 (define_insn "neon_vld3_lane<mode>"
5859 [(set (match_operand:CI 0 "s_register_operand" "=w")
5860 (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5861 (match_operand:CI 2 "s_register_operand" "0")
5862 (match_operand:SI 3 "immediate_operand" "i")
5863 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5867 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5868 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5869 int regno = REGNO (operands[0]);
5871 if (lane >= max / 2)
5876 ops[0] = gen_rtx_REG (DImode, regno);
5877 ops[1] = gen_rtx_REG (DImode, regno + 4);
5878 ops[2] = gen_rtx_REG (DImode, regno + 8);
5879 ops[3] = operands[1];
5880 ops[4] = GEN_INT (lane);
5881 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5885 [(set_attr "type" "neon_load3_one_lane<q>")]
;; Load a three-element structure and duplicate it across an EImode register
;; group.  Modes with more than one unit use the all-lanes form of vld3 on
;; the DImode registers at REGNO (operands[0]) + 0, 2 and 4; single-unit
;; modes use a plain vld1.  The type attribute follows the same split.
;; NOTE(review): the all-lanes vld3 prints the memory operand with %3 while
;; the vld1 fallback uses %A1; confirm the difference is intentional.
5888 (define_insn "neon_vld3_dup<mode>"
5889 [(set (match_operand:EI 0 "s_register_operand" "=w")
5890 (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5891 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5895 if (GET_MODE_NUNITS (<MODE>mode) > 1)
5897 int regno = REGNO (operands[0]);
5899 ops[0] = gen_rtx_REG (DImode, regno);
5900 ops[1] = gen_rtx_REG (DImode, regno + 2);
5901 ops[2] = gen_rtx_REG (DImode, regno + 4);
5902 ops[3] = operands[1];
5903 output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5907 return "vld1.<V_sz_elem>\t%h0, %A1";
5910 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5911 (const_string "neon_load3_all_lanes<q>")
5912 (const_string "neon_load1_1reg<q>")))])
;; Expander for an EImode structure store to memory.  The dummy VDX unspec
;; (UNSPEC_VSTRUCTDUMMY) carries the vector mode of the pattern.
5914 (define_expand "vec_store_lanesei<mode>"
5915 [(set (match_operand:EI 0 "neon_struct_operand")
5916 (unspec:EI [(match_operand:EI 1 "s_register_operand")
5917 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure store of an EImode register group for the VDX modes.  64-bit
;; elements are stored with vst1.64, every other element size with vst3.
;; NOTE(review): for the vst3 case the type attribute is
;; neon_store3_one_lane<q> even though whole registers are stored; the
;; neon_vst3qa/neon_vst3qb patterns use neon_store3_3reg<q>.  Confirm this
;; is intended.
5921 (define_insn "neon_vst3<mode>"
5922 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5923 (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5924 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5928 if (<V_sz_elem> == 64)
5929 return "vst1.64\t%h1, %A0";
5931 return "vst3.<V_sz_elem>\t%h1, %A0";
5934 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5935 (const_string "neon_store1_3reg<q>")
5936 (const_string "neon_store3_one_lane<q>")))])
;; Expander for a CImode structure store for the VQ2 modes; it simply emits
;; neon_vst3<mode> with the same two operands.
5938 (define_expand "vec_store_lanesci<mode>"
5939 [(match_operand:CI 0 "neon_struct_operand")
5940 (match_operand:CI 1 "s_register_operand")
5941 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5944 emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
;; Q-register vst3 (VQ2 modes) is expanded into two insns.  neon_vst3qa
;; writes the first EImode-sized chunk of the memory operand and neon_vst3qb
;; writes the chunk GET_MODE_SIZE (EImode) bytes further on; both read the
;; same CImode register group.
5948 (define_expand "neon_vst3<mode>"
5949 [(match_operand:CI 0 "neon_struct_operand")
5950 (match_operand:CI 1 "s_register_operand")
5951 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5956 mem = adjust_address (operands[0], EImode, 0);
5957 emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5958 mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5959 emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
;; First half of a Q-register vst3: stores the DImode registers at
;; REGNO (operands[1]) + 0, 4 and 8 to an EImode chunk of memory.
5963 (define_insn "neon_vst3qa<mode>"
5964 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5965 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5966 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5970 int regno = REGNO (operands[1]);
5972 ops[0] = operands[0];
5973 ops[1] = gen_rtx_REG (DImode, regno);
5974 ops[2] = gen_rtx_REG (DImode, regno + 4);
5975 ops[3] = gen_rtx_REG (DImode, regno + 8);
5976 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5979 [(set_attr "type" "neon_store3_3reg<q>")]
;; Second half of a Q-register vst3: stores the DImode registers at
;; REGNO (operands[1]) + 2, 6 and 10 to an EImode chunk of memory.
5982 (define_insn "neon_vst3qb<mode>"
5983 [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5984 (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5985 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5989 int regno = REGNO (operands[1]);
5991 ops[0] = operands[0];
5992 ops[1] = gen_rtx_REG (DImode, regno + 2);
5993 ops[2] = gen_rtx_REG (DImode, regno + 6);
5994 ops[3] = gen_rtx_REG (DImode, regno + 10);
5995 output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5998 [(set_attr "type" "neon_store3_3reg<q>")]
6001 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6002 ;; here on big endian targets.
;; D-register form (VD_LANE modes).  Stores lane operand 2, after conversion
;; by NEON_ENDIAN_LANE_N, from the DImode registers at REGNO (operands[1])
;; + 0, 2 and 4 to the memory operand 0.
;; NOTE(review): the memory operand is printed with %0 here, whereas the
;; vst2 lane patterns print theirs with %A0; confirm the difference is
;; intentional.
6003 (define_insn "neon_vst3_lane<mode>"
6004 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6005 (unspec:<V_three_elem>
6006 [(match_operand:EI 1 "s_register_operand" "w")
6007 (match_operand:SI 2 "immediate_operand" "i")
6008 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6012 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6013 int regno = REGNO (operands[1]);
6015 ops[0] = operands[0];
6016 ops[1] = gen_rtx_REG (DImode, regno);
6017 ops[2] = gen_rtx_REG (DImode, regno + 2);
6018 ops[3] = gen_rtx_REG (DImode, regno + 4);
6019 ops[4] = GEN_INT (lane);
6020 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6024 [(set_attr "type" "neon_store3_one_lane<q>")]
6027 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6028 ;; here on big endian targets.
;; Q-register form (VQ_HS modes).  The converted lane is compared against
;; half the number of units, and the vst3 names three DImode registers four
;; register numbers apart (regno, regno + 4 and regno + 8).
;; NOTE(review): the memory operand is printed with %0 here, whereas the
;; vst2 lane patterns print theirs with %A0; confirm the difference is
;; intentional.
6029 (define_insn "neon_vst3_lane<mode>"
6030 [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
6031 (unspec:<V_three_elem>
6032 [(match_operand:CI 1 "s_register_operand" "w")
6033 (match_operand:SI 2 "immediate_operand" "i")
6034 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6038 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6039 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6040 int regno = REGNO (operands[1]);
6042 if (lane >= max / 2)
6047 ops[0] = operands[0];
6048 ops[1] = gen_rtx_REG (DImode, regno);
6049 ops[2] = gen_rtx_REG (DImode, regno + 4);
6050 ops[3] = gen_rtx_REG (DImode, regno + 8);
6051 ops[4] = GEN_INT (lane);
6052 output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
6056 [(set_attr "type" "neon_store3_one_lane<q>")]
;; Expander for an OImode structure load from memory for the VDX modes.  The
;; dummy VDX unspec (UNSPEC_VSTRUCTDUMMY) carries the vector mode.
6059 (define_expand "vec_load_lanesoi<mode>"
6060 [(set (match_operand:OI 0 "s_register_operand")
6061 (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
6062 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Structure load into an OImode register group for the VDX modes.  64-bit
;; elements are loaded with vld1.64, every other element size with vld4; the
;; type attribute is selected on the same element-size test.
6066 (define_insn "neon_vld4<mode>"
6067 [(set (match_operand:OI 0 "s_register_operand" "=w")
6068 (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
6069 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6073 if (<V_sz_elem> == 64)
6074 return "vld1.64\t%h0, %A1";
6076 return "vld4.<V_sz_elem>\t%h0, %A1";
6079 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6080 (const_string "neon_load1_4reg<q>")
6081 (const_string "neon_load4_4reg<q>")))]
;; Expander for an XImode structure load for the VQ2 modes; it simply emits
;; neon_vld4<mode> with the same two operands.
6084 (define_expand "vec_load_lanesxi<mode>"
6085 [(match_operand:XI 0 "s_register_operand")
6086 (match_operand:XI 1 "neon_struct_operand")
6087 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6090 emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
;; Q-register vld4 (VQ2 modes) is expanded into two insns.  neon_vld4qa
;; reads the first OImode-sized chunk of the memory operand and neon_vld4qb
;; reads the chunk GET_MODE_SIZE (OImode) bytes further on; the second insn
;; also takes operands[0] as an input.
6094 (define_expand "neon_vld4<mode>"
6095 [(match_operand:XI 0 "s_register_operand")
6096 (match_operand:XI 1 "neon_struct_operand")
6097 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6102 mem = adjust_address (operands[1], OImode, 0);
6103 emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6104 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6105 emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
;; First half of a Q-register vld4: loads an OImode chunk of memory into the
;; DImode registers at REGNO (operands[0]) + 0, 4, 8 and 12.
6109 (define_insn "neon_vld4qa<mode>"
6110 [(set (match_operand:XI 0 "s_register_operand" "=w")
6111 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6112 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6116 int regno = REGNO (operands[0]);
6118 ops[0] = gen_rtx_REG (DImode, regno);
6119 ops[1] = gen_rtx_REG (DImode, regno + 4);
6120 ops[2] = gen_rtx_REG (DImode, regno + 8);
6121 ops[3] = gen_rtx_REG (DImode, regno + 12);
6122 ops[4] = operands[1];
6123 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6126 [(set_attr "type" "neon_load4_4reg<q>")]
;; Second half of a Q-register vld4: loads an OImode chunk of memory into
;; the DImode registers at REGNO (operands[0]) + 2, 6, 10 and 14.  Operand 2
;; is the incoming XImode group, tied to the output.
6129 (define_insn "neon_vld4qb<mode>"
6130 [(set (match_operand:XI 0 "s_register_operand" "=w")
6131 (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6132 (match_operand:XI 2 "s_register_operand" "0")
6133 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6137 int regno = REGNO (operands[0]);
6139 ops[0] = gen_rtx_REG (DImode, regno + 2);
6140 ops[1] = gen_rtx_REG (DImode, regno + 6);
6141 ops[2] = gen_rtx_REG (DImode, regno + 10);
6142 ops[3] = gen_rtx_REG (DImode, regno + 14);
6143 ops[4] = operands[1];
6144 output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6147 [(set_attr "type" "neon_load4_4reg<q>")]
6150 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6151 ;; here on big endian targets.
;; D-register form (VD_LANE modes).  Operand 2 is the incoming OImode group,
;; tied to the output.  The lane from NEON_ENDIAN_LANE_N is applied to the
;; DImode registers at REGNO (operands[0]) + 0, 2, 4 and 6.
6152 (define_insn "neon_vld4_lane<mode>"
6153 [(set (match_operand:OI 0 "s_register_operand" "=w")
6154 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6155 (match_operand:OI 2 "s_register_operand" "0")
6156 (match_operand:SI 3 "immediate_operand" "i")
6157 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6161 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6162 int regno = REGNO (operands[0]);
6164 ops[0] = gen_rtx_REG (DImode, regno);
6165 ops[1] = gen_rtx_REG (DImode, regno + 2);
6166 ops[2] = gen_rtx_REG (DImode, regno + 4);
6167 ops[3] = gen_rtx_REG (DImode, regno + 6);
6168 ops[4] = operands[1];
6169 ops[5] = GEN_INT (lane);
6170 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6174 [(set_attr "type" "neon_load4_one_lane<q>")]
6177 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6178 ;; here on big endian targets.
;; Q-register form (VQ_HS modes).  Operand 2 is the incoming XImode group,
;; tied to the output.  The converted lane is compared against half the
;; number of units, and the vld4 names four DImode registers four register
;; numbers apart (regno, regno + 4, regno + 8 and regno + 12).
6179 (define_insn "neon_vld4_lane<mode>"
6180 [(set (match_operand:XI 0 "s_register_operand" "=w")
6181 (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6182 (match_operand:XI 2 "s_register_operand" "0")
6183 (match_operand:SI 3 "immediate_operand" "i")
6184 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6188 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6189 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6190 int regno = REGNO (operands[0]);
6192 if (lane >= max / 2)
6197 ops[0] = gen_rtx_REG (DImode, regno);
6198 ops[1] = gen_rtx_REG (DImode, regno + 4);
6199 ops[2] = gen_rtx_REG (DImode, regno + 8);
6200 ops[3] = gen_rtx_REG (DImode, regno + 12);
6201 ops[4] = operands[1];
6202 ops[5] = GEN_INT (lane);
6203 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6207 [(set_attr "type" "neon_load4_one_lane<q>")]
;; Load a four-element structure and duplicate it across an OImode register
;; group.  Modes with more than one unit use the all-lanes form of vld4 on
;; the DImode registers at REGNO (operands[0]) + 0, 2, 4 and 6; single-unit
;; modes use a plain vld1.  The type attribute follows the same split.
6210 (define_insn "neon_vld4_dup<mode>"
6211 [(set (match_operand:OI 0 "s_register_operand" "=w")
6212 (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6213 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6217 if (GET_MODE_NUNITS (<MODE>mode) > 1)
6219 int regno = REGNO (operands[0]);
6221 ops[0] = gen_rtx_REG (DImode, regno);
6222 ops[1] = gen_rtx_REG (DImode, regno + 2);
6223 ops[2] = gen_rtx_REG (DImode, regno + 4);
6224 ops[3] = gen_rtx_REG (DImode, regno + 6);
6225 ops[4] = operands[1];
6226 output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6231 return "vld1.<V_sz_elem>\t%h0, %A1";
6234 (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6235 (const_string "neon_load4_all_lanes<q>")
6236 (const_string "neon_load1_1reg<q>")))]
;; Expander that stores an OI-mode register group (operand 1) to a
;; neon_struct_operand memory (operand 0) for the VDX modes.  The inner
;; UNSPEC_VSTRUCTDUMMY unspec only carries the vector mode iterator.
6239 (define_expand "vec_store_lanesoi<mode>"
6240 [(set (match_operand:OI 0 "neon_struct_operand")
6241 (unspec:OI [(match_operand:OI 1 "s_register_operand")
6242 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
;; Store an OI-mode register group (operand 1) to the structure memory in
;; operand 0 for the VDX modes.  64-bit elements are emitted as "vst1.64";
;; every other element size is emitted as "vst4.<V_sz_elem>".  The "type"
;; attribute makes the same distinction: neon_store1_4reg<q> for 64-bit
;; elements and neon_store4_4reg<q> otherwise.
6246 (define_insn "neon_vst4<mode>"
6247 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6248 (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6249 (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6253 if (<V_sz_elem> == 64)
6254 return "vst1.64\t%h1, %A0";
6256 return "vst4.<V_sz_elem>\t%h1, %A0";
6259 (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6260 (const_string "neon_store1_4reg<q>")
6261 (const_string "neon_store4_4reg<q>")))]
;; Expander for storing an XI-mode register group with the VQ2 modes.
;; It simply forwards operands 0 (memory) and 1 (registers) to
;; gen_neon_vst4<mode>.
6264 (define_expand "vec_store_lanesxi<mode>"
6265 [(match_operand:XI 0 "neon_struct_operand")
6266 (match_operand:XI 1 "s_register_operand")
6267 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6270 emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
;; Expander for the XI-mode (VQ2) store.  The XI memory operand is split
;; into two OI-mode pieces: the first, at offset 0, is written by
;; gen_neon_vst4qa<mode>; the second, GET_MODE_SIZE (OImode) bytes further
;; on, is written by gen_neon_vst4qb<mode>.  Both receive the complete XI
;; register operand 1.
6274 (define_expand "neon_vst4<mode>"
6275 [(match_operand:XI 0 "neon_struct_operand")
6276 (match_operand:XI 1 "s_register_operand")
6277 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6282 mem = adjust_address (operands[0], OImode, 0);
6283 emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6284 mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6285 emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
;; First half of the XI-mode vst4 (VQ2 modes).  Operand 0 is the OI-mode
;; memory destination and operand 1 the XI-mode register group.  The
;; output code stores the D registers at regno, regno + 4, regno + 8 and
;; regno + 12 with a single "vst4.<V_sz_elem>".
6289 (define_insn "neon_vst4qa<mode>"
6290 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6291 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6292 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6296 int regno = REGNO (operands[1]);
6298 ops[0] = operands[0];
6299 ops[1] = gen_rtx_REG (DImode, regno);
6300 ops[2] = gen_rtx_REG (DImode, regno + 4);
6301 ops[3] = gen_rtx_REG (DImode, regno + 8);
6302 ops[4] = gen_rtx_REG (DImode, regno + 12);
6303 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6306 [(set_attr "type" "neon_store4_4reg<q>")]
;; Second half of the XI-mode vst4 (VQ2 modes).  Same operands as
;; neon_vst4qa<mode>, but the output code stores the D registers at
;; regno + 2, regno + 6, regno + 10 and regno + 14, i.e. the registers
;; that neon_vst4qa<mode> does not touch.
6309 (define_insn "neon_vst4qb<mode>"
6310 [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6311 (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6312 (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6316 int regno = REGNO (operands[1]);
6318 ops[0] = operands[0];
6319 ops[1] = gen_rtx_REG (DImode, regno + 2);
6320 ops[2] = gen_rtx_REG (DImode, regno + 6);
6321 ops[3] = gen_rtx_REG (DImode, regno + 10);
6322 ops[4] = gen_rtx_REG (DImode, regno + 14);
6323 output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6326 [(set_attr "type" "neon_store4_4reg<q>")]
6329 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6330 ;; here on big endian targets.
;; Single-lane vst4 for the VD_LANE modes.  Operand 0 is the <V_four_elem>
;; memory destination, operand 1 the OI-mode register group and operand 2
;; the immediate lane number.  The lane is mapped through
;; NEON_ENDIAN_LANE_N and the output code names the D registers at regno,
;; regno + 2, regno + 4 and regno + 6, printing "vst4.<V_sz_elem>" with
;; the lane index on each.
6331 (define_insn "neon_vst4_lane<mode>"
6332 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6333 (unspec:<V_four_elem>
6334 [(match_operand:OI 1 "s_register_operand" "w")
6335 (match_operand:SI 2 "immediate_operand" "i")
6336 (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6340 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6341 int regno = REGNO (operands[1]);
6343 ops[0] = operands[0];
6344 ops[1] = gen_rtx_REG (DImode, regno);
6345 ops[2] = gen_rtx_REG (DImode, regno + 2);
6346 ops[3] = gen_rtx_REG (DImode, regno + 4);
6347 ops[4] = gen_rtx_REG (DImode, regno + 6);
6348 ops[5] = GEN_INT (lane);
6349 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6353 [(set_attr "type" "neon_store4_one_lane<q>")]
6356 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6357 ;; here on big endian targets.
;; Single-lane vst4 for the VQ_HS modes.  Operand 0 is the <V_four_elem>
;; memory destination, operand 1 the XI-mode register group and operand 2
;; the immediate lane number.  The lane is mapped through
;; NEON_ENDIAN_LANE_N and the output code names the D registers at regno,
;; regno + 4, regno + 8 and regno + 12, printing "vst4.<V_sz_elem>" with
;; the lane index on each.
;; NOTE(review): presumably lanes with lane >= max / 2 are rebased onto
;; the other D-register half -- confirm against the branch body.
;; The "type" attribute is the one-lane store type, as on the VD_LANE form
;; of this pattern: only a single lane of each register is written.
6358 (define_insn "neon_vst4_lane<mode>"
6359 [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6360 (unspec:<V_four_elem>
6361 [(match_operand:XI 1 "s_register_operand" "w")
6362 (match_operand:SI 2 "immediate_operand" "i")
6363 (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6367 HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6368 HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6369 int regno = REGNO (operands[1]);
6371 if (lane >= max / 2)
6376 ops[0] = operands[0];
6377 ops[1] = gen_rtx_REG (DImode, regno);
6378 ops[2] = gen_rtx_REG (DImode, regno + 4);
6379 ops[3] = gen_rtx_REG (DImode, regno + 8);
6380 ops[4] = gen_rtx_REG (DImode, regno + 12);
6381 ops[5] = GEN_INT (lane);
6382 output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6386 [(set_attr "type" "neon_store4_one_lane<q>")]
;; Extend (SE) the half of the VU-mode operand 1 selected by operand 2,
;; which must satisfy vect_par_constant_low, into the <V_unpack> result.
;; Emitted as "vmovl" reading %e1.  Little-endian only.
6389 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6390 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6391 (SE:<V_unpack> (vec_select:<V_HALF>
6392 (match_operand:VU 1 "register_operand" "w")
6393 (match_operand:VU 2 "vect_par_constant_low" ""))))]
6394 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6395 "vmovl.<US><V_sz_elem> %q0, %e1"
6396 [(set_attr "type" "neon_shift_imm_long")]
;; Extend (SE) the half of the VU-mode operand 1 selected by operand 2,
;; which must satisfy vect_par_constant_high, into the <V_unpack> result.
;; Emitted as "vmovl" reading %f1.  Little-endian only.
6399 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6400 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6401 (SE:<V_unpack> (vec_select:<V_HALF>
6402 (match_operand:VU 1 "register_operand" "w")
6403 (match_operand:VU 2 "vect_par_constant_high" ""))))]
6404 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6405 "vmovl.<US><V_sz_elem> %q0, %f1"
6406 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for unpacking the high half of a VU-mode vector.  It builds a
;; PARALLEL holding the indices <V_mode_nunits>/2 .. <V_mode_nunits>-1 and
;; passes it on to gen_neon_vec_unpack<US>_hi_<mode>.  Little-endian only.
6409 (define_expand "vec_unpack<US>_hi_<mode>"
6410 [(match_operand:<V_unpack> 0 "register_operand" "")
6411 (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6412 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6414 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6417 for (i = 0; i < (<V_mode_nunits>/2); i++)
6418 RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6420 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6421 emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
;; Expander for unpacking the low half of a VU-mode vector.  It builds a
;; PARALLEL holding the indices 0 .. <V_mode_nunits>/2-1 and passes it on
;; to gen_neon_vec_unpack<US>_lo_<mode>.  Little-endian only.
6428 (define_expand "vec_unpack<US>_lo_<mode>"
6429 [(match_operand:<V_unpack> 0 "register_operand" "")
6430 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))]
6431 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6433 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6436 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6437 RTVEC_ELT (v, i) = GEN_INT (i);
6438 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6439 emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
;; Widening multiply of vector halves: both VU-mode inputs (operands 1
;; and 3) go through a vec_select and an SE extension before the
;; <V_unpack>-mode multiply; operand 2 must satisfy vect_par_constant_low.
;; Emitted as "vmull" reading %e1 and %e3.  Little-endian only.
6446 (define_insn "neon_vec_<US>mult_lo_<mode>"
6447 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6448 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6449 (match_operand:VU 1 "register_operand" "w")
6450 (match_operand:VU 2 "vect_par_constant_low" "")))
6451 (SE:<V_unpack> (vec_select:<V_HALF>
6452 (match_operand:VU 3 "register_operand" "w")
6454 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6455 "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6456 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the widening multiply of the low halves of two VU-mode
;; vectors.  It builds a PARALLEL holding the indices
;; 0 .. <V_mode_nunits>/2-1 and hands it to
;; gen_neon_vec_<US>mult_lo_<mode>.  Little-endian only.
6459 (define_expand "vec_widen_<US>mult_lo_<mode>"
6460 [(match_operand:<V_unpack> 0 "register_operand" "")
6461 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6462 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6463 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6465 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6468 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6469 RTVEC_ELT (v, i) = GEN_INT (i);
6470 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6472 emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
;; Widening multiply of vector halves: both VU-mode inputs (operands 1
;; and 3) go through a vec_select and an SE extension before the
;; <V_unpack>-mode multiply; operand 2 must satisfy vect_par_constant_high.
;; Emitted as "vmull" reading %f1 and %f3.  Little-endian only.
6480 (define_insn "neon_vec_<US>mult_hi_<mode>"
6481 [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6482 (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6483 (match_operand:VU 1 "register_operand" "w")
6484 (match_operand:VU 2 "vect_par_constant_high" "")))
6485 (SE:<V_unpack> (vec_select:<V_HALF>
6486 (match_operand:VU 3 "register_operand" "w")
6488 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6489 "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6490 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the widening multiply of the high halves of two VU-mode
;; vectors.  It builds a PARALLEL holding the indices
;; <V_mode_nunits>/2 .. <V_mode_nunits>-1 and hands it to
;; gen_neon_vec_<US>mult_hi_<mode>.  Little-endian only.
6493 (define_expand "vec_widen_<US>mult_hi_<mode>"
6494 [(match_operand:<V_unpack> 0 "register_operand" "")
6495 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6496 (SE:<V_unpack> (match_operand:VU 2 "register_operand" ""))]
6497 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6499 rtvec v = rtvec_alloc (<V_mode_nunits>/2) ;
6502 for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6503 RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6504 t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6506 emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
;; Widening left shift: the VW-mode operand 1 is shifted left by the
;; constant operand 2 (const_neon_scalar_shift_amount_operand) and the
;; result is extended (SE) to <V_widen>.  Emitted as "vshll".
6515 (define_insn "neon_vec_<US>shiftl_<mode>"
6516 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6517 (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6518 (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6521 return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6523 [(set_attr "type" "neon_shift_imm_long")]
;; Expander for the widening left shift of part of a VU-mode vector: it
;; calls gen_neon_vec_<US>shiftl_<V_half> on the <V_HALF>-mode subreg of
;; operand 1 at byte offset 0.  Little-endian only.
6526 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6527 [(match_operand:<V_unpack> 0 "register_operand" "")
6528 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6529 (match_operand:SI 2 "immediate_operand" "i")]
6530 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6532 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6533 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
;; Expander for the widening left shift of part of a VU-mode vector: it
;; calls gen_neon_vec_<US>shiftl_<V_half> on the <V_HALF>-mode subreg of
;; operand 1 at byte offset GET_MODE_SIZE (<V_HALF>mode).  Little-endian
;; only.
6539 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6540 [(match_operand:<V_unpack> 0 "register_operand" "")
6541 (SE:<V_unpack> (match_operand:VU 1 "register_operand" ""))
6542 (match_operand:SI 2 "immediate_operand" "i")]
6543 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6545 emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6546 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6547 GET_MODE_SIZE (<V_HALF>mode)),
6553 ;; Vectorize for non-neon-quad case
;; Extend (SE) a whole VDI-mode register (operand 1) into the <V_widen>
;; result, emitted as "vmovl" reading %P1.
;; NOTE(review): the vmovl patterns neon_vec_unpack<US>_lo/hi_<mode> use
;; type neon_shift_imm_long while this one uses neon_move -- confirm which
;; scheduling type is intended.
6554 (define_insn "neon_unpack<US>_<mode>"
6555 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6556 (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6558 "vmovl.<US><V_sz_elem> %q0, %P1"
6559 [(set_attr "type" "neon_move")]
;; Expander for the VDI modes: extend operand 1 into a fresh <V_widen>
;; temporary with gen_neon_unpack<US>_<mode>, then copy the low half of
;; that temporary to operand 0 with gen_neon_vget_low<V_widen_l>.
6562 (define_expand "vec_unpack<US>_lo_<mode>"
6563 [(match_operand:<V_double_width> 0 "register_operand" "")
6564 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6567 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6568 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6569 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: extend operand 1 into a fresh <V_widen>
;; temporary with gen_neon_unpack<US>_<mode>, then copy the high half of
;; that temporary to operand 0 with gen_neon_vget_high<V_widen_l>.
6575 (define_expand "vec_unpack<US>_hi_<mode>"
6576 [(match_operand:<V_double_width> 0 "register_operand" "")
6577 (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6580 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6581 emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6582 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Widening multiply of two whole VDI-mode registers (operands 1 and 2)
;; producing a <V_widen> result, emitted as "vmull" reading %P1 and %P2.
6588 (define_insn "neon_vec_<US>mult_<mode>"
6589 [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6590 (mult:<V_widen> (SE:<V_widen>
6591 (match_operand:VDI 1 "register_operand" "w"))
6593 (match_operand:VDI 2 "register_operand" "w"))))]
6595 "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6596 [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
;; Expander for the VDI modes: compute the full widening product of
;; operands 1 and 2 into a fresh <V_widen> temporary with
;; gen_neon_vec_<US>mult_<mode>, then copy its high half to operand 0
;; with gen_neon_vget_high<V_widen_l>.
6599 (define_expand "vec_widen_<US>mult_hi_<mode>"
6600 [(match_operand:<V_double_width> 0 "register_operand" "")
6601 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6602 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6605 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6606 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6607 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: compute the full widening product of
;; operands 1 and 2 into a fresh <V_widen> temporary with
;; gen_neon_vec_<US>mult_<mode>, then copy its low half to operand 0
;; with gen_neon_vget_low<V_widen_l>.
6614 (define_expand "vec_widen_<US>mult_lo_<mode>"
6615 [(match_operand:<V_double_width> 0 "register_operand" "")
6616 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6617 (SE:<V_double_width> (match_operand:VDI 2 "register_operand" ""))]
6620 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6621 emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6622 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: perform the widening left shift of
;; operand 1 by operand 2 into a fresh <V_widen> temporary with
;; gen_neon_vec_<US>shiftl_<mode>, then copy its high half to operand 0
;; with gen_neon_vget_high<V_widen_l>.
6629 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6630 [(match_operand:<V_double_width> 0 "register_operand" "")
6631 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6632 (match_operand:SI 2 "immediate_operand" "i")]
6635 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6636 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6637 emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
;; Expander for the VDI modes: perform the widening left shift of
;; operand 1 by operand 2 into a fresh <V_widen> temporary with
;; gen_neon_vec_<US>shiftl_<mode>, then copy its low half to operand 0
;; with gen_neon_vget_low<V_widen_l>.
6643 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6644 [(match_operand:<V_double_width> 0 "register_operand" "")
6645 (SE:<V_double_width> (match_operand:VDI 1 "register_operand" ""))
6646 (match_operand:SI 2 "immediate_operand" "i")]
6649 rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6650 emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6651 emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6657 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6658 ; because the ordering of vector elements in Q registers is different from what
6659 ; the semantics of the instructions require.
;; Truncate the two VN-mode inputs (operands 1 and 2) and concatenate the
;; results into the <V_narrow_pack> destination.  Emitted as two "vmovn"
;; instructions writing %e0 and %f0, hence length 8 and type "multiple".
;; The destination is earlyclobber ("=&w").  Little-endian only.
6661 (define_insn "vec_pack_trunc_<mode>"
6662 [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6663 (vec_concat:<V_narrow_pack>
6664 (truncate:<V_narrow>
6665 (match_operand:VN 1 "register_operand" "w"))
6666 (truncate:<V_narrow>
6667 (match_operand:VN 2 "register_operand" "w"))))]
6668 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6669 "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6670 [(set_attr "type" "multiple")
6671 (set_attr "length" "8")]
6674 ;; For the non-quad case.
;; Truncate a single VN-mode register (operand 1) to <V_narrow> with one
;; "vmovn" writing %P0.  Little-endian only.
6675 (define_insn "neon_vec_pack_trunc_<mode>"
6676 [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6677 (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6678 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6679 "vmovn.i<V_sz_elem>\t%P0, %q1"
6680 [(set_attr "type" "neon_move_narrow_q")]
;; Expander for the VSHFT modes: assemble a <V_DOUBLE> temporary from
;; operand 1 (via gen_move_lo_quad_<V_double>) and operand 2 (via
;; gen_move_hi_quad_<V_double>), then narrow that temporary into operand 0
;; with gen_neon_vec_pack_trunc_<V_double>.  Little-endian only.
6683 (define_expand "vec_pack_trunc_<mode>"
6684 [(match_operand:<V_narrow_pack> 0 "register_operand" "")
6685 (match_operand:VSHFT 1 "register_operand" "")
6686 (match_operand:VSHFT 2 "register_operand")]
6687 "TARGET_NEON && !BYTES_BIG_ENDIAN"
6689 rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6691 emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6692 emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6693 emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
;; Match abs (operand 1 - operand 2) on the VF modes and emit it as a
;; single "vabd".  Only enabled under flag_unsafe_math_optimizations.
6697 (define_insn "neon_vabd<mode>_2"
6698 [(set (match_operand:VF 0 "s_register_operand" "=w")
6699 (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6700 (match_operand:VF 2 "s_register_operand" "w"))))]
6701 "TARGET_NEON && flag_unsafe_math_optimizations"
6702 "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6703 [(set_attr "type" "neon_fp_abd_s<q>")]
;; Match abs of an unspec over operands 1 and 2 on the VF modes and emit
;; it as a single "vabd".  Only enabled under
;; flag_unsafe_math_optimizations.
6706 (define_insn "neon_vabd<mode>_3"
6707 [(set (match_operand:VF 0 "s_register_operand" "=w")
6708 (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6709 (match_operand:VF 2 "s_register_operand" "w")]
6711 "TARGET_NEON && flag_unsafe_math_optimizations"
6712 "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6713 [(set_attr "type" "neon_fp_abd_s<q>")]
6716 ;; Copy from core-to-neon regs, then extend, not vice-versa
;; After reload, when the DI destination is a VFP register: sign-extend
;; SI to DI by duplicating operand 1 across the V2SI view of the
;; destination register, then arithmetic-shifting the DI value right by 32.
6719 [(set (match_operand:DI 0 "s_register_operand" "")
6720 (sign_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6721 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6722 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6723 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 32)))]
6725 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register: sign-extend
;; HI to DI by duplicating operand 1 across the V4HI view of the
;; destination register, then arithmetic-shifting the DI value right by 48.
6729 [(set (match_operand:DI 0 "s_register_operand" "")
6730 (sign_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6731 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6732 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6733 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 48)))]
6735 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register: sign-extend
;; QI to DI by duplicating operand 1 across the V8QI view of the
;; destination register, then arithmetic-shifting the DI value right by 56.
6739 [(set (match_operand:DI 0 "s_register_operand" "")
6740 (sign_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6741 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6742 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6743 (set (match_dup 0) (ashiftrt:DI (match_dup 0) (const_int 56)))]
6745 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register: zero-extend
;; SI to DI by duplicating operand 1 across the V2SI view of the
;; destination register, then logical-shifting the DI value right by 32.
6749 [(set (match_operand:DI 0 "s_register_operand" "")
6750 (zero_extend:DI (match_operand:SI 1 "s_register_operand" "")))]
6751 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6752 [(set (match_dup 2) (vec_duplicate:V2SI (match_dup 1)))
6753 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 32)))]
6755 operands[2] = gen_rtx_REG (V2SImode, REGNO (operands[0]));
;; After reload, when the DI destination is a VFP register: zero-extend
;; HI to DI by duplicating operand 1 across the V4HI view of the
;; destination register, then logical-shifting the DI value right by 48.
6759 [(set (match_operand:DI 0 "s_register_operand" "")
6760 (zero_extend:DI (match_operand:HI 1 "s_register_operand" "")))]
6761 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6762 [(set (match_dup 2) (vec_duplicate:V4HI (match_dup 1)))
6763 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 48)))]
6765 operands[2] = gen_rtx_REG (V4HImode, REGNO (operands[0]));
6769 [(set (match_operand:DI 0 "s_register_operand" "")
6770 (zero_extend:DI (match_operand:QI 1 "s_register_operand" "")))]
6771 "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
6772 [(set (match_dup 2) (vec_duplicate:V8QI (match_dup 1)))
6773 (set (match_dup 0) (lshiftrt:DI (match_dup 0) (const_int 56)))]
6775 operands[2] = gen_rtx_REG (V8QImode, REGNO (operands[0]));